// Implementation as JDOM

package com.macfaq.xml;

import java.net.*;
import java.util.*;
import java.io.*;
import org.jdom.*;
import org.jdom.input.*;
import org.jdom.output.*;

/**
 * Resolves XInclude references in JDOM documents and elements.
 *
 * <p>An element carrying an <code>href</code> attribute in
 * {@link #XINCLUDE_NAMESPACE} is replaced either by the (recursively
 * resolved) root element of the referenced document or, when its
 * <code>parse</code> attribute in the same namespace equals
 * <code>"text"</code>, by the referenced resource's text. Every other
 * element is copied, and its children are resolved in turn.</p>
 *
 * <p>All entry points are static; the originals passed in are not
 * modified.</p>
 */
public class XIncluder {

  /** Namespace in which the <code>href</code> and <code>parse</code> attributes are looked up. */
  public final static Namespace XINCLUDE_NAMESPACE
    = Namespace.getNamespace("xinclude", "http://www.w3.org/1999/XML/xinclude");

  /** Parser shared by every parsed include and by <code>main</code>. */
  private static final SAXBuilder builder = new SAXBuilder();

  /**
   * Builds a new document in which the XInclude references of
   * <code>original</code> have been resolved.
   *
   * <p>The result reuses the original's DocType object. Comments and
   * processing instructions found at the document level are cloned and
   * added to the result after its root element has been set; other
   * document-level content is not copied.</p>
   *
   * @param original the document to process; must not be null
   * @param base     URL against which relative <code>href</code> values are
   *                 resolved, or null if there is none
   * @return a new document; <code>original</code> is left untouched
   * @throws NullPointerException if <code>original</code> is null
   * @throws IOException   if an included resource cannot be read
   * @throws JDOMException if an included document cannot be parsed, or if
   *                       the root element itself resolves to included text
   *                       (a document cannot have text as its root)
   */
  public static Document resolve(Document original, String base)
   throws IOException, JDOMException {

    if (original == null) throw new NullPointerException("Document must not be null");

    // The root may itself be an include; with parse="text" it resolves to a
    // String, which cannot serve as a document root. Report that explicitly
    // instead of failing with a ClassCastException.
    Object resolvedRoot = resolve(original.getRootElement(), base);
    if (!(resolvedRoot instanceof Element)) {
      throw new JDOMException(
       "The root element resolved to included text; a document needs a root element.");
    }

    Document result = new Document((Element) resolvedRoot, original.getDocType());

    // Clone document-level comments and processing instructions so the
    // result shares no nodes with the original.
    Iterator iterator = original.getMixedContent().iterator();
    while (iterator.hasNext()) {
      Object o = iterator.next();
      if (o instanceof Comment) {
        result.addContent((Comment) ((Comment) o).clone());
      }
      else if (o instanceof ProcessingInstruction) {
        result.addContent((ProcessingInstruction) ((ProcessingInstruction) o).clone());
      }
    }

    return result;
  }

  /**
   * Resolves the XInclude references in <code>original</code> and its
   * descendants.
   *
   * @param original the element to process; must not be null
   * @param base     URL against which relative <code>href</code> values are
   *                 resolved, or null if there is none
   * @return an <code>Element</code>, or a <code>String</code> when
   *         <code>original</code> is itself a text include
   * @throws NullPointerException if <code>original</code> is null
   * @throws IOException   if an included resource cannot be read
   * @throws JDOMException if an included document cannot be parsed
   */
  public static Object resolve(Element original, String base)
   throws IOException, JDOMException {

    if (original == null) throw new NullPointerException("You can't XInclude a null element.");

    // The stack is local to this call, so there is nothing to pop afterwards.
    // (Popping unconditionally failed with EmptyStackException for a null base.)
    Stack bases = new Stack();
    if (base != null) bases.push(base);

    return resolve(original, bases);

  }


  /**
   * Resolves <code>original</code> using a stack of base URLs.
   *
   * <p>The top of <code>bases</code> is the current base URL. Each parsed
   * include pushes the included document's URL for the duration of its
   * resolution; an include whose URL is already on the stack is reported
   * as circular. An <code>xml:base</code> attribute on
   * <code>original</code> overrides the base used for this element's own
   * <code>href</code>; it is not pushed onto the stack, so it does not
   * affect descendants.</p>
   *
   * @param original the element to process
   * @param bases    stack of base URL strings; may be empty
   * @return an <code>Element</code>, or a <code>String</code> for a text include
   * @throws RuntimeException if a parsed include refers to a URL already
   *                          on <code>bases</code>
   * @throws IOException   if an included resource cannot be read or a URL
   *                       is malformed
   * @throws JDOMException if an included document cannot be parsed
   */
  protected static Object resolve(Element original, Stack bases)
   throws IOException, JDOMException {

    // null (rather than "") means "no base", so that an absolute href is
    // used on its own instead of failing inside new URL("").
    String base = null;
    if (!bases.isEmpty()) base = (String) bases.peek();
    Attribute baseAttribute = original.getAttribute("base", Namespace.XML_NAMESPACE);
    if (baseAttribute != null) base = baseAttribute.getValue();

    Attribute href = original.getAttribute("href", XINCLUDE_NAMESPACE);
    if (href == null) {
      // Not an include: copy the element and recurse into its children.
      return copyWithResolvedChildren(original, bases);
    }

    // Anything other than parse="text" is treated as a parsed include.
    boolean parse = true;
    Attribute parseAttribute = original.getAttribute("parse", XINCLUDE_NAMESPACE);
    if (parseAttribute != null) {
      if (parseAttribute.getValue().equals("text")) parse = false;
    }

    URL remote;
    if (base != null && base.length() > 0) {
      URL context = new URL(base);
      remote = new URL(context, href.getValue());
    }
    else {
      remote = new URL(href.getValue());
    }

    if (!parse) { // insert text
      return getURL(remote);
    }

    // Stack.contains compares with equals(), so equal URL strings match.
    String location = remote.toExternalForm();
    if (bases.contains(location)) {
      throw new RuntimeException("Circular XInclude Reference!");
    }

    Document doc = builder.build(remote);
    bases.push(location);
    try {
      // The included root may itself resolve to text, so pass the result
      // through unchanged rather than casting it to Element.
      return resolve(doc.getRootElement(), bases);
    }
    finally {
      bases.pop();
    }

  }

  /**
   * Copies a non-include element: same name and namespace, cloned
   * attributes, and children resolved recursively. Comments, CDATA
   * sections and processing instructions are cloned so that the copy
   * shares no nodes with the original; content of any other type is
   * skipped.
   */
  private static Element copyWithResolvedChildren(Element original, Stack bases)
   throws IOException, JDOMException {

    Element result = new Element(original.getName(), original.getNamespace());

    Iterator attributes = original.getAttributes().iterator();
    while (attributes.hasNext()) {
      Attribute a = (Attribute) attributes.next();
      result.addAttribute((Attribute) a.clone());
    }

    Iterator children = original.getMixedContent().iterator();
    while (children.hasNext()) {
      Object o = children.next();
      if (o instanceof Element) {
        Object resolved = resolve((Element) o, bases);
        if (resolved instanceof String) result.addContent((String) resolved);
        else result.addContent((Element) resolved);
      }
      else if (o instanceof String) {
        result.addContent((String) o);
      }
      else if (o instanceof Comment) {
        result.addContent((Comment) ((Comment) o).clone());
      }
      else if (o instanceof CDATA) {
        result.addContent((CDATA) ((CDATA) o).clone());
      }
      else if (o instanceof ProcessingInstruction) {
        result.addContent((ProcessingInstruction) ((ProcessingInstruction) o).clone());
      }
    }

    return result;
  }

  /**
   * Reads the resource at <code>source</code> as text, decoding it with the
   * "8859_1" encoding and replacing each less-than sign with
   * <code>&amp;lt;</code> and each ampersand with <code>&amp;amp;</code>.
   *
   * <p>NOTE(review): the returned text is already escaped; if it is added
   * to a JDOM tree whose outputter also escapes text, the output would be
   * escaped twice. Confirm against the JDOM version in use.</p>
   *
   * @param source the URL to read
   * @return the escaped text of the resource
   * @throws IOException if the resource cannot be opened or read
   */
  public static String getURL(URL source) throws IOException {
    StringBuffer s = new StringBuffer();
    InputStream in = new BufferedInputStream(source.openStream());
    try {
      // does XInclude give you anything to specify the character set????
      Reader reader = new InputStreamReader(in, "8859_1");
      int c;
      while ((c = reader.read()) != -1) {
        if (c == '<') s.append("&lt;");
        else if (c == '&') s.append("&amp;");
        else s.append((char) c);
      }
    }
    finally {
      // Always release the connection, even when reading fails.
      in.close();
    }
    return s.toString();
  }

  /**
   * Turns a command-line argument into an absolute URL string. An argument
   * that already parses as a URL is returned unchanged; anything else
   * (including a Windows path such as <code>C:\doc.xml</code>, whose colon
   * does not introduce a URL scheme) is treated as a local file name.
   */
  private static String absolutize(String location) throws MalformedURLException {
    try {
      new URL(location); // validity check only
      return location;
    }
    catch (MalformedURLException notAUrl) {
      return new File(location).toURL().toExternalForm();
    }
  }

  /**
   * Resolves the XIncludes of each document named on the command line and
   * writes the result to <code>System.out</code>. A failure on one argument
   * is reported on <code>System.err</code> and processing continues with
   * the next argument.
   *
   * @param args file names or URLs of the documents to process
   */
  public static void main(String[] args) {

    XMLOutputter outputter = new XMLOutputter();
    for (int i = 0; i < args.length; i++) {
      try {
        Document input = builder.build(args[i]);
        String base = absolutize(args[i]);
        Document output = resolve(input, base);
        // need to set encoding on this to Latin-1 and check what
        // happens to UTF-8 curly quotes
        outputter.output(output, System.out);
      }
      catch (Exception e) {
        // printStackTrace already prints the exception's own description.
        e.printStackTrace();
      }
    }

  }

}

// Previous | Next | Top | Cafe con Leche
//
// Copyright 2000 Elliotte Rusty Harold
// elharo@metalab.unc.edu
// Last Modified September 30, 2000