// Implementation as JDOM

/*--

 Copyright 2000 Elliotte Rusty Harold.
 All rights reserved.

 I haven't yet decided on a license.
 It will be some form of open source.

 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 SUCH DAMAGE.

 */

package com.macfaq.xml;

import java.net.URL;
import java.net.MalformedURLException;
import java.util.Stack;
import java.util.Iterator;
import java.util.List;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.BufferedInputStream;
import java.io.InputStream;
import org.jdom.Namespace;
import org.jdom.Comment;
import org.jdom.CDATA;
import org.jdom.JDOMException;
import org.jdom.Attribute;
import org.jdom.Element;
import org.jdom.ProcessingInstruction;
import org.jdom.Document;
import org.jdom.input.SAXBuilder;
import org.jdom.output.XMLOutputter;

/**
 * <p><code>XIncluder</code> provides static methods that
 * resolve JDOM elements and documents to produce
 * a new <code>Document</code> or <code>Element</code> in which
 * XInclude references have been resolved.
 * </p>
 *
 * <p>
 * An include element is an element named <code>include</code> in the
 * {@link #XINCLUDE_NAMESPACE} namespace. Its <code>href</code> attribute
 * names the resource to include; a <code>parse="text"</code> attribute
 * requests that the resource be inserted as text rather than parsed as XML.
 * </p>
 *
 * <p>
 * All included documents are loaded through one shared static
 * <code>SAXBuilder</code>; no synchronization is performed around it.
 * </p>
 *
 * @author Elliotte Rusty Harold
 * @version 1.0d2
 */
public class XIncluder {

  /** Namespace an <code>include</code> element must be in to be processed. */
  public final static Namespace XINCLUDE_NAMESPACE
    = Namespace.getNamespace("xinclude", "http://www.w3.org/1999/XML/xinclude");

  // No instances allowed
  private XIncluder() {}

  // Parser used to load every document referenced by a parsed include.
  private static final SAXBuilder builder = new SAXBuilder();

  /**
    * <p>
    * This method resolves a JDOM <code>Document</code>
    * and merges in all XInclude references.
    * If a referenced document cannot be loaded, the include element is
    * replaced with an error message. The Document object returned is a
    * new document that reuses the original's <code>DocType</code> object.
    * The original <code>Document</code> is not changed.
    * </p>
    *
    * <p>
    * Comments and processing instructions found at the top level of the
    * original document are cloned and added to the result after it has been
    * constructed around the resolved root element.
    * NOTE(review): this presumably places prolog comments after the root
    * element in the result; confirm whether their position matters.
    * </p>
    *
    * @param original <code>Document</code> that will be processed
    * @param base     <code>String</code> form of the base URI against which
    *                 relative URLs will be resolved. This can be null if every
    *                 include element either carries an <code>xml:base</code>
    *                 attribute or has an absolute <code>href</code>.
    * @return Document new <code>Document</code> object in which all
    *                  XInclude elements have been replaced.
    * @throws NullPointerException     if <code>original</code> is null
    * @throws ClassCastException       if the root element is itself an include
    *                                  element that resolves to text (or to an
    *                                  error message) rather than to an element
    * @throws CircularIncludeException if a parsed include refers to a document
    *                                  that is already being processed
    * @throws MalformedURLException    declared for callers; malformed URLs are
    *                                  currently reported in the output instead
    */
  public static Document resolve(Document original, String base)
    throws CircularIncludeException, MalformedURLException {

    if (original == null) {
      throw new NullPointerException("Document must not be null");
    }

    // A document needs an element as its root, so a root that resolves to
    // a String (text include or error message) fails on this cast.
    Element resolved = (Element) resolve(original.getRootElement(), base);

    Document result = new Document(resolved, original.getDocType());

    Iterator iterator = original.getMixedContent().iterator();
    while (iterator.hasNext()) {
      Object o = iterator.next();
      if (o instanceof Comment) {
        result.addContent((Comment) ((Comment) o).clone());
      }
      else if (o instanceof ProcessingInstruction) {
        result.addContent(
          (ProcessingInstruction) ((ProcessingInstruction) o).clone());
      }
      // The root element itself was handled above; anything else is skipped.
    }

    return result;
  }

  /**
    * <p>
    * This method resolves a JDOM <code>Element</code>
    * and merges in all XInclude references. This process is recursive.
    * If a referenced document cannot be loaded, the include element is
    * replaced with an error message. An <code>Element</code> returned from
    * this method is a new element.
    * The original <code>Element</code> is not changed.
    * </p>
    *
    * @param original <code>Element</code> that will be processed
    * @param base     <code>String</code> form of the base URI against which
    *                 relative URLs will be resolved. This can be null if every
    *                 include element either carries an <code>xml:base</code>
    *                 attribute or has an absolute <code>href</code>.
    * @return Object  Either an <code>Element</code> (an ordinary element, or
    *                 an include element whose target was parsed as XML)
    *                 or a <code>String</code> (an include element with
    *                 <code>parse="text"</code>, or an error message)
    * @throws NullPointerException     if <code>original</code> is null
    * @throws IllegalArgumentException if an include element has no
    *                                  <code>href</code> attribute
    * @throws CircularIncludeException if a parsed include refers to a document
    *                                  that is already being processed
    * @throws MalformedURLException    declared for callers; malformed URLs are
    *                                  currently reported in the output instead
    */
  public static Object resolve(Element original, String base)
    throws CircularIncludeException, MalformedURLException {

    if (original == null) {
      throw new NullPointerException("You can't XInclude a null element.");
    }
    Stack bases = new Stack();
    if (base != null) bases.push(base);

    // Nothing is popped afterwards: the stack is local to this call, and it
    // is empty when base is null, so a pop would throw EmptyStackException.
    return resolve(original, bases);

  }

  /**
    * Tells whether an element is an XInclude include element, that is,
    * whether it is named <code>include</code> and is in the
    * {@link #XINCLUDE_NAMESPACE} namespace.
    *
    * @param element the element to test
    * @return true if the element must be replaced by the resource it
    *         refers to, false if it is an ordinary element
    */
  private static boolean isIncludeElement(Element element) {
    return element.getName().equals("include")
        && element.getNamespace().equals(XINCLUDE_NAMESPACE);
  }


  /**
    * Resolves one element using a stack of base URLs. The top of the stack
    * is the base against which relative <code>href</code> values are
    * resolved; every entry is checked to detect circular includes.
    * The stack holds the same entries on normal return as on entry.
    *
    * @param original the element to process
    * @param bases    stack of <code>String</code> base URLs; may be empty
    * @return either an <code>Element</code> or a <code>String</code>
    * @throws IllegalArgumentException if an include element has no
    *                                  <code>href</code> attribute
    * @throws CircularIncludeException if a parsed include refers to a URL
    *                                  that is already on the stack
    */
  protected static Object resolve(Element original, Stack bases)
    throws CircularIncludeException {

    if (isIncludeElement(original)) {
      return resolveInclude(original, bases);
    }
    return copyResolvingChildren(original, bases);

  }

  // Replaces a single include element with the content it refers to:
  // an Element for a parsed include, a String for text or for an error.
  private static Object resolveInclude(Element include, Stack bases)
    throws CircularIncludeException {

    Attribute href = include.getAttribute("href");
    if (href == null) { // illegal, what kind of exception????
      throw new IllegalArgumentException("Missing href attribute");
    }

    // No base at all is represented by null, never by an empty string:
    // an empty string is not a valid URL context and would make even an
    // absolute href unresolvable.
    String base = null;
    if (!bases.isEmpty()) base = (String) bases.peek();

    // An xml:base attribute on the include element overrides the inherited base.
    Attribute baseAttribute
      = include.getAttribute("base", Namespace.XML_NAMESPACE);
    if (baseAttribute != null) base = baseAttribute.getValue();

    // Anything other than parse="text" is treated as a parsed (XML) include.
    boolean parse = true;
    Attribute parseAttribute = include.getAttribute("parse");
    if (parseAttribute != null && "text".equals(parseAttribute.getValue())) {
      parse = false;
    }

    URL remote;
    try {
      if (base != null) {
        remote = new URL(new URL(base), href.getValue());
      }
      else {
        remote = new URL(href.getValue());
      }
    }
    catch (MalformedURLException ex) {
      if (base != null) {
        return "Unresolvable URL " + base + "/" + href.getValue();
      }
      return "Unresolvable URL " + href.getValue();
    }

    if (!parse) { // insert text
      return getURL(remote);
    }

    String location = remote.toExternalForm();
    // Stack.contains compares with equals(), so equal URL strings match.
    if (bases.contains(location)) {
      // contains() succeeded, so the stack is not empty and peek() is safe;
      // the top entry is the document (or initial base) being processed.
      throw new CircularIncludeException(
        "Circular XInclude Reference to " + location
       + " in " + bases.peek());
    }

    try {
      Document doc = builder.build(remote);
      bases.push(location);
      try {
        // Passed through uncast: the included root may itself resolve
        // to a String (a text include or an error message).
        return resolve(doc.getRootElement(), bases);
      }
      finally {
        // Keep the stack balanced even if the nested resolve throws.
        bases.pop();
      }
    }
    // Make this configurable
    catch (JDOMException e) {
      return "Document not found: " + location
       + "\r\n" + e.getMessage();
    }

  }

  // Builds a new element with the same name, namespace and (cloned)
  // attributes as the original, resolving each child element recursively.
  private static Element copyResolvingChildren(Element original, Stack bases)
    throws CircularIncludeException {

    Element result = new Element(original.getName(), original.getNamespace());

    Iterator attributes = original.getAttributes().iterator();
    while (attributes.hasNext()) {
      Attribute a = (Attribute) attributes.next();
      result.addAttribute((Attribute) a.clone());
    }

    Iterator children = original.getMixedContent().iterator();
    while (children.hasNext()) {
      Object o = children.next();
      if (o instanceof Element) {
        Object resolved = resolve((Element) o, bases);
        if (resolved instanceof String) {
          result.addContent((String) resolved);
        }
        else {
          result.addContent((Element) resolved);
        }
      }
      else if (o instanceof String) {
        result.addContent((String) o);
      }
      else if (o instanceof Comment) {
        // Cloned so that the result shares no nodes with the original tree.
        result.addContent((Comment) ((Comment) o).clone());
      }
      else if (o instanceof CDATA) {
        // NOTE(review): the CDATA node is still shared with the original
        // tree; confirm whether this JDOM version lets it be copied too.
        result.addContent((CDATA) o);
      }
      else if (o instanceof ProcessingInstruction) {
        result.addContent(
          (ProcessingInstruction) ((ProcessingInstruction) o).clone());
      }
    }

    return result;

  }

  /**
    * Reads the resource at a URL as Latin-1 text and returns it with every
    * <code>&lt;</code> replaced by <code>&amp;lt;</code> and every
    * <code>&amp;</code> replaced by <code>&amp;amp;</code>.
    * The stream is always closed before this method returns.
    * NOTE(review): if the text is later written by an outputter that also
    * escapes character data, these characters would be escaped twice; confirm.
    *
    * @param source the URL to read
    * @return the escaped text of the resource; if an
    *         <code>IOException</code> occurs its stack trace is printed
    *         and a "Document not found" message is returned instead
    */
  public static String getURL(URL source) {
    StringBuffer s = new StringBuffer();
    InputStream in = null;
    try {
      in = new BufferedInputStream(source.openStream());
      // does XInclude give you anything to specify the character set????
      InputStreamReader reader = new InputStreamReader(in, "8859_1");
      int c;
      // Read characters through the reader so the declared encoding is
      // the one actually applied.
      while ((c = reader.read()) != -1) {
        if (c == '<') s.append("&lt;");
        else if (c == '&') s.append("&amp;");
        else s.append((char) c);
      }
      return s.toString();
    }
    catch (IOException e) {
      e.printStackTrace();
      return "Document not found: " + source.toExternalForm();
    }
    finally {
      if (in != null) {
        try {
          in.close();
        }
        catch (IOException ignored) {
          // Nothing useful can be done if closing fails; the text (or the
          // error message) has already been determined.
        }
      }
    }
  }

  /**
    * Resolves the XIncludes in each document named on the command line and
    * writes each result to <code>System.out</code>. An argument that
    * contains no colon is treated as a file name and converted to a URL
    * before being used as the base. A failure on one argument is reported
    * on <code>System.err</code> and does not stop the remaining arguments.
    *
    * @param args file names or URLs of the documents to process
    */
  public static void main(String[] args) {

    SAXBuilder parser = new SAXBuilder();
    XMLOutputter outputter = new XMLOutputter();
    for (int i = 0; i < args.length; i++) {
      try {
        Document input = parser.build(args[i]);
        // absolutize URL
        String base = args[i];
        if (base.indexOf(':') < 0) {
          File f = new File(base);
          base = f.toURL().toExternalForm();
        }
        Document output = resolve(input, base);
        // need to set encoding on this to Latin-1 and check what
        // happens to UTF-8 curly quotes
        outputter.output(output, System.out);
      }
      catch (Exception e) {
        System.err.println(e);
        e.printStackTrace();
      }
    }

  }

}

/*
 Previous | Next | Top | Cafe con Leche

 Copyright 2000, 2001 Elliotte Rusty Harold
 elharo@metalab.unc.edu
 Last Modified January 13, 2001
 */