// SAX XInclude Driver

/*--

 Copyright 2001 Elliotte Rusty Harold.
 All rights reserved.

 I haven't yet decided on a license.
 It will be some form of open source.

 THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED
 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED.  IN NO EVENT SHALL ELLIOTTE RUSTY HAROLD OR ANY
 OTHER CONTRIBUTORS TO THIS PACKAGE
 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 SUCH DAMAGE.

 */

package com.macfaq.xml;

import org.xml.sax.SAXException;
import org.xml.sax.ContentHandler;
import org.xml.sax.helpers.XMLReaderFactory;
import org.xml.sax.XMLReader;
import org.xml.sax.Locator;
import org.xml.sax.Attributes;
import org.xml.sax.ext.LexicalHandler;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.io.OutputStream;
import java.io.Writer;
import java.io.OutputStreamWriter;
import java.io.File;
import java.net.URL;
import java.net.MalformedURLException;
import java.util.Stack;

/**
 * <p><code>SAXXIncluder</code> is a simple <code>ContentHandler</code> that
 * writes its XML document onto an output stream after resolving
 * all <code>xinclude:include</code> elements.
 * </p>
 *
 * <p>
 *    The only current known bug is that the notation and
 *    unparsed entity information items are not included
 *    in the result infoset. Furthermore, processing 
 *    instructions in the DTD are not included. Note that this is 
 *    only relevant to the source infoset. The DOCTYPE declaration
 *    is specifically excluded from included infosets.
 * </p>
 *
 *  <p> 
 *     I also need to check how section 4.4.3.1 applies for inscope
 *     namespaces in included documents. Currently this is not an issue
 *     because I only include full documents, but it may become
 *     an issue when XPointer support is added. 
 *  </p>
 *
 *  <p> 
 *     There's no XPointer support yet. Only full documents are
 *     included.
 *  </p>
 *
 *  <p> 
 *     The parser used to drive this must support the <code>LexicalHandler</code>
 *     interface. It must also provide a <code>Locator</code> object. 
 *     These are optional in SAX, but Xerces-J does support these features.
 *  </p>
 *
 * @author Elliotte Rusty Harold
 * @version 1.0d8
 */
public class SAXXIncluder implements ContentHandler, LexicalHandler {

    /** Character stream the serialized document is written to. */
    private Writer out;

    /** Encoding name echoed into the XML declaration by startDocument(). */
    private String encoding;

    /** True between startDTD() and endDTD(); comments seen meanwhile are dropped. */
    private boolean inDTD = false;

    /**
     * Names pushed by startEntity() and popped by endEntity(). Only the
     * depth is consulted (in startDTD()), never the names themselves.
     */
    private Stack entities = new Stack();

    /**
     * Just need this reference so we can ask if a comment is
     * inside an include element or not. May be null if
     * setFilter() was never called.
     */
    private XIncludeFilter filter = null;

    /**
     * <p>
     * Creates a serializer that writes to <code>out</code> using the
     * named character encoding. The same name is written into the
     * XML declaration.
     * </p>
     *
     * @param out       stream that receives the encoded document
     * @param encoding  name of the character encoding to use
     * @throws UnsupportedEncodingException if the encoding is not
     *         supported by this Java runtime
     */
    public SAXXIncluder(OutputStream out, String encoding)
      throws UnsupportedEncodingException {
        this.out = new OutputStreamWriter(out, encoding);
        this.encoding = encoding;
    }

    /**
     * <p>
     * Creates a serializer that writes to <code>out</code> in UTF-8.
     * </p>
     *
     * @param out  stream that receives the encoded document
     * @throws IllegalStateException if the runtime does not support
     *         UTF-8; this really shouldn't happen, but failing here is
     *         better than leaving the writer null and crashing later
     */
    public SAXXIncluder(OutputStream out) {
        try {
            this.out = new OutputStreamWriter(out, "UTF-8");
            this.encoding = "UTF-8";
        }
        catch (UnsupportedEncodingException e) {
            throw new IllegalStateException(
              "UTF-8 is not supported by this Java runtime", e);
        }
    }

    /**
     * Ignored; this handler does not use location information.
     *
     * @param locator  the locator supplied by the parser
     */
    public void setDocumentLocator(Locator locator) {}

    /**
     * Writes the XML declaration, including the encoding name given
     * at construction time.
     *
     * @throws SAXException wrapping any IOException from the writer
     */
    public void startDocument() throws SAXException {

        try {
            out.write("<?xml version='1.0' encoding='"
              + encoding + "'?>\r\n");
        }
        catch (IOException e) {
            throw new SAXException("Write failed", e);
        }

    }

    /**
     * Flushes the underlying writer. The writer is not closed.
     *
     * @throws SAXException wrapping any IOException from the writer
     */
    public void endDocument() throws SAXException {

        try {
            out.flush();
        }
        catch (IOException e) {
            throw new SAXException("Flush failed", e);
        }

    }

    /**
     * Does nothing. Namespace declarations are only written when they
     * arrive as attributes in startElement().
     */
    public void startPrefixMapping(String prefix, String uri)
      throws SAXException {

    }

    /** Does nothing. */
    public void endPrefixMapping(String prefix)
      throws SAXException {

    }

    /**
     * Writes a start-tag using the qualified name and every attribute
     * reported in <code>atts</code>. Attribute values are single-quoted
     * and escaped.
     *
     * @throws SAXException wrapping any IOException from the writer
     */
    public void startElement(String namespaceURI, String localName,
      String qualifiedName, Attributes atts)
      throws SAXException {

        try {
            out.write("<" + qualifiedName);
            for (int i = 0; i < atts.getLength(); i++) {
                out.write(" ");
                out.write(atts.getQName(i));
                out.write("='");
                writeAttributeValue(atts.getValue(i));
                out.write("'");
            }
            out.write(">");
        }
        catch (IOException e) {
            throw new SAXException("Write failed", e);
        }

    }

    /**
     * Writes an attribute value suitable for placement between single
     * quotes. Markup characters and the apostrophe become entity
     * references. Tab, line feed and carriage return become character
     * references, because a parser re-reading the output would
     * otherwise normalize them to spaces.
     */
    // Need to use character references if the encoding
    // can't support the character
    private void writeAttributeValue(String value) throws IOException {
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
                case '&':
                    out.write("&amp;");
                    break;
                case '<':
                    out.write("&lt;");
                    break;
                case '>':
                    out.write("&gt;");
                    break;
                case '\'':
                    out.write("&apos;");
                    break;
                case '\t':
                    out.write("&#9;");
                    break;
                case '\n':
                    out.write("&#10;");
                    break;
                case '\r':
                    out.write("&#13;");
                    break;
                default:
                    out.write(c);
            }
        }
    }

    /**
     * Writes an end-tag using the qualified name.
     *
     * @throws SAXException wrapping any IOException from the writer
     */
    public void endElement(String namespaceURI, String localName,
      String qualifiedName) throws SAXException {

        try {
            out.write("</" + qualifiedName + ">");
        }
        catch (IOException e) {
            throw new SAXException("Write failed", e);
        }

    }

    /**
     * Writes character data, escaping the characters that cannot
     * safely appear literally in element content.
     *
     * @param ch      buffer holding the characters
     * @param start   index of the first character to write
     * @param length  number of characters to write
     * @throws SAXException wrapping any IOException from the writer
     */
    // need to escape characters that are not in the given
    // encoding using character references????
    public void characters(char[] ch, int start, int length)
      throws SAXException {

        try {
            int end = start + length;
            for (int i = start; i < end; i++) {
                char c = ch[i];
                switch (c) {
                    case '&':
                        out.write("&amp;");
                        break;
                    case '<':
                        out.write("&lt;");
                        break;
                    case '>':
                        // a literal "]]>" is not allowed in character
                        // data, so always escape the closing bracket
                        out.write("&gt;");
                        break;
                    case '\r':
                        // a raw carriage return would be turned into a
                        // line feed when the output is parsed again
                        out.write("&#13;");
                        break;
                    default:
                        out.write(c);
                }
            }
        }
        catch (IOException e) {
            throw new SAXException("Write failed", e);
        }

    }

    /**
     * Treats ignorable white space exactly like other character data.
     */
    public void ignorableWhitespace(char[] ch, int start, int length)
      throws SAXException {
        this.characters(ch, start, length);
    }

    /**
     * Writes a processing instruction. When no data was supplied
     * (null or empty) only the target is written.
     *
     * @throws SAXException wrapping any IOException from the writer
     */
    // do I need to escape text in PI????
    public void processingInstruction(String target, String data)
      throws SAXException {

        try {
            out.write("<?" + target);
            if (data != null && data.length() > 0) {
                out.write(" " + data);
            }
            out.write("?>");
        }
        catch (IOException e) {
            throw new SAXException("Write failed", e);
        }

    }

    /**
     * Writes a general entity reference for an entity the parser
     * skipped. SAX reports skipped parameter entities with a leading
     * '%' and the skipped external DTD subset as "[dtd]"; neither can
     * be expressed as a reference in document content, so those are
     * not written.
     *
     * @throws SAXException wrapping any IOException from the writer
     */
    public void skippedEntity(String name) throws SAXException {

        if (name.startsWith("%") || name.equals("[dtd]")) {
            return;
        }
        try {
            out.write("&" + name + ";");
        }
        catch (IOException e) {
            throw new SAXException("Write failed", e);
        }

    }

    // LexicalHandler methods

    /**
     * Marks the start of the DTD and, when no entity is currently
     * open, writes a DOCTYPE declaration. The internal subset is not
     * reproduced.
     *
     * @param name      the document type name
     * @param publicId  the public identifier, or null
     * @param systemId  the system identifier, or null
     * @throws SAXException wrapping any IOException from the writer
     */
    public void startDTD(String name, String publicId, String systemId)
      throws SAXException {
        inDTD = true;
        // if this is the source document, output a DOCTYPE declaration
        if (entities.size() == 0) {
            StringBuffer doctype = new StringBuffer("<!DOCTYPE ");
            doctype.append(name);
            // An external ID always needs a system literal, so with no
            // system id the declaration is written without one rather
            // than with the string "null".
            if (systemId != null) {
                if (publicId != null) {
                    doctype.append(" PUBLIC ").append(quote(publicId));
                    doctype.append(' ').append(quote(systemId));
                }
                else {
                    doctype.append(" SYSTEM ").append(quote(systemId));
                }
            }
            doctype.append(">\r\n");
            try {
                out.write(doctype.toString());
            }
            catch (IOException e) {
                throw new SAXException("Error while writing DOCTYPE", e);
            }
        }
    }

    /**
     * Wraps a literal in double quotes, or in single quotes when the
     * literal itself contains a double quote.
     */
    private static String quote(String literal) {
        char q = literal.indexOf('"') < 0 ? '"' : '\'';
        return q + literal + q;
    }

    /**
     * Marks the end of the DTD so that comments in the rest of the
     * document are written again.
     */
    public void endDTD() throws SAXException {
        inDTD = false;
    }

    /** Records that an entity has been opened. */
    public void startEntity(String name) throws SAXException {
        entities.push(name);
    }

    /**
     * Records that the most recently opened entity has been closed.
     * An unmatched call is ignored.
     */
    public void endEntity(String name) throws SAXException {
        if (!entities.isEmpty()) {
            entities.pop();
        }
    }

    /** Does nothing; CDATA section boundaries are not preserved. */
    public void startCDATA() throws SAXException {}

    /** Does nothing; CDATA section boundaries are not preserved. */
    public void endCDATA() throws SAXException {}

    /**
     * Supplies the filter that is asked, for each comment, whether the
     * parser is currently inside an include element.
     *
     * @param filter  the filter driving this handler; may be null
     */
    public void setFilter(XIncludeFilter filter) {
        this.filter = filter;
    }

    /**
     * Writes a comment, unless it occurs in the DTD or the filter
     * reports that it is inside an include element. When no filter
     * has been set, only the DTD check applies.
     *
     * @throws SAXException wrapping any IOException from the writer
     */
    public void comment(char[] ch, int start, int length)
      throws SAXException {

        if (inDTD) {
            return;
        }
        if (filter != null && filter.insideIncludeElement()) {
            return;
        }
        try {
            out.write("<!--");
            out.write(ch, start, length);
            out.write("-->");
        }
        catch (IOException e) {
            throw new SAXException("Write failed", e);
        }

    }

    /**
     * <p>
     * The driver method for the SAXXIncluder program. Each argument
     * is parsed through an <code>XIncludeFilter</code> and the result
     * is written to <code>System.out</code>. A failure on one
     * document is reported on <code>System.err</code> and processing
     * continues with the next argument.
     * </p>
     *
     * @param args  contains the URLs and/or filenames
     *              of the documents to be processed.
     */
    public static void main(String[] args) {

        if (args.length == 0) {
            System.err.println(
              "Usage: java com.macfaq.xml.SAXXIncluder URL_or_file...");
            return;
        }

        // make this more robust
        XMLReader parser;
        try {
            parser = XMLReaderFactory.createXMLReader();
        }
        catch (SAXException e) {
            // no default parser configured; fall back to Xerces by name
            try {
                parser = XMLReaderFactory.createXMLReader(
                  "org.apache.xerces.parsers.SAXParser");
            }
            catch (SAXException e2) {
                System.err.println("Could not find an XML parser");
                return;
            }
        }

        // Need better namespace handling
        // With this feature on, xmlns declarations are reported as
        // attributes, which is the only way startElement() sees them.
        try {
            parser.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
        }
        catch (SAXException e) {
            System.err.println(e);
            return;
        }

        for (int i = 0; i < args.length; i++) {
            try {
                XIncludeFilter includer = new XIncludeFilter();
                includer.setParent(parser);
                SAXXIncluder s = new SAXXIncluder(System.out);
                includer.setContentHandler(s);
                try {
                    includer.setProperty(
                      "http://xml.org/sax/properties/lexical-handler",
                       s);
                    s.setFilter(includer);
                }
                catch (SAXException e) {
                    // Will not support comments
                }
                includer.parse(args[i]);
            }
            catch (Exception e) { // be specific about exceptions????
                System.err.println(e);
                e.printStackTrace();
            }
        }

    }

}

// Previous | Next | Top | Cafe con Leche
//
// Copyright 2000, 2001 Elliotte Rusty Harold
// elharo@metalab.unc.edu
// Last Modified August 21, 2001