// SAX XInclude Driver

/*--

 Copyright 2001 Elliotte Rusty Harold.
 All rights reserved.

 I haven't yet decided on a license.
 It will be some form of open source.

 THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED
 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED.  IN NO EVENT SHALL ELLIOTTE RUSTY HAROLD OR ANY
 OTHER CONTRIBUTORS TO THIS PACKAGE
 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 SUCH DAMAGE.

 */

package com.macfaq.xml;

import org.xml.sax.SAXException;
import org.xml.sax.ContentHandler;
import org.xml.sax.helpers.XMLReaderFactory;
import org.xml.sax.XMLReader;
import org.xml.sax.Locator;
import org.xml.sax.Attributes;
import org.xml.sax.ext.LexicalHandler;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.io.OutputStream;
import java.io.Writer;
import java.io.OutputStreamWriter;
import java.io.File;
import java.net.URL;
import java.net.MalformedURLException;

/**
 * <p><code>SAXXIncluder</code> is a simple ContentHandler that
 * serializes the SAX events it receives as an XML document on an
 * output stream. It performs no inclusion itself; in
 * {@link #main(String[])} it is placed downstream of an
 * <code>XIncludeFilter</code>, so the events it sees are those the
 * filter forwards.
 * </p>
 *
 * <p>
 * Text and attribute values are escaped on output so that the
 * result stays well-formed. Comments are written only when this
 * object is also registered as the parser's LexicalHandler;
 * comments inside the DTD are dropped, as are comments the
 * attached filter reports as being inside an include element.
 * </p>
 *
 * <p>
 * Characters that cannot be represented in the output encoding are
 * not yet replaced by character references.
 * </p>
 *
 * @author Elliotte Rusty Harold
 * @version 1.0d1
 */
public class SAXXIncluder implements ContentHandler, LexicalHandler {

    /** Encoding used when the caller does not name one. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /** Destination for the serialized document. */
    private final Writer out;

    /** Encoding name echoed in the XML declaration. */
    private final String encoding;

    /** True between startDTD() and endDTD(). */
    private boolean inDTD = false;

    // Just need this reference so we can ask if a comment is
    // inside an include element or not. May be null.
    private XIncludeFilter filter = null;

    /**
     * Creates a serializer that writes to <code>out</code> using
     * the named character encoding.
     *
     * @param out       the stream the document is written to
     * @param encoding  the name of the character encoding; also
     *                  written into the XML declaration
     * @throws UnsupportedEncodingException if the encoding is not
     *         supported by this virtual machine
     */
    public SAXXIncluder(OutputStream out, String encoding)
      throws UnsupportedEncodingException {
        this.out = new OutputStreamWriter(out, encoding);
        this.encoding = encoding;
    }

    /**
     * Creates a serializer that writes to <code>out</code> in UTF-8.
     *
     * @param out  the stream the document is written to
     * @throws IllegalStateException if this virtual machine does
     *         not support UTF-8
     */
    public SAXXIncluder(OutputStream out) {
        Writer writer;
        try {
            writer = new OutputStreamWriter(out, DEFAULT_ENCODING);
        }
        catch (UnsupportedEncodingException e) {
            // Fail loudly rather than leave the writer null and
            // hit a NullPointerException on the first event.
            throw new IllegalStateException(
              DEFAULT_ENCODING + " is not supported", e);
        }
        this.out = writer;
        this.encoding = DEFAULT_ENCODING;
    }

    /**
     * Does nothing; location information is not used.
     *
     * @param locator  ignored
     */
    public void setDocumentLocator(Locator locator) {}

    /**
     * Writes the XML declaration, naming the output encoding.
     *
     * @throws SAXException if the underlying writer fails
     */
    public void startDocument() throws SAXException {

        try {
            out.write("<?xml version='1.0' encoding='"
              + encoding + "'?>\r\n");
        }
        catch (IOException e) {
            throw new SAXException("Write failed", e);
        }

    }

    /**
     * Flushes the writer. The underlying stream is not closed.
     *
     * @throws SAXException if the flush fails
     */
    public void endDocument() throws SAXException {

        try {
            out.flush();
        }
        catch (IOException e) {
            throw new SAXException("Flush failed", e);
        }

    }

    /**
     * Does nothing; namespace declarations are expected to arrive
     * as ordinary attributes (main() turns on the
     * namespace-prefixes feature).
     */
    public void startPrefixMapping(String prefix, String uri)
      throws SAXException {

    }

    /** Does nothing. */
    public void endPrefixMapping(String prefix)
      throws SAXException {

    }

    /**
     * Writes a start-tag with all of its attributes. Attribute
     * values are escaped and delimited with single quotes.
     *
     * @param namespaceURI   not used
     * @param localName      used only if no qualified name is given
     * @param qualifiedName  the name written in the tag
     * @param atts           the attributes to write
     * @throws SAXException if the underlying writer fails
     */
    public void startElement(String namespaceURI, String localName,
      String qualifiedName, Attributes atts)
      throws SAXException {

        try {
            out.write('<');
            out.write(pickName(qualifiedName, localName));
            int count = atts.getLength();
            for (int i = 0; i < count; i++) {
                out.write(' ');
                out.write(
                  pickName(atts.getQName(i), atts.getLocalName(i)));
                out.write("='");
                // Quotes, markup characters and literal whitespace
                // must be escaped or the value is corrupted (or the
                // tag broken) when the output is parsed again.
                out.write(escape(atts.getValue(i), true, true));
                out.write('\'');
            }
            out.write('>');
        }
        catch (IOException e) {
            throw new SAXException("Write failed", e);
        }

    }

    /**
     * Writes an end-tag.
     *
     * @param namespaceURI   not used
     * @param localName      used only if no qualified name is given
     * @param qualifiedName  the name written in the tag
     * @throws SAXException if the underlying writer fails
     */
    public void endElement(String namespaceURI, String localName,
      String qualifiedName) throws SAXException {

        try {
            out.write("</");
            out.write(pickName(qualifiedName, localName));
            out.write('>');
        }
        catch (IOException e) {
            throw new SAXException("Write failed", e);
        }

    }

    /**
     * Writes character data, escaping <code>&amp;</code>,
     * <code>&lt;</code> and <code>&gt;</code>. Because the CDATA
     * callbacks are no-ops, the content of CDATA sections also
     * passes through here and is written as escaped text.
     *
     * @param ch      the array holding the characters
     * @param start   index of the first character to write
     * @param length  number of characters to write
     * @throws SAXException if the underlying writer fails
     */
    public void characters(char[] ch, int start, int length)
      throws SAXException {

        try {
            writeEscapedContent(ch, start, length);
        }
        catch (IOException e) {
            throw new SAXException("Write failed", e);
        }

    }

    /**
     * Writes ignorable white space exactly like other character
     * data.
     */
    public void ignorableWhitespace(char[] ch, int start, int length)
      throws SAXException {
        this.characters(ch, start, length);
    }

    /**
     * Writes a processing instruction. The data is not escaped;
     * processing instructions have no escaping mechanism.
     *
     * @param target  the processing instruction target
     * @param data    the processing instruction data; may be null
     *                or empty, in which case only the target is
     *                written
     * @throws SAXException if the underlying writer fails
     */
    public void processingInstruction(String target, String data)
      throws SAXException {

        try {
            out.write("<?");
            out.write(target);
            if (data != null && data.length() > 0) {
                out.write(' ');
                out.write(data);
            }
            out.write("?>");
        }
        catch (IOException e) {
            throw new SAXException("Write failed", e);
        }

    }

    /**
     * Writes a reference to a general entity the parser skipped.
     * Nothing is written for parameter entities (names starting
     * with <code>%</code>) or for the external DTD subset
     * (<code>[dtd]</code>), since neither can be expressed as an
     * entity reference in content.
     *
     * @param name  the name of the skipped entity
     * @throws SAXException if the underlying writer fails
     */
    public void skippedEntity(String name) throws SAXException {

        if (name == null || name.length() == 0
          || name.charAt(0) == '%' || "[dtd]".equals(name)) {
            return;
        }

        try {
            out.write("&" + name + ";");
        }
        catch (IOException e) {
            throw new SAXException("Write failed", e);
        }

    }

    // This should check the encoding and use charrefs on
    // non-encoded characters
    /**
     * Escapes the characters <code>&amp;</code>, <code>&lt;</code>,
     * <code>&gt;</code>, <code>"</code> and <code>'</code> with the
     * predefined entity references.
     *
     * @param s  the text to escape; must not be null
     * @return the escaped text, or <code>s</code> itself if it
     *         contains nothing that needs escaping
     */
    public static String escapeText(String s) {
        return escape(s, true, false);
    }

    /**
     * Returns the replacement for one character, or null if the
     * character can be written as is.
     *
     * @param c           the character to examine
     * @param quotes      whether quote characters are escaped
     * @param whitespace  whether tab, line feed and carriage
     *                    return become character references
     */
    private static String replacementFor(char c, boolean quotes,
      boolean whitespace) {
        switch (c) {
            case '&':  return "&amp;";
            case '<':  return "&lt;";
            case '>':  return "&gt;";
            case '"':  return quotes ? "&quot;" : null;
            case '\'': return quotes ? "&apos;" : null;
            case '\t': return whitespace ? "&#9;" : null;
            case '\n': return whitespace ? "&#10;" : null;
            case '\r': return whitespace ? "&#13;" : null;
            default:   return null;
        }
    }

    /**
     * Escapes a string; the buffer is only allocated once the
     * first character needing replacement is found.
     */
    private static String escape(String s, boolean quotes,
      boolean whitespace) {
        StringBuffer result = null;
        int length = s.length();
        for (int i = 0; i < length; i++) {
            char c = s.charAt(i);
            String replacement = replacementFor(c, quotes, whitespace);
            if (replacement != null) {
                if (result == null) {
                    result = new StringBuffer(length + 16);
                    result.append(s.substring(0, i));
                }
                result.append(replacement);
            }
            else if (result != null) {
                result.append(c);
            }
        }
        return (result == null) ? s : result.toString();
    }

    /**
     * Writes element content, copying runs of ordinary characters
     * straight from the array and substituting entity references
     * for markup characters.
     */
    private void writeEscapedContent(char[] ch, int start, int length)
      throws IOException {
        int end = start + length;
        int runStart = start;
        for (int i = start; i < end; i++) {
            String replacement = replacementFor(ch[i], false, false);
            if (replacement != null) {
                out.write(ch, runStart, i - runStart);
                out.write(replacement);
                runStart = i + 1;
            }
        }
        out.write(ch, runStart, end - runStart);
    }

    /**
     * Returns the qualified name, or the local name when the
     * parser supplied no qualified name.
     */
    private static String pickName(String qualifiedName,
      String localName) {
        if (qualifiedName == null || qualifiedName.length() == 0) {
            return localName;
        }
        return qualifiedName;
    }

    //LexicalHandler methods

    /** Records that the parser has entered the DTD. */
    public void startDTD(String name, String publicId, String systemId)
      throws SAXException {
        inDTD = true;
    }

    /** Records that the parser has left the DTD. */
    public void endDTD() throws SAXException {
        inDTD = false;
    }

    /** Does nothing. */
    public void startEntity(String name) throws SAXException {}

    /** Does nothing. */
    public void endEntity(String name) throws SAXException {}

    /** Does nothing; CDATA content is written as escaped text. */
    public void startCDATA() throws SAXException {}

    /** Does nothing. */
    public void endCDATA() throws SAXException {}

    /**
     * Attaches the filter that is asked whether a comment lies
     * inside an include element.
     *
     * @param filter  the filter to consult; may be null
     */
    public void setFilter(XIncludeFilter filter) {
        this.filter = filter;
    }

    /**
     * Writes a comment, unless it occurs inside the DTD or the
     * attached filter reports that it is inside an include
     * element. If no filter has been set, every comment outside
     * the DTD is written.
     *
     * @param ch      the array holding the comment text
     * @param start   index of the first character of the comment
     * @param length  number of characters in the comment
     * @throws SAXException if the underlying writer fails
     */
    public void comment(char[] ch, int start, int length)
      throws SAXException {

        if (inDTD) {
            return;
        }
        if (filter != null && filter.insideIncludeElement()) {
            return;
        }

        try {
            out.write("<!--");
            out.write(ch, start, length);
            out.write("-->");
        }
        catch (IOException e) {
            throw new SAXException("Write failed", e);
        }

    }

    /**
     * <p>
     * The driver method for the SAXXIncluder program. Each
     * argument is parsed through an <code>XIncludeFilter</code>
     * and the result is written to <code>System.out</code>.
     * A failure on one document is reported on
     * <code>System.err</code> and processing continues with the
     * next one.
     * </p>
     *
     * @param args  contains the URLs and/or filenames
     *              of the documents to be processed.
     */
    public static void main(String[] args) {

        if (args.length == 0) {
            System.err.println(
              "Usage: java com.macfaq.xml.SAXXIncluder URL_or_file...");
            return;
        }

        // make this more robust
        XMLReader parser;
        try {
            parser = XMLReaderFactory.createXMLReader();
        }
        catch (SAXException e) {
            // No default parser configured; try Xerces by name.
            try {
                parser = XMLReaderFactory.createXMLReader(
                  "org.apache.xerces.parsers.SAXParser");
            }
            catch (SAXException e2) {
                System.err.println("Could not find an XML parser");
                return;
            }
        }

        // Need better namespace handling.
        // With this feature on, xmlns attributes are reported like
        // any other attribute, so startElement() writes them out.
        try {
            parser.setFeature(
              "http://xml.org/sax/features/namespace-prefixes", true);
        }
        catch (SAXException e) {
            System.err.println(e);
            return;
        }

        for (int i = 0; i < args.length; i++) {
            try {
                URL base;
                try {
                    base = new URL(args[i]);
                }
                catch (MalformedURLException e) {
                    // Not a URL, so treat it as a file name. Going
                    // through toURI() escapes characters such as
                    // spaces that File.toURL() leaves illegal.
                    File f = new File(args[i]);
                    base = f.toURI().toURL();
                }
                XIncludeFilter includer = new XIncludeFilter(base);
                includer.setParent(parser);
                SAXXIncluder s = new SAXXIncluder(System.out);
                includer.setContentHandler(s);
                try {
                    includer.setProperty(
                      "http://xml.org/sax/properties/lexical-handler",
                       s);
                    s.setFilter(includer);
                }
                catch (SAXException e) {
                    // Will not support comments
                }
                includer.parse(args[i]);
            }
            catch (Exception e) { // be specific about exceptions????
                System.err.println("Could not process " + args[i]);
                e.printStackTrace();
            }
        }

    }

}

// Previous | Next | Top | Cafe con Leche
//
// Copyright 2000, 2001 Elliotte Rusty Harold
// elharo@metalab.unc.edu
// Last Modified August 21, 2001