Example: RDDLStripper

import org.xmlpull.v1.*;
import java.net.*;
import java.io.*;

 
public class RDDLStripper {
    
  public final static String RDDL_NS = "http://www.rddl.org/";

  public static void main(String[] args) {
        
    if (args.length == 0) {
      System.err.println("Usage: java RDDLStripper url" );
      return;    
    }
        
    try {
      XmlPullParserFactory factory = XmlPullParserFactory.newInstance();
      factory.setNamespaceAware(true);
      XmlPullParser parser = factory.newPullParser();
      XmlSerializer serializer = factory.newSerializer();
      serializer.setOutput(System.out, "ISO-8859-1");
      
      InputStream in;
      try {
        URL u = new URL(args[0]);
        in = u.openStream();
      }
      catch (MalformedURLException ex) {
          // Maybe it's a file name
          in = new FileInputStream(args[0]);
      }
      parser.setInput(in, null);
        
      while (true) {
         int event = parser.nextToken();
         if (event == XmlPullParser.START_TAG) {
             String namespaceURI = parser.getNamespace();
             if (!namespaceURI.equals(RDDL_NS)) {
                 String prefix = parser.getPrefix();
                 if (prefix == null) prefix = "";
                 if (namespaceURI != null) {
                     serializer.setPrefix(prefix, namespaceURI);
                 }
                 serializer.startTag(namespaceURI, parser.getName());
                 // add attributes
                 for (int i = 0; i < parser.getAttributeCount(); i++) {
                     serializer.attribute(
                       parser.getAttributeNamespace(i),
                       parser.getAttributeName(i),
                       parser.getAttributeValue(i)
                     );
                     // How to define attribute prefixes????
                 }
             }
         }
         else if (event == XmlPullParser.END_TAG) {
             String namespaceURI = parser.getNamespace();
             if (!namespaceURI.equals(RDDL_NS)) {
                 serializer.endTag(namespaceURI, parser.getName());
             }
         }
         else if (event == XmlPullParser.TEXT) {
             serializer.text(parser.getText());
         }
         else if (event == XmlPullParser.CDSECT) {
             serializer.cdsect(parser.getText());
         }
         else if (event == XmlPullParser.COMMENT) {
             serializer.comment(parser.getText());
         }
         else if (event == XmlPullParser.DOCDECL) {
             serializer.docdecl(parser.getText());
         }
         else if (event == XmlPullParser.ENTITY_REF) {
             serializer.entityRef(parser.getName());
        }
         else if (event == XmlPullParser.IGNORABLE_WHITESPACE) {
             serializer.ignorableWhitespace(parser.getText());
         }
         else if (event == XmlPullParser.PROCESSING_INSTRUCTION) {
             serializer.processingInstruction(parser.getText());
         }
         else if (event == XmlPullParser.TEXT) {
             serializer.text(parser.getText());
         }
         else if (event == XmlPullParser.END_DOCUMENT) {
            serializer.flush();
            break;
         }
      }           
    }
    catch (XmlPullParserException ex) {
       System.out.println(ex);  
    }
    catch (IOException e) {
      System.out.println("IOException while parsing " + args[0]);   
    }
        
  }

}

Previous | Next | Top | Cafe con Leche

Copyright 2000-2003 Elliotte Rusty Harold
elharo@metalab.unc.edu
Last Modified November 6, 2002