SAX Example
import java.io.IOException;
import java.util.StringTokenizer;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import org.apache.xerces.parsers.*;
import org.w3c.dom.*;
import org.xml.sax.*;
/**
 * SAX content handler that counts the whitespace-separated words in the
 * character data of an XML document and prints the total to standard
 * output when the document ends.
 *
 * <p>Text is accumulated in a buffer and only counted at a word break
 * (element start, element end or processing instruction), because a SAX
 * parser may deliver one run of text through several calls to
 * {@link #characters(char[], int, int)}; counting each call separately
 * could split a word in two.
 *
 * <p>A single instance can be reused for several documents:
 * {@link #startDocument()} resets all per-document state.
 */
public class SAXWordCount implements ContentHandler {

    /** Number of words counted so far in the current document. */
    private int numWords;

    /** Character data received since the last word break. */
    private final StringBuffer sb = new StringBuffer();

    /**
     * Resets the word count and discards any buffered text.
     *
     * <p>The buffer has to be cleared here as well: if the previous parse
     * was aborted by an exception, text it had already buffered would
     * otherwise be counted as part of this document.
     */
    public void startDocument() throws SAXException {
        this.numWords = 0;
        this.sb.setLength(0);
    }

    /** Counts any text still buffered, then prints the document's total. */
    public void endDocument() throws SAXException {
        this.flush();
        System.out.println(numWords + " words");
        System.out.flush();
    }

    /**
     * Buffers a chunk of character data; counting is deferred until the
     * next word break.
     *
     * @param text   array holding the characters
     * @param start  index of the first character to use
     * @param length number of characters to use
     */
    public void characters(char[] text, int start, int length)
            throws SAXException {
        sb.append(text, start, length);
    }

    /** Adds the words in the buffered text to the total and empties the buffer. */
    private void flush() {
        numWords += countWords(sb.toString());
        sb.setLength(0);
    }

    // methods that signify a word break

    public void startElement(String namespaceURI, String localName,
            String rawName, Attributes atts) throws SAXException {
        this.flush();
    }

    public void endElement(String namespaceURI, String localName,
            String rawName) throws SAXException {
        this.flush();
    }

    public void processingInstruction(String target, String data)
            throws SAXException {
        this.flush();
    }

    // methods that aren't necessary in this example

    public void startPrefixMapping(String prefix, String uri)
            throws SAXException {
        // ignore
    }

    public void ignorableWhitespace(char[] text, int start, int length)
            throws SAXException {
        // ignore
    }

    public void endPrefixMapping(String prefix) throws SAXException {
        // ignore
    }

    public void skippedEntity(String name) throws SAXException {
        // ignore
    }

    public void setDocumentLocator(Locator locator) {
        // ignore
    }

    /**
     * Counts the whitespace-separated tokens in a string.
     *
     * @param s the text to examine; may be {@code null}
     * @return the number of tokens, or 0 if {@code s} is {@code null},
     *         empty or contains nothing but whitespace
     */
    private static int countWords(String s) {
        if (s == null) return 0;
        s = s.trim();
        if (s.length() == 0) return 0;
        StringTokenizer st = new StringTokenizer(s);
        return st.countTokens();
    }

    /**
     * Parses each command-line argument as an XML document and prints its
     * word count. A document that cannot be read or parsed is reported on
     * standard error and the remaining arguments are still processed.
     *
     * @param args system identifiers (file names or URLs) of the documents
     */
    public static void main(String[] args) {
        // Ask JAXP for a parser instead of naming one implementation class,
        // so the program runs with whichever SAX parser is installed.
        XMLReader parser;
        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            // SAX2 readers are namespace-aware by default; JAXP factories
            // are not, so request it explicitly.
            factory.setNamespaceAware(true);
            parser = factory.newSAXParser().getXMLReader();
        }
        catch (ParserConfigurationException e) {
            System.err.println(e);
            return;
        }
        catch (SAXException e) {
            System.err.println(e);
            return;
        }

        SAXWordCount counter = new SAXWordCount();
        parser.setContentHandler(counter);
        for (int i = 0; i < args.length; i++) {
            try {
                parser.parse(args[i]);
            }
            catch (SAXException e) {
                System.err.println(e);
            }
            catch (IOException e) {
                System.err.println(e);
            }
        }
    } // end main
}
% java SAXWordCount hotcop.xml
16 words