DOM Example
import org.apache.xerces.parsers.*;
import org.w3c.dom.*;
import org.xml.sax.*;
import java.io.IOException;
import java.util.StringTokenizer;
/**
 * Command-line utility that counts the whitespace-separated words in the
 * character data of XML documents, using a DOM tree built by the Xerces
 * {@code DOMParser}.
 *
 * <p>Words are counted independently for each text or CDATA node, so a word
 * that is split across two adjacent nodes is counted once per node.
 */
public class DOMWordCount {

    /**
     * Parses every document named on the command line and prints its word
     * count to standard output as {@code "<n> words"}.
     *
     * <p>A document that cannot be read or parsed is reported on standard
     * error and the remaining arguments are still processed.
     *
     * @param args locations of the XML documents to process; each one is
     *             handed unchanged to {@code DOMParser.parse(String)}
     */
    public static void main(String[] args) {
        // One parser instance is reused for all documents.
        DOMParser parser = new DOMParser();
        for (int i = 0; i < args.length; i++) {
            try {
                // Read the entire document into memory
                parser.parse(args[i]);
                Document d = parser.getDocument();
                int numWords = countWordsInNode(d);
                System.out.println(numWords + " words");
            }
            catch (SAXException e) {
                // Report the failure and move on to the next argument.
                System.err.println(e);
            }
            catch (IOException e) {
                // Report the failure and move on to the next argument.
                System.err.println(e);
            }
        }
    } // end main

    /**
     * Recursively counts the words in the character data of {@code node}
     * and all of its descendants.
     *
     * <p>Both plain text nodes and CDATA sections contribute to the count;
     * other node types (elements, comments, attributes, ...) contribute
     * only through their children.
     *
     * @param node root of the subtree to examine; {@code null} yields 0
     * @return total number of whitespace-separated words found in the subtree
     */
    public static int countWordsInNode(Node node) {
        if (node == null) {
            return 0;
        }

        int numWords = 0;

        // CDATA sections carry character data too, but report their own
        // node type rather than TEXT_NODE, so both must be checked.
        int type = node.getNodeType();
        if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) {
            numWords += countWordsInString(node.getNodeValue());
        }

        // note use of recursion
        if (node.hasChildNodes()) {
            NodeList children = node.getChildNodes();
            for (int i = 0; i < children.getLength(); i++) {
                numWords += countWordsInNode(children.item(i));
            }
        }

        return numWords;
    }

    /**
     * Counts the whitespace-separated tokens in {@code s}.
     *
     * @param s text to examine; may be {@code null}
     * @return number of tokens, or 0 when {@code s} is {@code null} or
     *         contains nothing after trimming
     */
    private static int countWordsInString(String s) {
        if (s == null) {
            return 0;
        }
        s = s.trim();
        if (s.length() == 0) {
            return 0;
        }
        // The default StringTokenizer delimiters are space, tab, newline,
        // carriage return and form feed.
        StringTokenizer st = new StringTokenizer(s);
        return st.countTokens();
    }
}
% java DOMWordCount hotcop.xml
16 words