DOM Example

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.xerces.parsers.*;
import org.w3c.dom.*;
import org.xml.sax.*;

public class DOMWordCount {

  public static void main(String[] args) {
    DOMParser parser = new DOMParser();
    DOMWordCount counter = new DOMWordCount();
    for (int i = 0; i < args.length; i++) {
      try {
        // Read the entire document into memory
        Document d = parser.getDocument();
        int numWords = countWordsInNode(d);
        System.out.println(numWords + " words");

      catch (SAXException e) {
      catch (IOException e) {
  } // end main

  // note use of recursion
  public static int countWordsInNode(Node node) {
    int numWords = 0;
    if (node.hasChildNodes()) {
      NodeList children = node.getChildNodes();
      for (int i = 0; i < children.getLength(); i++) {
        numWords += countWordsInNode(children.item(i));

    int type = node.getNodeType();
    if (type == Node.TEXT_NODE) {
      String s = node.getNodeValue();
      numWords += countWordsInString(s);
    return numWords;  
  private static int countWordsInString(String s) {
    if (s == null) return 0;
    s = s.trim();
    if (s.length() == 0) return 0;
    StringTokenizer st = new StringTokenizer(s);
    return st.countTokens();

% java DOMWordCount hotcop.xml
16 words

Previous | Next | Top | Cafe con Leche

Copyright 2000-2002 Elliotte Rusty Harold
Last Modified July 12, 2000