// DOM-based TagStripper
import org.apache.xerces.parsers.*;
import org.apache.xerces.dom.*;
import org.w3c.dom.*;
import org.w3c.dom.traversal.*;
import org.xml.sax.SAXException;
import java.io.IOException;
/**
 * Command-line utility that prints the character data of XML documents
 * with all markup removed.
 *
 * <p>Each command-line argument is passed to the Xerces {@code DOMParser}
 * as a system ID. The document is built in memory, then its text is
 * written to {@code System.out} in document order with no separators
 * added between nodes or between documents.
 */
public class DOMTagStripper {

    /**
     * Parses every document named in {@code args} and prints its text.
     * A document that cannot be read or parsed is reported on
     * {@code System.err} and skipped; the remaining arguments are still
     * processed.
     *
     * @param args system IDs of the XML documents to strip
     */
    public static void main(String[] args) {
        DOMParser parser = new DOMParser();
        for (int i = 0; i < args.length; i++) {
            try {
                // Read the entire document into memory
                parser.parse(args[i]);
                printText(parser.getDocument());
            }
            catch (SAXException e) {
                System.err.println(e);
            }
            catch (IOException e) {
                System.err.println(e);
            }
        }
    } // end main

    /**
     * Writes the value of every text and CDATA section node below the
     * document element of {@code doc} to {@code System.out}.
     *
     * @param doc the parsed document; it must also implement
     *            {@link DocumentTraversal}
     * @throws ClassCastException if {@code doc} does not support the
     *         DOM Traversal module
     */
    private static void printText(Document doc) {
        // Cast to the standard traversal interface rather than to a
        // parser-specific document class, so that any DOM implementation
        // supporting traversal is accepted.
        DocumentTraversal traversal = (DocumentTraversal) doc;
        NodeIterator iterator = traversal.createNodeIterator(
            doc.getDocumentElement(),
            // whatToShow selects by node type, and CDATA sections have a
            // node type of their own. Without SHOW_CDATA_SECTION their
            // character data would silently be left out of the output.
            NodeFilter.SHOW_TEXT | NodeFilter.SHOW_CDATA_SECTION,
            null, // no additional NodeFilter
            true  // expand entity references so their text is visited
        );
        try {
            Node node;
            while ((node = iterator.nextNode()) != null) {
                System.out.print(node.getNodeValue());
            }
        }
        finally {
            // Release the iterator once the traversal is finished.
            iterator.detach();
        }
    }
}