// JDOM-based TagStripper
import org.jdom.*;
import org.jdom.output.XMLOutputter;
import org.jdom.input.SAXBuilder;
import java.io.*;
import java.util.*;
public class JDOMTagStripper extends XMLOutputter {
public JDOMTagStripper() {
super();
}
// Things we won't print at all
protected void printDeclaration(Document doc, Writer out, String encoding) {}
protected void printComment(Comment comment, Writer out, int indentLevel) {}
protected void printDocType(DocType docType, Writer out) {}
protected void printProcessingInstruction(ProcessingInstruction pi,
Writer out) {}
protected void printNamespace(Namespace ns, Writer out) {}
protected void printAttributes(List attributes, Writer out) {}
protected void printElement(Element element, Writer out,
int indentLevel, NamespaceStack namespaces) throws IOException {
List content = element.getContent();
Iterator iterator = content.iterator();
while (iterator.hasNext()) {
Object o = iterator.next();
if (o instanceof Text) {
Text t = (Text) o;
out.write(t.getText());
}
else if (o instanceof CDATA) {
CDATA t = (CDATA) o;
out.write(t.getText());
}
else if (o instanceof Element) {
printElement((Element) o, out, indentLevel, namespaces);
}
}
}
// Could easily have put main() method in a separate class
public static void main(String[] args) {
if (args.length == 0) {
System.out.println(
"Usage: java TagStripper URL1 URL2...");
}
JDOMTagStripper stripper = new JDOMTagStripper();
SAXBuilder builder = new SAXBuilder();
// start parsing...
for (int i = 0; i < args.length; i++) {
// command line should offer URIs or file names
try {
Document doc = builder.build(args[i]);
stripper.output(doc, System.out);
}
catch (JDOMException e) { // a well-formedness error
System.out.println(args[i] + " is not well formed.");
System.out.println(e.getMessage());
}
catch (IOException e) { // a well-formedness error
System.out.println(e.getMessage());
}
}
}
}