// An XLink Spider Utility
import org.xml.sax.*;
import org.xml.sax.helpers.*;
import java.io.*;
import java.util.*;
/**
 * Collects the value of every <code>xlink:href</code> attribute found in an
 * XML document, using the SAX1 document-handler callbacks inherited from
 * {@link HandlerBase}.
 *
 * <p>Attributes are matched on the literal name <code>xlink:href</code>; the
 * namespace that the <code>xlink</code> prefix is bound to is not checked, so
 * this is not a namespace-compliant XLink processor.
 *
 * <p>NOTE(review): documents are parsed with whatever default settings the
 * selected parser has. If the inputs can be untrusted, consider restricting
 * DTD / external-entity processing -- confirm against how this is deployed.
 */
public class XLinkSpider extends HandlerBase {

    /** Values of the xlink:href attributes seen so far, in document order. */
    private final Vector uris = new Vector();

    /**
     * Parses the document at the given system identifier and returns the
     * values of all <code>xlink:href</code> attributes it contains.
     *
     * @param systemId system identifier (URI) of the XML document to parse
     * @return an enumeration of the attribute values (Strings), in the order
     *         the parser reported the elements carrying them
     * @throws SAXException if no SAX parser could be created, or if the
     *         parser reports an error while reading the document
     * @throws IOException if the parser fails to read the document
     */
    public static Enumeration listURIs(String systemId)
            throws SAXException, IOException {
        Parser parser = createParser();

        // Install the Document Handler
        XLinkSpider spider = new XLinkSpider();
        parser.setDocumentHandler(spider);
        parser.parse(systemId);
        return spider.uris.elements();
    }

    /**
     * Obtains a SAX1 parser, trying progressively more generic sources.
     *
     * @return a newly created parser
     * @throws SAXException if none of the lookup strategies yields a parser
     */
    private static Parser createParser() throws SAXException {
        // 1. The parser class named by the org.xml.sax.parser system property.
        try {
            return ParserFactory.makeParser();
        }
        catch (Exception ignored) {
            // Property unset or class unusable; try the next strategy.
        }

        // 2. Fall back on the Xerces parser by name.
        try {
            return ParserFactory.makeParser(
                    "org.apache.xerces.parsers.SAXParser");
        }
        catch (Exception ignored) {
            // Xerces is not on the classpath; try the next strategy.
        }

        // 3. Fall back on the platform's JAXP parser, so that the utility
        //    still works when neither of the above is available.
        try {
            return javax.xml.parsers.SAXParserFactory.newInstance()
                    .newSAXParser().getParser();
        }
        catch (SAXException e) {
            throw e;
        }
        catch (Exception e) {
            throw new SAXException(e);
        }
    }

    /**
     * Records the element's <code>xlink:href</code> attribute value, if it
     * has one.
     *
     * @param name the element type name (unused)
     * @param attributes the attributes specified on the element
     * @throws SAXException declared by the overridden method; not thrown here
     */
    public void startElement(String name, AttributeList attributes)
            throws SAXException {
        // This isn't really compliant since it doesn't pay
        // attention to namespaces
        String uri = attributes.getValue("xlink:href");
        if (uri != null) {
            uris.addElement(uri);
        }
    }

    /**
     * Command-line entry point: prints, one per line, the
     * <code>xlink:href</code> values found in each document named on the
     * command line.
     *
     * @param args system identifiers (URLs) of the documents to scan
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            System.out.println("Usage: java XLinkSpider URL1 URL2...");
            return;
        }

        // start parsing...
        for (int i = 0; i < args.length; i++) {
            try {
                Enumeration uris = listURIs(args[i]);
                while (uris.hasMoreElements()) {
                    String s = (String) uris.nextElement();
                    System.out.println(s);
                }
            }
            catch (Exception e) {
                // A failure on one document must not stop the remaining ones.
                // printStackTrace() already starts with the exception's
                // toString(), so it is not printed separately.
                e.printStackTrace();
            }
        } // end for
    } // end main

} // end XLinkSpider