// TextEntityReplacer
import org.xml.sax.*;
import java.util.*;
import java.net.*;
import java.io.*;
/**
 * Records the notation and unparsed-entity declarations reported through the
 * SAX {@link DTDHandler} callbacks for a single document, and can load the
 * content of an unparsed entity whose notation identifies it as text.
 *
 * <p>An instance accumulates declarations for one document; it is not designed
 * to be reused for multiple parses, since declarations are never cleared.
 * The public and system IDs of the document being parsed are supplied to the
 * constructor; the system ID is the base against which relative entity system
 * IDs are resolved.
 */
public class TextEntityReplacer implements DTDHandler {

    /** A notation counts as text when its system ID starts with this prefix. */
    private static final String TEXT_TYPE_PREFIX = "text/";

    /** Size of the chunk used when copying entity content into memory. */
    private static final int READ_BUFFER_SIZE = 4096;

    /** System ID of the document being parsed; base URL for entity resolution. */
    private final URL systemID;

    /** Public ID of the document being parsed (stored, not otherwise used here). */
    private final String publicID;

    /** Declared notations, keyed by notation name (String -> Notation). */
    private final Hashtable notations = new Hashtable();

    /** Declared unparsed entities, keyed by entity name (String -> UnparsedEntity). */
    private final Hashtable unparsedEntities = new Hashtable();

    /**
     * Creates a handler for one document.
     *
     * @param publicID the public identifier of the document, may be null
     * @param systemID the system identifier of the document, as an absolute URL
     * @throws MalformedURLException if {@code systemID} is not a valid URL
     */
    public TextEntityReplacer(String publicID, String systemID)
            throws MalformedURLException {
        this.publicID = publicID;
        this.systemID = new URL(systemID);
    }

    /**
     * DTDHandler callback: remembers a notation declaration under its name.
     * A later declaration with the same name replaces the earlier one.
     *
     * @param name     the notation name
     * @param publicID the notation's public identifier, or null if none was given
     * @param systemID the notation's system identifier, or null if none was given
     */
    public void notationDecl(String name, String publicID,
            String systemID)
            throws SAXException {
        notations.put(name, new Notation(name, publicID, systemID));
    }

    /**
     * DTDHandler callback: remembers an unparsed entity declaration under its
     * name. A later declaration with the same name replaces the earlier one.
     *
     * @param name         the entity name
     * @param publicID     the entity's public identifier, or null if none was given
     * @param systemID     the entity's system identifier
     * @param notationName the name of the notation associated with the entity
     */
    public void unparsedEntityDecl(String name, String publicID,
            String systemID, String notationName) throws SAXException {
        unparsedEntities.put(name,
                new UnparsedEntity(name, publicID, systemID, notationName));
    }

    /**
     * Tells whether the named notation denotes textual content, i.e. whether
     * its system ID begins with {@code "text/"}.
     *
     * @param notationName the notation name to look up, may be null
     * @return true only if the notation was declared with a system ID that
     *         starts with {@code "text/"}; false for a null or undeclared name
     *         and for a notation declared without a system ID
     */
    public boolean isText(String notationName) {
        // Hashtable rejects null keys, so answer "not found" explicitly.
        if (notationName == null) {
            return false;
        }
        Notation n = (Notation) notations.get(notationName);
        if (n == null) {
            return false;
        }
        // A notation may carry only a public ID, leaving systemID null.
        return n.systemID != null && n.systemID.startsWith(TEXT_TYPE_PREFIX);
    }

    /**
     * Loads the content of a declared unparsed entity as a string.
     *
     * <p>The content is decoded with the platform default charset and is
     * returned exactly as read: markup-significant characters such as
     * {@code <} and {@code &} are not escaped here.
     *
     * @param entityName the name of the unparsed entity, may be null
     * @return the entity's text; the empty string if the name is null or was
     *         never declared; {@code " binary data "} if the entity's notation
     *         is not a text notation; {@code " unresolvable entity "} if its
     *         system ID cannot be resolved to a URL
     * @throws IOException if the entity's content cannot be opened or read
     */
    public String getText(String entityName) throws IOException {
        // Hashtable rejects null keys, so treat null like an undeclared entity.
        if (entityName == null) {
            return "";
        }
        UnparsedEntity entity = (UnparsedEntity) unparsedEntities.get(entityName);
        if (entity == null) {
            return "";
        }
        if (!isText(entity.notationName)) {
            return " binary data "; // could throw an exception instead
        }

        URL source;
        try {
            // Relative system IDs are resolved against the document's own URL.
            source = new URL(systemID, entity.systemID);
        }
        catch (MalformedURLException e) {
            return " unresolvable entity "; // could throw an exception instead
        }

        // Character encodings are not really handled here; a closer look at
        // the MIME media type of the notation would allow that.
        Reader in = new BufferedReader(
                new InputStreamReader(source.openStream()));
        try {
            StringBuffer result = new StringBuffer();
            char[] buffer = new char[READ_BUFFER_SIZE];
            int count;
            while ((count = in.read(buffer)) != -1) {
                result.append(buffer, 0, count);
            }
            return result.toString();
        }
        finally {
            // Always release the underlying stream, even when a read fails.
            in.close();
        }
    }

    /** Immutable record of one notation declaration. */
    private static class Notation {
        final String name;
        final String publicID;
        final String systemID;

        Notation(String name, String publicID, String systemID) {
            this.name = name;
            this.publicID = publicID;
            this.systemID = systemID;
        }
    }

    /** Immutable record of one unparsed entity declaration. */
    private static class UnparsedEntity {
        final String name;
        final String publicID;
        final String systemID;
        final String notationName;

        UnparsedEntity(String name, String publicID,
                String systemID, String notationName) {
            this.name = name;
            this.publicID = publicID;
            this.systemID = systemID;
            this.notationName = notationName;
        }
    }
}