Cutting Edge XML Programming


Cutting Edge XML Programming

Elliotte Rusty Harold

XMLOne 2001 Austin

Thursday, March 8, 2001

elharo@metalab.unc.edu

http://www.ibiblio.org/xml/


Outline


Part I: XML Infoset

The Infoset is the unfortunate standard to which those in retreat from the radical and most useful implications of well-formedness have rallied. At its core the Infoset insists that there is 'more' to XML than the straightforward syntax of well-formedness. By imposing its canonical semantics the Infoset obviates the infinite other semantic outcomes which might be elaborated in particular unique circumstances from an instance of well-formed XML 1.0 syntax. The question we should be asking is not whether the Infoset has chosen the correct canonical semantics, but whether the syntactic possibilities of XML 1.0 should be curtailed in this way at all.
--Walter Perry on the xml-dev mailing list


A normal XML document

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/css" href="song.css"?>
<!DOCTYPE SONG SYSTEM "song.dtd">
<SONG xmlns="http://www.ibiblio.org/xml/namespace/song"
      xmlns:xlink="http://www.w3.org/1999/xlink">
  <TITLE>Hot Cop</TITLE>
  <PHOTO 
    xlink:type="simple" xlink:show="onLoad" xlink:href="hotcop.jpg"
    ALT="Victor Willis in Cop Outfit" WIDTH="100" HEIGHT="200"/>
  <COMPOSER>Jacques Morali</COMPOSER>
  <COMPOSER>Henri Belolo</COMPOSER>
  <COMPOSER>Victor Willis</COMPOSER>
  <PRODUCER>Jacques Morali</PRODUCER>
  <!-- The publisher is actually Polygram but I needed 
       an example of a general entity reference. -->
  <PUBLISHER xlink:type="simple" xlink:href="http://www.amrecords.com/">
    A &amp; M Records
  </PUBLISHER>
  <LENGTH>6:20</LENGTH>
  <YEAR>1978</YEAR>
  <ARTIST>Village People</ARTIST>
</SONG>
<!-- You can tell what album I was 
     listening to when I wrote this example -->

A canonical XML document

<?xml-stylesheet type="text/css" href="song.css"?>
<SONG xmlns="http://www.ibiblio.org/xml/namespace/song" xmlns:xlink="http://www.w3.org/1999/xlink">
  <TITLE>Hot Cop</TITLE>
  <PHOTO ALT="Victor Willis in Cop Outfit" HEIGHT="200" WIDTH="100" xlink:href="hotcop.jpg" xlink:show="onLoad" xlink:type="simple"></PHOTO>
  <COMPOSER>Jacques Morali</COMPOSER>
  <COMPOSER>Henri Belolo</COMPOSER>
  <COMPOSER>Victor Willis</COMPOSER>
  <PRODUCER>Jacques Morali</PRODUCER>
  
  <PUBLISHER xlink:href="http://www.amrecords.com/" xlink:type="simple">
    A &amp; M Records
  </PUBLISHER>
  <LENGTH>6:20</LENGTH>
  <YEAR>1978</YEAR>
  <ARTIST>Village People</ARTIST>
</SONG>

An org.w3c.dom.Document object formed by reading hotcop.xml

import org.apache.xerces.parsers.*;
import org.w3c.dom.*;
import org.xml.sax.*;
import java.io.*;

/**
 * Minimal DOM example: parses the hotcop.xml sample document with the
 * Xerces DOMParser and retrieves the resulting org.w3c.dom.Document.
 */
public class DOMHotCop {

  /**
   * Parses the sample document and obtains its in-memory DOM Document.
   * Parse failures and I/O failures are printed to System.err.
   *
   * @param args command line arguments; not used
   */
  public static void main(String[] args) {

    DOMParser parser = new DOMParser();

    try {
      // The sample document is named hotcop.xml (no underscore)
      parser.parse("http://www.ibiblio.org/xml/examples/hotcop.xml");
      // The Document object is the point of the example; nothing more is done with it
      Document d = parser.getDocument();
    }
    catch (SAXException e) {
      System.err.println(e);
    }
    catch (IOException e) {
      System.err.println(e);
    }

  }

}

Are these three the same thing or not?


What is the XML InfoSet?


The InfoSet defines 17 kinds of Information Items


The Document Information Item


Elements

<PHOTO 
  xlink:type="simple" xlink:show="onLoad" xlink:href="hotcop.jpg"
  ALT="Victor Willis in Cop Outfit" WIDTH="100" HEIGHT="200"/>
  
<COMPOSER>Jacques Morali</COMPOSER>

<COMPOSER>
  <PERSON>
    <NAME>
      <FIRST>Henri</FIRST>
      <LAST>Belolo</LAST>
    </NAME>
  </PERSON>
</COMPOSER>

<rdf:RDF xmlns:rdf="http://www.w3.org/TR/REC-rdf-syntax#">
  <rdf:Description xmlns:dc="http://purl.org/dc/"
     about="http://www.ibiblio.org/examples/impressionists.xml">
    <dc:title> Impressionist Paintings </dc:title>
    <dc:creator> Elliotte Rusty Harold </dc:creator>
    <dc:description> 
      A list of famous impressionist paintings organized 
      by painter and date 
    </dc:description>
    <dc:date>2000-08-22</dc:date>
  </rdf:Description>
</rdf:RDF>

Element Information Items

An Element Information Item Includes:


Attributes

xlink:type="simple"
xlink:href="http://www.amrecords.com/"
xlink:type =  "simple"
xlink:show = "onLoad"
xlink:href="hotcop.jpg"
ALT="Victor Willis in Cop Outfit"
WIDTH=" 100 "
HEIGHT=' 200 '

An Attribute Information Item Includes:


Comments

  <!-- The publisher is actually Polygram but I needed 
       an example of a general entity reference. -->
<!--  <PUBLISHER xlink:type="simple" xlink:href="http://www.amrecords.com/">
    A &amp; M Records
  </PUBLISHER>
  <LENGTH>6:20</LENGTH>
  <YEAR>1978</YEAR>
  <ARTIST>Village People</ARTIST>
</SONG> -->
<!-- You can tell what album I was 
     listening to when I wrote this example -->

A comment Information Item includes:


A Processing Instruction Information Item Includes:

<?robots index="yes" follow="no"?>
<?php 
  mysql_connect("database.unc.edu", "clerk", "password"); 
  $result = mysql("CYNW", "SELECT LastName, FirstName FROM Employees 
    ORDER BY LastName, FirstName"); 
  $i = 0;
  while ($i < mysql_numrows ($result)) {
     $fields = mysql_fetch_row($result);
     echo "<person>$fields[1] $fields[0] </person>\r\n";
     $i++;
  }
  mysql_close();
?>

Characters


Namespaces

xmlns:rdf="http://www.w3.org/TR/REC-rdf-syntax#"
xmlns:dc="http://purl.org/dc/"
xmlns="http://www.w3.org/Graphics/SVG/SVG-19991203.dtd"

Document Type Declaration

<!DOCTYPE SONG SYSTEM "song.dtd">
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
                       "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">

A Document Type Declaration Information Item includes:


Document Type Definition

<!ELEMENT SONG (TITLE, PHOTO?, COMPOSER+, PRODUCER*, 
 PUBLISHER*, LENGTH?, YEAR?, ARTIST+)>
<!ATTLIST SONG xmlns       CDATA #REQUIRED
               xmlns:xlink CDATA #REQUIRED>
<!ELEMENT TITLE (#PCDATA)>

<!ELEMENT PHOTO EMPTY>
<!ATTLIST PHOTO xlink:type CDATA #FIXED "simple"
                xlink:href CDATA #REQUIRED
                xlink:show CDATA #IMPLIED
                ALT        CDATA #REQUIRED
                WIDTH      CDATA #REQUIRED
                HEIGHT     CDATA #REQUIRED
>

<!ELEMENT COMPOSER (#PCDATA)>
<!ELEMENT PRODUCER (#PCDATA)>
<!ELEMENT PUBLISHER (#PCDATA)>
<!ATTLIST PUBLISHER xlink:type CDATA #IMPLIED
                    xlink:href CDATA #IMPLIED
>

<!ELEMENT LENGTH (#PCDATA)>
<!-- This should be a four digit year like "1999",
     not a two-digit year like "99" -->
<!ELEMENT YEAR (#PCDATA)>

<!ELEMENT ARTIST (#PCDATA)>

Entities


Entity Marker Information Items


Entity Information Items


Internal Entity Information Items


External Entity Information Items


Unparsed Entity Information Items


Unexpanded Entity Information Items


The InfoSet Omits:


Canonical XML


How are documents canonicalized?

  1. The document is encoded in UTF-8

  2. Line breaks are normalized to a linefeed (ASCII 10, \n)

  3. Attribute values are normalized, as if by a validating processor

  4. Character and parsed entity references are replaced

  5. CDATA sections are replaced with their character content

  6. The XML and document type declarations are removed

  7. Empty elements are converted to start tag-end tag pairs

  8. White space outside of the document element and within start and end tags is normalized

  9. All white space in character content is retained (except for characters removed during linefeed normalization)

  10. Attribute value delimiters are set to double quotes

  11. Special characters in attribute values and character content are replaced by character references

  12. Superfluous namespace declarations are removed from each element

  13. Default attributes are added to each element

  14. Lexicographic order is imposed on the namespace declarations and attributes of each element


Canonicalization software


Digital Signatures


Not Just for Signing XML


Signature Process

  1. The signature processor digests a data object.

  2. The processor places the digest value in a Signature element.

  3. The processor digests the Signature element.

  4. The processor cryptographically signs the Signature element.


XML Digital Signature software


A Detached Signature for hotcop.xml

<?xml version='1.0' encoding='UTF-8'?>
<Signature xmlns="http://www.w3.org/2000/09/xmldsig#">
  <SignedInfo>
    <CanonicalizationMethod Algorithm="http://www.w3.org/TR/2000/WD-xml-c14n-20000119"/>
    <SignatureMethod Algorithm="http://www.w3.org/2000/09/xmldsig#dsa-sha1"/>
    <Reference URI="http://www.ibiblio.org/xml/slides/hoffman/fundamentals/examples/hotcop.xml">
      <DigestMethod Algorithm="http://www.w3.org/2000/09/xmldsig#sha1"/>
      <DigestValue>nvfYilfgN/rICyzhGmjidKCFoC8=</DigestValue>
    </Reference>
  </SignedInfo>
  <SignatureValue>
    hfowa4qdbuMkoZfX1/VXd4UBpIpZMM5+6CElmY7jOIKFqvXq5A5VKw==
  </SignatureValue>
  <KeyInfo>
    <KeyValue>
      <DSAKeyValue>
        <P>
          /X9TgR11EilS30qcLuzk5/YRt1I870QAwx4/gLZRJmlFXUAiUftZPY1Y+r/F9bow9s
          ubVWzXgTuAHTRv8mZgt2uZUKWkn5/oBHsQIsJPu6nX/rfGG/g7V+fGqKYVDwT7g/bT
          xR7DAjVUE1oWkTL2dfOuK2HXKu/yIgMZndFIAcc=
        </P>
        <Q>l2BQjxUjC8yykrmCouuEC/BYHPU=</Q>
        <G>
          9+GghdabPd7LvKtcNrhXuXmUr7v6OuqC+VdMCz0HgmdRWVeOutRZT+ZxBxCBgLRJFn
          Ej6EwoFhO3zwkyjMim4TwWeotUfI0o4KOuHiuzpnWRbqN/C/ohNWLx+2J6ASQ7zKTx
          vqhRkImog9/hWuWfBpKLZl6Ae1UlZAFMO/7PSSo=
        </G>
        <Y>
          6jKpNnmkkWeArsn5Oeeg2njcz+nXdk0f9kZI892ddlR8Lg1aMhPeFTYuoq3I6neFlb
          BjWzuktNZKiXYBfKsSTB8U09dTiJo2ir3HJuY7eW/p89osKMfixPQsp9vQMgzph6Qa
          lY7j4MB7y5ROJYsTr1/fFwmj/yhkHwpbpzed1LE=
        </Y>
      </DSAKeyValue>
    </KeyValue>
    <X509Data>
      <X509IssuerSerial>
        <X509IssuerName>CN=Elliotte Rusty Harold, OU=Metrotech, O=Polytechnic, L=Brooklyn, ST=New York, C=US</X509IssuerName>
        <X509SerialNumber>983556890</X509SerialNumber></X509IssuerSerial>
      <X509SubjectName>CN=Elliotte Rusty Harold, OU=Metrotech, O=Polytechnic, L=Brooklyn, ST=New York, C=US</X509SubjectName>
      <X509Certificate>
MIIDLzCCAu0CBDqf4xowCwYHKoZIzjgEAwUAMH0xCzAJBgNVBAYTAlVTMREwDwYDVQQIEwhOZXcg
WW9yazERMA8GA1UEBxMIQnJvb2tseW4xFDASBgNVBAoTC1BvbHl0ZWNobmljMRIwEAYDVQQLEwlN
ZXRyb3RlY2gxHjAcBgNVBAMTFUVsbGlvdHRlIFJ1c3R5IEhhcm9sZDAeFw0wMTAzMDIxODE0NTBa
Fw0wMTA1MzExODE0NTBaMH0xCzAJBgNVBAYTAlVTMREwDwYDVQQIEwhOZXcgWW9yazERMA8GA1UE
BxMIQnJvb2tseW4xFDASBgNVBAoTC1BvbHl0ZWNobmljMRIwEAYDVQQLEwlNZXRyb3RlY2gxHjAc
BgNVBAMTFUVsbGlvdHRlIFJ1c3R5IEhhcm9sZDCCAbgwggEsBgcqhkjOOAQBMIIBHwKBgQD9f1OB
HXUSKVLfSpwu7OTn9hG3UjzvRADDHj+AtlEmaUVdQCJR+1k9jVj6v8X1ujD2y5tVbNeBO4AdNG/y
ZmC3a5lQpaSfn+gEexAiwk+7qdf+t8Yb+DtX58aophUPBPuD9tPFHsMCNVQTWhaRMvZ1864rYdcq
7/IiAxmd0UgBxwIVAJdgUI8VIwvMspK5gqLrhAvwWBz1AoGBAPfhoIXWmz3ey7yrXDa4V7l5lK+7
+jrqgvlXTAs9B4JnUVlXjrrUWU/mcQcQgYC0SRZxI+hMKBYTt88JMozIpuE8FnqLVHyNKOCjrh4r
s6Z1kW6jfwv6ITVi8ftiegEkO8yk8b6oUZCJqIPf4VrlnwaSi2ZegHtVJWQBTDv+z0kqA4GFAAKB
gQDqMqk2eaSRZ4Cuyfk556DaeNzP6dd2TR/2Rkjz3Z12VHwuDVoyE94VNi6ircjqd4WVsGNbO6S0
1kqJdgF8qxJMHxTT11OImjaKvccm5jt5b+nz2iwox+LE9Cyn29AyDOmHpBqVjuPgwHvLlE4lixOv
X98XCaP/KGQfClunN53UsTALBgcqhkjOOAQDBQADLwAwLAIUODqxsFzS96BjrVA4LVo5FzuWBRMC
FC0xfXxbaJaCJuVqtcBv4bqwV0EX
      </X509Certificate>
    </X509Data>
  </KeyInfo>
</Signature>


To Learn More


Part II: DOM Level 3


Trees


Document Object Model


DOM Evolution


DOM Parsers for Java


Eight Modules:


DOM Trees


The Core: org.w3c.dom


The Node Interface

package org.w3c.dom;

/**
 * The basic DOM node type. Declares the numeric node type codes and the
 * accessors and mutators shared by every kind of node.
 */
public interface Node {

  // Node type codes; getNodeType() reports one of these values.
  // Interface fields are implicitly public, static and final.
  short ELEMENT_NODE                = 1;
  short ATTRIBUTE_NODE              = 2;
  short TEXT_NODE                   = 3;
  short CDATA_SECTION_NODE          = 4;
  short ENTITY_REFERENCE_NODE       = 5;
  short ENTITY_NODE                 = 6;
  short PROCESSING_INSTRUCTION_NODE = 7;
  short COMMENT_NODE                = 8;
  short DOCUMENT_NODE               = 9;
  short DOCUMENT_TYPE_NODE          = 10;
  short DOCUMENT_FRAGMENT_NODE      = 11;
  short NOTATION_NODE               = 12;

  // Identity and content of this node
  String       getNodeName();
  String       getNodeValue() throws DOMException;
  void         setNodeValue(String nodeValue) throws DOMException;
  short        getNodeType();

  // Navigation to related nodes
  Node         getParentNode();
  NodeList     getChildNodes();
  Node         getFirstChild();
  Node         getLastChild();
  Node         getPreviousSibling();
  Node         getNextSibling();
  NamedNodeMap getAttributes();
  Document     getOwnerDocument();

  // Tree modification
  Node         insertBefore(Node newChild, Node refChild) throws DOMException;
  Node         replaceChild(Node newChild, Node oldChild) throws DOMException;
  Node         removeChild(Node oldChild) throws DOMException;
  Node         appendChild(Node newChild) throws DOMException;

  // Utilities
  boolean      hasChildNodes();
  Node         cloneNode(boolean deep);
  void         normalize();
  // NOTE(review): the final DOM Level 2 Core Recommendation appears to name
  // this method isSupported -- confirm against the spec version targeted.
  boolean      supports(String feature, String version);

  // Namespace support
  String       getNamespaceURI();
  String       getPrefix();
  void         setPrefix(String prefix) throws DOMException;
  String       getLocalName();
}

The NodeList Interface

package org.w3c.dom;

/**
 * An indexed collection of nodes, read by position.
 */
public interface NodeList {

  /**
   * @param index position of the requested node
   * @return the node at that position
   */
  Node item(int index);

  /**
   * @return the number of nodes in this list
   */
  int getLength();
}

Now we're really ready to read a document


Node Reporter

import org.apache.xerces.parsers.*;
import org.w3c.dom.*;
import org.xml.sax.*;
import java.io.*;


/**
 * Walks the DOM tree of each document named on the command line and
 * prints the type and name of every node it visits.
 */
public class NodeReporter {

  /**
   * Parses each argument as a document and reports on all of its nodes.
   * Parse and I/O failures are printed to System.err and the loop moves
   * on to the next argument.
   *
   * @param args locations of the documents to parse
   */
  public static void main(String[] args) {

    DOMParser parser = new DOMParser();
    NodeReporter reporter = new NodeReporter();

    int index = 0;
    while (index < args.length) {
      String location = args[index];
      index++;
      try {
        // The parser builds the whole document tree before we walk it
        parser.parse(location);
        reporter.followNode(parser.getDocument());
      }
      catch (IOException ioProblem) {
        System.err.println(ioProblem);
      }
      catch (SAXException parseProblem) {
        System.err.println(parseProblem);
      }
    }

  } // end main

  /**
   * Reports on the given node, then recursively on each of its children
   * in the order the child list supplies them.
   *
   * @param node root of the subtree to report
   */
  public void followNode(Node node) {

    processNode(node);
    if (!node.hasChildNodes()) {
      return;
    }

    NodeList kids = node.getChildNodes();
    int position = 0;
    while (position < kids.getLength()) {
      followNode(kids.item(position));
      position++;
    }

  }

  /**
   * Prints one line giving the node's type name and node name.
   *
   * @param node the node to describe
   */
  public void processNode(Node node) {

    System.out.println(
        "Type " + getTypeName(node.getNodeType()) + ": " + node.getNodeName());

  }

  /**
   * Translates a numeric node type code into a readable label.
   *
   * @param type one of the Node type constants
   * @return the label for that type, or "Unknown Type" for any other value
   */
  public static String getTypeName(int type) {

    String label;
    switch (type) {
      case Node.ELEMENT_NODE:
        label = "Element";
        break;
      case Node.ATTRIBUTE_NODE:
        label = "Attribute";
        break;
      case Node.TEXT_NODE:
        label = "Text";
        break;
      case Node.CDATA_SECTION_NODE:
        label = "CDATA Section";
        break;
      case Node.ENTITY_REFERENCE_NODE:
        label = "Entity Reference";
        break;
      case Node.ENTITY_NODE:
        label = "Entity";
        break;
      case Node.PROCESSING_INSTRUCTION_NODE:
        label = "Processing Instruction";
        break;
      case Node.COMMENT_NODE:
        label = "Comment";
        break;
      case Node.DOCUMENT_NODE:
        label = "Document";
        break;
      case Node.DOCUMENT_TYPE_NODE:
        label = "Document Type Declaration";
        break;
      case Node.DOCUMENT_FRAGMENT_NODE:
        label = "Document Fragment";
        break;
      case Node.NOTATION_NODE:
        label = "Notation";
        break;
      default:
        label = "Unknown Type";
        break;
    }
    return label;

  }

}

Node Reporter Output

% java NodeReporter hotcop.xml
Type Document: #document
Type Processing Instruction: xml-stylesheet
Type Document Type Declaration: SONG
Type Element: SONG
Type Text: #text
Type Element: TITLE
Type Text: #text
Type Text: #text
Type Element: PHOTO
Type Text: #text
Type Element: COMPOSER
Type Text: #text
Type Text: #text
Type Element: COMPOSER
Type Text: #text
Type Text: #text
Type Element: COMPOSER
Type Text: #text
Type Text: #text
Type Element: PRODUCER
Type Text: #text
Type Text: #text
Type Comment: #comment
Type Text: #text
Type Element: PUBLISHER
Type Text: #text
Type Text: #text
Type Element: LENGTH
Type Text: #text
Type Text: #text
Type Element: YEAR
Type Text: #text
Type Text: #text
Type Element: ARTIST
Type Text: #text
Type Text: #text
Type Comment: #comment

Attributes are missing from this output. They are not nodes. They are properties of nodes.


Node Values as returned by getNodeValue()

Node Type | Node Value
element node | null
attribute node | attribute value
text node | text of the node
CDATA section node | text of the section
entity reference node | null
entity node | null
processing instruction node | content of the processing instruction, not including the target
comment node | text of the comment
document node | null
document type declaration node | null
document fragment node | null
notation node | null

New Features in DOM Level 3


DOM Level 3 Core Additions


DOMKey


Node3


Entity3


Document3


Text3


Bootstrapping


DOM3 Bootstrapping


Load and Save


The DOM Process

  1. Library specific code creates a parser

  2. The parser parses the document and returns a DOM org.w3c.dom.Document object.

  3. The entire document is stored in memory.

  4. DOM methods and interfaces are used to extract data from this object


Parsing documents with DOM2

This program parses with Xerces. Other parsers are different.

import org.apache.xerces.parsers.*;
import org.w3c.dom.*;
import org.xml.sax.*;
import java.io.*;

/**
 * Shows the Xerces-specific way of turning documents into DOM trees:
 * each command line argument is parsed and its Document retrieved.
 */
public class DOMParserMaker {

  /**
   * Parses every argument in turn. Parse and I/O failures are printed
   * to System.err and do not stop the remaining arguments from being read.
   *
   * @param args locations of the documents to parse
   */
  public static void main(String[] args) {

    DOMParser xerces = new DOMParser();

    int index = 0;
    while (index < args.length) {
      String location = args[index];
      index++;
      try {
        xerces.parse(location);

        // Retrieved only to demonstrate the call; the example stops here
        Document document = xerces.getDocument();
      }
      catch (IOException ioProblem) {
        System.err.println(ioProblem);
      }
      catch (SAXException parseProblem) {
        System.err.println(parseProblem);
      }
    }

  }

}

Parsing documents with DOM3

import org.w3c.dom.*;

/**
 * Sketch of parser-independent parsing with the draft DOM Level 3
 * Load and Save interfaces: a DOMBuilder is obtained from the
 * implementation and used to parse each command line argument.
 */
public class DOM3ParserMaker {

  /**
   * Parses every argument in turn with a DOMBuilder. Failures are printed
   * to System.err and the loop continues with the next argument.
   *
   * @param args URIs of the documents to parse
   */
  public static void main(String[] args) {

    // The declared type must be the same interface the value is cast to;
    // DOMImplementationLS is the one that supplies the load/save factories.
    DOMImplementationLS impl =
      (DOMImplementationLS) DOMImplementationFactory.getDOMImplementation();
    DOMBuilder parser = impl.getDOMBuilder();

    for (int i = 0; i < args.length; i++) {
      try {
        // Retrieved only to demonstrate the call; the example stops here
        Document d = parser.parseURI(args[i]);
      }
      catch (DOMSystemException e) {
        System.err.println(e);
      }
      catch (DOMException e) {
        System.err.println(e);
      }

    }

  }

}

This code will not actually compile or run until some parser supports DOM3 Load and Save.


Load and Save

DOMImplementationLS
A new DOMImplementation interface that provides the factory methods for creating the objects required for loading and saving.
DOMBuilder
A parser interface
DOMInputSource
Encapsulate information about the source of the XML to be loaded, like SAX's InputSource
DOMEntityResolver
During loading, provides a way for applications to redirect references to external entities.
DOMBuilderFilter
Provide the ability to examine and optionally remove Element nodes as they are being processed during the parsing of a document, like SAX filters.
DOMWriter
An interface for serializing DOM documents onto a stream.

DOMImplementationLS


DOMBuilder


DOMInputSource


DOMEntityResolver


DOMWriter


DOMBuilderFilter


Grammar Access/Content Models


Content Model Interfaces


Content Model and CM-Editing Interfaces


The CMModel Interface


The CMExternalModel Interface


The CMNode Interface


The CMNodeList Interface


The CMNamedNodeMap Interface


The CMDataType Interface


The ElementDeclaration Interface


The CMChildren Interface


The AttributeDeclaration Interface


The EntityDeclaration Interface


The CMNotationDeclaration Interface


Validation and Other Interfaces:


The Document Interface


The DocumentCM Interface


The DOMImplementationCM Interface


Document-Editing Interfaces:


The NodeCM Interface


The ElementCM Interface


The CharacterDataCM Interface


The DocumentTypeCM Interface


The AttributeCM Interface


DOM Error Handler Interfaces


The DOMErrorHandler Interface


The DOMLocator Interface


To Learn More


Part III: XSLT 1.1 and Beyond

In SQL, the query language is not expressed in tables and rows. In XQuery, the query language is not expressed in XML. Why is this a problem?
--Jonathan Robie on the xml-dev mailing list


XSLT 1.1


Identifying 1.1 compliant stylesheets

<xsl:stylesheet version="1.1"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

  <!-- Top level elements -->

</xsl:stylesheet>

No result tree fragments


Multiple Output Documents


xsl:document Example

     <xsl:document method="html" encoding="ISO-8859-1" href="index.html">
       <html>
         <head>
           <title><xsl:value-of select="title"/></title>         
         </head>
         <body> 
           <h1 align="center"><xsl:value-of select="title"/></h1> 
           <ul>
             <xsl:for-each select="slide">
               <li><a href="{format-number(position(),'00')}.html"><xsl:value-of select="title"/></a></li>
             </xsl:for-each>    
           </ul>           
           
           <p><a href="{translate(title,' ', '_')}.html">Entire Presentation as Single File</a></p>
              
           <hr/>
           <div align="center">
             <A HREF="01.html">Start</A> | <A HREF="/xml/">Cafe con Leche</A>
           </div>
           <hr/>
           <font size="-1">
              Copyright 2001 
              <a href="http://www.macfaq.com/personal.html">Elliotte Rusty Harold</a><br/>       
              <a href="mailto:elharo@metalab.unc.edu">elharo@metalab.unc.edu</a><br/>
              Last Modified <xsl:apply-templates select="last_modified" mode="lm"/>
           </font>
         </body>     
       </html>     
     </xsl:document>  

xsl:script Top-level Element


xsl:script with Java

<?xml version="1.0"?>
<xsl:stylesheet version="1.1"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:date="http://www.cafeconleche.org/ns/"
>

  <xsl:template match="/">
    <xsl:value-of select="date:new()"/>
  </xsl:template>

  <xsl:script
    implements-prefix="date"
    language="java"
    src="java:java.util.Date"
  />

</xsl:stylesheet>

xsl:script with JavaScript

<?xml version="1.0"?>
<xsl:stylesheet version="1.1"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:date="http://www.cafeconleche.org/ns/date"
>

  <xsl:template match="/">
    <xsl:value-of select="date:clock()"/>
  </xsl:template>

  <xsl:script
    implements-prefix="date"
    language="javascript">
    
    // Returns the current local time as a 12-hour clock string,
    // for example "3:05:09 PM".
    function clock() {
      var time = new Date();
      var hours = time.getHours();
      var min = time.getMinutes();
      var sec = time.getSeconds();
      // getHours() counts 0 to 23; noon and later is PM
      var status = "AM";
      if (hours > 11) {
        status = "PM";
      }
      // Fold the afternoon and evening hours 13 to 23 down to 1 to 11
      if (hours > 12) {
        hours -= 12;
      }
      // A 12-hour clock shows midnight as 12, not 0
      if (hours == 0) {
        hours = 12;
      }
      // Pad minutes and seconds to two digits
      if (min < 10) {
        min = "0" + min;
      }
      if (sec < 10) {
        sec = "0" + sec;
      }
      return hours + ":" + min + ":" + sec + " " + status;
    }
   
  </xsl:script>  

</xsl:stylesheet>

XPath 2.0


XPath 2.0 Goals


XPath 2.0 Requirements


XSLT 2.0


XSLT 2.0 Goals


XSLT 2.0 Non-goals


XSLT 2.0 Requirements


XQuery

Three parts:


XQuery Language


Documents to Query


Physical Representations to Query


Where is XQuery used?


The XML Model vs. the Relational Model

A relational database contains tables; an XML database contains collections.
A relational table contains records with the same schema; a collection contains XML documents with the same DTD.
A relational record is an unordered list of named values; an XML document is a tree of nodes.
A SQL query returns an unordered set of records; an XQuery returns an ordered node set.

Query Data Types


An example document to query

Most of the examples in this talk query this bibliography document at the (fictional) URL http://www.bn.com/bib.xml:

<bib>
  <book year="1994">
    <title>TCP/IP Illustrated</title>
    <author><last>Stevens</last><first>W.</first></author>
    <publisher>Addison-Wesley</publisher>
    <price> 65.95</price>
  </book>

  <book year="1992">
    <title>Advanced Programming in the Unix Environment</title>
    <author><last>Stevens</last><first>W.</first></author>
    <publisher>Addison-Wesley</publisher>
    <price>65.95</price>
  </book>

  <book year="2000">
    <title>Data on the Web</title>
    <author><last>Abiteboul</last><first>Serge</first></author>
    <author><last>Buneman</last><first>Peter</first></author>
    <author><last>Suciu</last><first>Dan</first></author>
    <publisher>Morgan Kaufmann Publishers</publisher>
    <price> 39.95</price>
  </book>

  <book year="1999">
    <title>The Economics of Technology and Content for Digital TV</title>
    <editor>
      <last>Gerbarg</last><first>Darcy</first>
      <affiliation>CITI</affiliation>
    </editor>
      <publisher>Kluwer Academic Publishers</publisher>
    <price>129.95</price>
  </book>

</bib>

Adapted from Mary Fernandez, Jerome Simeon, and Phil Wadler: XML Query Languages: Experiences and Exemplars, 1999, as adapted in XML Query Use Cases


Bibliography DTD

<!ELEMENT bib (book* )>
<!ELEMENT book  (title,  (author+ | editor+ ), publisher, price )>
<!ATTLIST book year CDATA  #REQUIRED >

<!ELEMENT author      (last, first )>
<!ELEMENT editor      (last, first, affiliation )>
<!ELEMENT title       (#PCDATA )>
<!ELEMENT last        (#PCDATA )>
<!ELEMENT first       (#PCDATA )>
<!ELEMENT affiliation (#PCDATA )>
<!ELEMENT publisher   (#PCDATA )>
<!ELEMENT price       (#PCDATA )>

Adapted from XML Query Use Cases


The XQuery FLWR


Query: List titles of all books

   FOR $t IN document("http://www.bn.com")/bib/book/title
   RETURN
     $t

Adapted from XML Query Use Cases


Query Result: Book Titles

  <title>TCP/IP Illustrated</title>
  <title>Advanced Programming in the Unix Environment</title>
  <title>Data on the Web</title>
  <title>The Economics of Technology and Content for Digital TV</title>
 

Adapted from XML Query Use Cases


Element Constructors

List titles of all books in a bib element. Put each title in a book element.

<bib>
   FOR $t IN document("http://www.bn.com")/bib/book/title
   RETURN
    <book>
     $t
    </book>
</bib>

Adapted from XML Query Use Cases


Query Result: Book Titles

<bib>
  <book>
    <title>TCP/IP Illustrated</title>
  </book>
  <book>
    <title>Advanced Programming in the Unix Environment</title>
  </book>
  <book>
    <title>Data on the Web</title>
  </book>
  <book>
    <title>The Economics of Technology and Content for Digital TV</title>
  </book>
</bib>
 

Adapted from XML Query Use Cases


Query with WHERE

Adapted from XML Query Use Cases


Query Result: Titles of books published by Addison-Wesley

<bib>
    <title>TCP/IP Illustrated</title>
    <title>Advanced Programming in the Unix Environment</title>
</bib>
 

Adapted from XML Query Use Cases


Query with Booleans

Adapted from XML Query Use Cases


Query Result: books published by Addison-Wesley after 1993

<bib>
    <title>TCP/IP Illustrated</title>
</bib>
 

Adapted from XML Query Use Cases


Attribute Constructors

Adapted from XML Query Use Cases


Query Result: books published by Addison-Wesley after 1993, including their year and title.

<bib>
  <book year="1994">
    <title>TCP/IP Illustrated</title>
  </book>
</bib>
 

Adapted from XML Query Use Cases


Query with multiple variables

Create a list of all the title-author pairs, with each pair enclosed in a result element.

<results>
   FOR $b IN document("http://www.bn.com")/bib/book,
     $t IN $b/title,
     $a IN $b/author
   RETURN
    <result>
     $t,
     $a
    </result>
</results>

Adapted from XML Query Use Cases


Query Result: A list of all the title-author pairs

<results>
    <result>
         <title>TCP/IP Illustrated</title>
         <author><last>Stevens</last><first>W.</first></author>
    </result>
    <result>
         <title>Advanced Programming in the Unix Environment</title>
         <author><last>Stevens</last><first>W.</first></author>
    </result>
    <result>
         <title>Data on the Web</title>
         <author><last>Abiteboul</last><first>Serge</first></author>
    </result>
    <result>
         <title> Data on the Web</title>
         <author><last>Buneman</last><first>Peter</first></author>
    </result>
    <result>
         <title>Data on the Web</title>
         <author><last>Suciu</last><first>Dan</first></author>
    </result>
</results>
 

Adapted from XML Query Use Cases


Nested Queries

For each book in the bibliography, list the title and authors, grouped inside a result element.

<results>
   FOR $b IN document("http://www.bn.com")/bib/book
   RETURN
    <result>
     $b/title,
     FOR $a IN $b/author
     RETURN $a
    </result>
</results>

Adapted from XML Query Use Cases


Query Result: A list of the title and authors of each book in the bibliography, grouped inside a "result" element.

<results>
  <result>
    <title>TCP/IP Illustrated</title>
    <author><last>Stevens</last><first>W.</first></author>
  </result>

  <result>
    <title>Advanced Programming in the Unix Environment</title>
    <author><last>Stevens</last><first>W.</first></author>
  </result>

  <result>
    <title>Data on the Web</title>
    <author><last>Abiteboul</last><first>Serge</first></author>
    <author><last>Buneman</last><first>Peter</first></author>
    <author><last>Suciu</last><first>Dan</first></author>
  </result>

  <result>
    <title>The Economics of Technology and Content for Digital TV</title>
  </result>
</results>
 

Adapted from XML Query Use Cases


Query with distinct

For each author in the bibliography, list the author's name and the titles of all books by that author, grouped inside a result element.

<results>
   FOR $a IN distinct(document("http://www.bn.com")//author)
   RETURN
    <result>
     $a,
     FOR $b IN document("http://www.bn.com")/bib/book[author=$a]
     RETURN $b/title
    </result>
</results>

Adapted from XML Query Use Cases


Query Result

<results>
  <result>
    <author><last>Stevens</last><first>W.</first></author>
    <title>TCP/IP Illustrated</title>
    <title>Advanced Programming in the Unix Environment</title>
  </result>

  <result>
    <author><last>Abiteboul</last><first>Serge</first></author>
    <title>Data on the Web</title>
  </result>

  <result>
    <author><last>Buneman</last><first>Peter</first></author>
    <title>Data on the Web</title>
  </result>

  <result>
    <author><last>Suciu</last><first>Dan</first></author>
      <title>Data on the Web</title>
  </result>
</results>
 

Adapted from XML Query Use Cases


IF THEN ELSE

Query: For each book that has at least one author, list the title and first two authors, and an empty "et-al" element if the book has additional authors

<bib>
   FOR $b IN document("http://www.bn.com/bib.xml")//book
   WHERE count($b/author) > 0
   RETURN
    <book>
     $b/title,
     FOR $a IN $b/author[RANGE 1 TO 2] RETURN $a,
     IF count($b/author) > 2 THEN <et-al/> ELSE [ ]
    </book>
</bib>

Adapted from XML Query Use Cases


Query Result: Books with the first two authors

<bib>
  <book>
    <title>TCP/IP Illustrated</title>
    <author><last>Stevens</last><first>W.</first></author>
  </book>

  <book>
    <title>Advanced Programming in the Unix Environment</title>
    <author><last>Stevens</last><first>W.</first></author>
  </book>

  <book>
    <title>Data on the Web</title>
      <author><last>Abiteboul</last><first> Serge</first></author>
      <author><last>Buneman</last><first>Peter</first></author>
      <et-al/>
  </book>
</bib>
  

Adapted from XML Query Use Cases


Query with sorting

List the titles and years of all books published by Addison-Wesley after 1991, in alphabetic order.

<bib>
   FOR $b IN document("http://www.bn.com/bib.xml")//book
    [publisher = "Addison-Wesley" AND @year > "1991"]
   RETURN
    <book>
     $b/@year,
     $b/title
    </book> SORTBY (title)
</bib>

Adapted from XML Query Use Cases


Query Result

<bib>
  <book year="1992">
    <title>Advanced Programming in the Unix Environment</title>
  </book>
  <book year="1994">
    <title>TCP/IP Illustrated</title>
   </book>
</bib>
  

Adapted from XML Query Use Cases


Queries with string functions

FOR $b IN document("http://www.bn.com/bib.xml")//book,
  $e IN $b/*[contains(string(.), "Suciu")]
WHERE ends_with(name($e), "or")
RETURN
   <book>
    $b/title,
    $e
   </book>

Adapted from XML Query Use Cases


Query Result

<book>
  <title> Data on the Web </title>
  <author> <last> Suciu </last> <first> Dan </first> </author>
</book>
  

Adapted from XML Query Use Cases


A different document about books

Amazon sample data at "http://www.amazon.com/reviews.xml":

<reviews>
    <entry>
      <title> Data on the Web</title>
      <price>34.95</price>
      <review>
         A very good discussion of semi-structured database
         systems and XML.
      </review>
    </entry>
  
    <entry>
      <title> Advanced Programming in the Unix Environment</title>
      <price>65.95</price>
      <review>
         A clear and detailed discussion of UNIX programming.
      </review>
    </entry>
  
    <entry>
      <title>TCP/IP Illustrated</title>
      <price>65.95</price>
      <review>
         One of the best books on TCP/IP.
      </review>
    </entry>
  
  </reviews>
  

Adapted from XML Query Use Cases


This document uses a different DTD

  <!ELEMENT reviews (entry*)>
  <!ELEMENT entry   (title, price, review)>
  <!ELEMENT title   (#PCDATA)>
  <!ELEMENT price   (#PCDATA)>
  <!ELEMENT review  (#PCDATA)>
   

Query that joins two documents

For each book found at both bn.com and amazon.com, list the title of the book and its price from each source.

<books-with-prices>
   FOR $b IN document("http://www.bn.com/bib.xml")//book,
     $a IN document("http://www.amazon.com/reviews.xml")//entry
   WHERE $b/title = $a/title
   RETURN
    <book-with-prices>
     $b/title,
     <price-amazon> $a/price/text() </price-amazon>,
     <price-bn> $b/price/text() </price-bn>
    </book-with-prices>
</books-with-prices>

Adapted from XML Query Use Cases


Result

<books-with-prices>
  <book-with-prices>
    <title>TCP/IP Illustrated</title>
    <price-amazon>65.95</price-amazon>
    <price-bn>65.95</price-bn>
  </book-with-prices>

  <book-with-prices>
    <title>Advanced Programming in the Unix Environment</title>
    <price-amazon>65.95</price-amazon>
    <price-bn>65.95</price-bn>
  </book-with-prices>

  <book-with-prices>
    <title>Data on the Web</title>
    <price-amazon>34.95</price-amazon>
    <price-bn>39.95</price-bn>
  </book-with-prices>
</books-with-prices>
  

Adapted from XML Query Use Cases


Prices DTD

The next query also uses an input document named "prices.xml", with this DTD:

  <!ELEMENT prices (book*)>
  <!ELEMENT book   (title, source, price)>
  <!ELEMENT title  (#PCDATA)>
  <!ELEMENT source (#PCDATA)>
  <!ELEMENT price  (#PCDATA)>
   

prices.xml Query Sample Data

<prices>
    <book>
      <title>Advanced Programming in the Unix Environment</title>
      <source>www.amazon.com</source>
      <price>65.95</price>
    </book>
  
    <book>
      <title>Advanced Programming in the Unix Environment </title>
      <source>www.bn.com</source>
      <price>65.95</price>
    </book>
  
    <book>
      <title> TCP/IP Illustrated </title>
      <source>www.amazon.com</source>
      <price>65.95</price>
    </book>
  
    <book>
      <title> TCP/IP Illustrated </title>
      <source>www.bn.com</source>
      <price>65.95</price>
    </book>
  
    <book>
      <title>Data on the Web</title>
      <source>www.amazon.com</source>
      <price>34.95</price>
    </book>
  
    <book>
      <title>Data on the Web</title>
      <source>www.bn.com</source>
      <price>39.95</price>
    </book>
  </prices>
   

Adapted from XML Query Use Cases


Query with reused variables

<results>
   LET $doc := document("prices.xml")
   FOR $t IN distinct($doc/book/title)
   LET $p := $doc/book[title = $t]/price
   RETURN
    <minprice title = $t/text()>
     min($p)
    </minprice>
</results>

Adapted from XML Query Use Cases


Query Result

<results>
  <minprice title="Advanced Programming in the Unix Environment"> 65.95 </minprice>
  <minprice title="TCP/IP Illustrated"> 65.95 </minprice>
  <minprice title="Data on the Web"> 34.95 </minprice>
</results>   

Adapted from XML Query Use Cases


Multiple FLWR Queries

<bib>
   FOR $b IN document("http://www.bn.com/bib.xml")//book[author]
   RETURN
    <book>
     $b/title,
     $b/author
    </book>,
   FOR $b IN document("http://www.bn.com/bib.xml")//book[editor]
   RETURN
    <reference>
     $b/title,
     <org> $b/editor/affiliation/text() </org>
    </reference>
</bib>

Adapted from XML Query Use Cases


Query Result

<bib>
    <book>
         <title>TCP/IP Illustrated</title>
         <author><last> Stevens </last> <first> W.</first></author>
    </book>

    <book>
         <title>Advanced Programming in the Unix Environment</title>
         <author><last>Stevens</last><first>W.</first></author>
    </book>

    <book>
         <title>Data on the Web</title>
         <author><last>Abiteboul</last><first>Serge</first></author>
         <author><last>Buneman</last><first>Peter</first></author>
         <author><last>Suciu</last><first>Dan</first></author>
    </book>

    <reference>
        <title>The Economics of Technology and Content for Digital TV</title>
        <org>CITI</org>
    </reference>
</bib>

  

Adapted from XML Query Use Cases


Query Software


To Learn More


Part IV: XML Hypertext

Once you've tasted XLink's Chunky Monkey, it's hard to reconcile yourself to HTML's vanilla.
--John E. Simpson on the xsl-list mailing list


Linking in XML is divided into multiple parts


XML Hypertext Example

<?xml version="1.0"?>
<story date="January 9, 2001"
       xmlns:xlink="http://www.w3.org/1999/xlink"
       xmlns:xinclude="http://www.w3.org/1999/XML/xinclude"
       xml:base="http://www.cafeaulait.org/">

  <p>
    The W3C XML Linking Working Group has pushed the 
    <link xlink:type="simple" xlink:href="http://www.w3.org/TR/2001/WD-xptr-20010108">
      XPointer specification
    </link> 
    back to working draft status. The specific issue that was 
    uncovered during Candidate Recommendation was some 
    <link xlink:type="simple"
      xlink:href="http://www.w3.org/TR/xptr#xpointer(//div[@class='div3'][7])">
      confusion
    </link> 
    over how to integrate XPointers, particularly those in non-XML documents, 
    with namespaces. 
   </p>

   <p>
     It's also come to light in this draft that Sun has 
     <link xlink:type="simple"
      xlink:href=
      "http://lists.w3.org/Archives/Public/www-xml-linking-comments/2000OctDec/0092.html"
      >
      claimed a patent</link> on some of the technologies needed to 
      implement XPointer. I think this is particularly offensive because Eve 
      L. Maler, a Sun employee, served as co-chair of the XML Linking 
      Working Group and a co-editor of the XPointer specification. As usual 
      Sun wants to use this as a club to lock implementers and users into a 
      licensing agreement that goes beyond what Sun and the W3C could 
      otherwise demand. The specific patent is <cite>United States Patent 
      No. 5,659,729, Method and system for implementing hypertext scroll 
      attributes</cite>, issued to Jakob Nielsen in 1997. The patent was 
      filed on February 1, 1996. It claims:
  </p>
  <blockquote>
    <xinclude:include 
      href=
      "http://www.delphion.com/details?&amp;pn=US05659729__#xpointer(//abstract)"
      >
    </xinclude:include>
  </blockquote>
  
</story>

Versions

This talk covers:


XLinks

<footnote xlink:type="simple" xlink:href="footnote7.xml">7</footnote>

Extended Links


Extended Link Example

<?xml version="1.0"?>
<WEBSITE xmlns:xlink="http://www.w3.org/1999/xlink" 
         xlink:type="extended" xlink:title="Cafe au Lait">
         
  <NAME xlink:type="resource" xlink:label="source">
    Cafe au Lait
  </NAME>

  <HOMESITE xlink:type="locator" 
            xlink:href="http://ibiblio.org/javafaq/"
            xlink:label="us"/>
  
  <MIRROR xlink:type="locator" 
          xlink:title="Cafe au Lait Swedish Mirror"
          xlink:label="se"
          xlink:href="http://sunsite.kth.se/javafaq"/>
  
  <MIRROR xlink:type="locator" 
          xlink:title="Cafe au Lait German Mirror"
          xlink:label="de"
          xlink:href="http://sunsite.informatik.rwth-aachen.de/javafaq/"/>
  
  <MIRROR xlink:type="locator" 
          xlink:title="Cafe au Lait Swiss Mirror"
          xlink:label="ch"
          xlink:href="http://sunsite.cnlab-switch.ch/javafaq/"/>
  
  <CONNECTION xlink:type="arc" xlink:from="source" 
              xlink:to="ch"    xlink:show="replace" 
              xlink:actuate="onRequest"/>
  <CONNECTION xlink:type="arc" xlink:from="source" 
              xlink:to="us"    xlink:show="replace" 
              xlink:actuate="onRequest"/>
  <CONNECTION xlink:type="arc" xlink:from="source" 
              xlink:to="se"    xlink:show="replace" 
              xlink:actuate="onRequest"/>
  <CONNECTION xlink:type="arc" xlink:from="source" 
              xlink:to="de"    xlink:show="replace" 
              xlink:actuate="onRequest"/>
  
</WEBSITE>

Diagram of an Extended Link

An extended link with arcs

XPointers

The many advantages of descriptive pointing are crucial for a scalable, generic pointing system. Descriptive pointing is crucial for all the same reasons that descriptive markup is crucial to documents, and that making links first-class objects is crucial to linking. It is also clearly feasible, as shown by multiple implementations of the prior WDs from the XML WG, and of TEI extended pointers.
--XML Linking Working Group, XML XPointer Requirements


XPointers


What are XPointers?


Why Use XPointers?


XPointer Examples

xpointer(id("ebnf"))
xpointer(descendant::language[position()=2])
ebnf
xpointer(/child::spec/child::body/child::*/child::language[position()=2])
/1/14/2
xpointer(id("ebnf"))xpointer(id("EBNF"))

XPointers in URIs


XPointers in XLinks

<SPECIFICATION xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.w3.org/TR/1998/REC-xml-19980210.xml#xpointer(id('ebnf'))" xlink:actuate="onRequest" xlink:show="replace">
Extensible Markup Language (XML) 1.0
</SPECIFICATION>


A Concrete Example

<?xml version="1.0"?>
<!DOCTYPE FAMILYTREE [

  <!ELEMENT FAMILYTREE (PERSON | FAMILY)*>

  <!-- PERSON elements --> 
  <!ELEMENT PERSON (NAME*, BORN*, DIED*, SPOUSE*)>
  <!ATTLIST PERSON 
    ID      ID     #REQUIRED
    FATHER  CDATA  #IMPLIED
    MOTHER  CDATA  #IMPLIED
  >
  <!ELEMENT NAME (#PCDATA)>
  <!ELEMENT BORN (#PCDATA)>
  <!ELEMENT DIED  (#PCDATA)>
  <!ELEMENT SPOUSE EMPTY>
  <!ATTLIST SPOUSE IDREF IDREF #REQUIRED>
  
  <!--FAMILY--> 
  <!ELEMENT FAMILY (HUSBAND?, WIFE?, CHILD*) >
  <!ATTLIST FAMILY ID ID #REQUIRED>
  
  <!ELEMENT HUSBAND EMPTY>
  <!ATTLIST HUSBAND IDREF IDREF #REQUIRED>
  <!ELEMENT WIFE EMPTY>
  <!ATTLIST WIFE IDREF IDREF #REQUIRED>
  <!ELEMENT CHILD EMPTY>
  <!ATTLIST CHILD IDREF IDREF #REQUIRED>

]>
<FAMILYTREE>

  <PERSON ID="p1">
    <NAME>Domeniquette Celeste Baudean</NAME>
    <BORN>21 Apr 1836</BORN>
    <DIED>Unknown</DIED>
    <SPOUSE IDREF="p2"/>
  </PERSON>

  <PERSON ID="p2">
    <NAME>Jean Francois Bellau</NAME>
    <SPOUSE IDREF="p1"/>
  </PERSON>

  <PERSON ID="p3" FATHER="p2" MOTHER="p1">
    <NAME>Elodie Bellau</NAME>
    <BORN>11 Feb 1858</BORN>
    <DIED>12 Apr 1898</DIED>
    <SPOUSE IDREF="p4"/>
  </PERSON>

  <PERSON ID="p4">
    <NAME>John P. Muller</NAME>
    <SPOUSE IDREF="p3"/>
  </PERSON>

  <PERSON ID="p7">
    <NAME>Adolf Eno</NAME>
    <SPOUSE IDREF="p6"/>
  </PERSON>

  <PERSON ID="p6" FATHER="p2" MOTHER="p1">
    <NAME>Maria Bellau</NAME>
    <SPOUSE IDREF="p7"/>
  </PERSON>

  <PERSON ID="p5" FATHER="p2" MOTHER="p1">
    <NAME>Eugene Bellau</NAME>
  </PERSON>

  <PERSON ID="p8" FATHER="p2" MOTHER="p1">
    <NAME>Louise Pauline Bellau</NAME>
    <BORN>29 Oct 1868</BORN>
    <DIED>3 May 1938</DIED>
    <SPOUSE IDREF="p9"/>
  </PERSON>

  <PERSON ID="p9">
    <NAME>Charles Walter Harold</NAME>
    <BORN>about 1861</BORN>
    <DIED>about 1938</DIED>
    <SPOUSE IDREF="p8"/>
  </PERSON>

  <PERSON ID="p10" FATHER="p2" MOTHER="p1">
    <NAME>Victor Joseph Bellau</NAME>
    <SPOUSE IDREF="p11"/>
  </PERSON>

  <PERSON ID="p11">
    <NAME>Ellen Gilmore</NAME>
    <SPOUSE IDREF="p10"/>
  </PERSON>

  <PERSON ID="p12" FATHER="p2" MOTHER="p1">
    <NAME>Honore Bellau</NAME>
  </PERSON>

  <FAMILY ID="f1">
    <HUSBAND IDREF="p2"/>
    <WIFE IDREF="p1"/>
    <CHILD IDREF="p3"/>
    <CHILD IDREF="p5"/>
    <CHILD IDREF="p6"/>
    <CHILD IDREF="p8"/>
    <CHILD IDREF="p10"/>
    <CHILD IDREF="p12"/>
  </FAMILY>

  <FAMILY ID="f2">
    <HUSBAND IDREF="p7"/>
    <WIFE IDREF="p6"/>
  </FAMILY>

</FAMILYTREE>

Location Paths, Steps, and Sets


Location Steps


Location Paths


Location Paths that Identify Multiple Nodes


Axes


Location Step Axes

Axis Selects From
ancestor the parent of the context node, the parent of the parent of the context node, the parent of the parent of the parent of the context node, and so forth back to the root node
ancestor-or-self the ancestors of the context node and the context node itself
attribute the attributes of the context node
child the immediate children of the context node
descendant the children of the context node, the children of the children of the context node, and so forth
descendant-or-self the context node itself and its descendants
following all nodes that start after the end of the context node, excluding attribute and namespace nodes
following-sibling all nodes that start after the end of the context node and have the same parent as the context node
parent the unique parent node of the context node
preceding all nodes that end before the beginning of the context node, excluding attribute and namespace nodes
preceding-sibling all nodes that start before the beginning of the context node and have the same parent as the context node
self the context node

Node Tests


Predicates


Boolean Conversion


The position() function


Identifying an element by its position

xpointer(/child::FAMILYTREE/child::*[1])
xpointer(/child::FAMILYTREE/child::*[2])
xpointer(/child::FAMILYTREE/child::*[3])
xpointer(/child::FAMILYTREE/child::*[4])
xpointer(/child::FAMILYTREE/child::*[5])
xpointer(/child::FAMILYTREE/child::*[6])
xpointer(/child::FAMILYTREE/child::*[7])
xpointer(/child::FAMILYTREE/child::*[8])
xpointer(/child::FAMILYTREE/child::*[9])
xpointer(/child::FAMILYTREE/child::*[10])
xpointer(/child::FAMILYTREE/child::*[11])
xpointer(/child::FAMILYTREE/child::*[12])
xpointer(/child::FAMILYTREE/child::*[13])
xpointer(/child::FAMILYTREE/child::*[14])

Functions that Return Node Sets

The last two, here() and origin() are XPointer extensions to XPath that are not available in XSLT.


id()


here()

Consider a simple slide show. In this example, here()/following::SLIDE[1] refers to the next slide in the show. here()/preceding::SLIDE[1] refers to the previous slide in the show. Presumably this would be used in conjunction with a style sheet that showed one slide at a time.

<?xml version="1.0"?>
<SLIDESHOW xmlns:xlink="http://www.w3.org/1999/xlink">
  <SLIDE>
    <H1>Welcome to the slide show!</H1>
    <BUTTON xlink:type="simple"
            xlink:href="here()/following::SLIDE[1]">
      Next
    </BUTTON>
  </SLIDE>
  <SLIDE>
    <H1>This is the second slide</H1>
    <BUTTON xlink:type="simple" 
            xlink:href="here()/preceding::SLIDE[1]">
      Previous
    </BUTTON>
    <BUTTON xlink:type="simple" 
            xlink:href="here()/following::SLIDE[1]">
      Next
    </BUTTON>
  </SLIDE>
  <SLIDE>
    <H1>This is the second slide</H1>
    <BUTTON xlink:type="simple" 
            xlink:href="here()/preceding::SLIDE[1]">
      Previous
    </BUTTON>
    <BUTTON xlink:type="simple" 
           xlink:href="here()/following::SLIDE[1]">
      Next
    </BUTTON>
  </SLIDE>
  <SLIDE>
    <H1>This is the third slide</H1>
    <BUTTON xlink:type="simple" 
            xlink:href="here()/preceding::SLIDE[1]">
      Previous
    </BUTTON>
    <BUTTON xlink:type="simple" 
            xlink:href="here()/following::SLIDE[1]">
      Next
    </BUTTON>
  </SLIDE>
  ...
  <SLIDE>
    <H1>This is the last slide</H1>
    <BUTTON xlink:type="simple"
            xlink:href="here()/preceding::SLIDE[1]">
      Previous
    </BUTTON>
  </SLIDE>

</SLIDESHOW>

Generally, the here() location term is only used in fully relative URIs in XLinks. If any URI part is included, it must be the same as the URI of the current document.


origin()

The origin() function is much the same as here(); that is, it refers to the source of a link. However, origin() is used in out-of-line links where the link is not actually present in the source document. It points to the element in the source document from which the user activated the link.


Points

Every point is either between two nodes or between two characters in the parsed character data of a document. To make sense of this you have to remember that parsed character data is part of a text node. For instance, consider this very simple but well-formed XML document:

<GREETING>
  Hello
</GREETING>

Tree Structure

There are exactly three nodes and 13 distinct points in this document. In order the points are:

The exact details of the white space in the document are not considered here. XPointer collapses all runs of white space to a single space.


Point Expressions


Ranges

In some applications it may be important to specify a range across a document rather than a particular point in the document. For instance, the selection a user makes with a mouse is not necessarily going to match up with any one element or node. It may start in the middle of one paragraph, extend across a heading and a picture and then into the middle of another paragraph two pages down. Any such contiguous area of a document can be described with a range.


Range Expressions


Range Functions

range(location-set)
Returns a location set containing one range for each location in the argument.

The range is the minimum range necessary to cover the entire location.

range-inside(location-set)

Returns a location set containing the interiors of each of the locations in the input.

start-point(location-set)

Returns a location set that contains one point representing the first point of each location in the input location set. For example, start-point(//PERSON[1]) returns the point immediately before the first PERSON element. start-point(//PERSON) returns the set of points immediately before each PERSON element.

end-point(location-set)

The same as start-point() except that it returns the points immediately after each location in its input.


String Ranges

string-range(node-set,substring,index,length)


XPointers and Namespaces


Child Sequences


XPointer Summary


To Learn More



What is XML Base?


The xml:base attribute

<slide xml:base="http://www.ibiblio.org/xml/slides/xmloneaustin2001/xlinks/">
  <title>The xml:base attribute</title>
  ...
  <previous xlink:type="simple" xlink:href="What_Is_XBase.xml"/>
  <next xlink:type="simple" xlink:href="xbaseexample.xml"/>
</slide>


XML Base Example

<COURSE xmlns:xlink="http://www.w3.org/1999/xlink"
         xml:base="http://www.ibiblio.org/javafaq/course/"
         xlink:type="extended">

  <TOC xlink:type="locator" xlink:href="index.html" xlink:label="index"/>

  <CLASS xlink:type="locator" xlink:label="class"
         xlink:href="week1.xml"/>
  <CLASS xlink:type="locator" xlink:label="class" 
         xlink:href="week2.xml"/>
  <CLASS xlink:type="locator" xlink:label="class" 
         xlink:href="week3.xml"/>
  <CLASS xlink:type="locator" xlink:label="class" 
         xlink:href="week4.xml"/>
  <CLASS xlink:type="locator" xlink:label="class" 
         xlink:href="week5.xml"/>
  <CLASS xlink:type="locator" xlink:label="class" 
         xlink:href="week6.xml"/>
  <CLASS xlink:type="locator" xlink:label="class" 
         xlink:href="week7.xml"/>
  <CLASS xlink:type="locator" xlink:label="class" 
         xlink:href="week8.xml"/>
  <CLASS xlink:type="locator" xlink:label="class" 
         xlink:href="week9.xml"/>
  <CLASS xlink:type="locator" xlink:label="class" 
         xlink:href="week10.xml"/> 
  <CLASS xlink:type="locator" xlink:label="class" 
         xlink:href="week11.xml"/> 
  <CLASS xlink:type="locator" xlink:label="class" 
         xlink:href="week12.xml"/>
  <CLASS xlink:type="locator" xlink:label="class" 
         xlink:href="week13.xml"/>
  
  <CONNECTION xlink:type="arc" xlink:from="index" xlink:to="class"/>
  <CONNECTION xlink:type="arc" xlink:from="class" xlink:to="index"/>
  
</COURSE>

Open Issues


To Learn More


XInclude

The problem is that we're not providing the tools. We're providing the specs. That's a whole different ball game. If tools existed for actually making really interesting use of RDF and XLink and XInclude then people would use them. If IE and/or Mozilla supported the full gamut of specs, from XSLT 1.0 to XLink and XInclude (OK, so they're not quite REC's, but with time...) then you would find people using them more.
--Matt Sergeant on the xml-dev mailing list


What is XInclude?


Alternatives (and why they don't work)


The include element

<book xmlns:xinclude="http://www.w3.org/1999/XML/xinclude">
  <title>Processing XML with Java</title>
  <chapter><xinclude:include href="dom.xml"/></chapter>
  <chapter><xinclude:include href="sax.xml"/></chapter>
  <chapter><xinclude:include href="jdom.xml"/></chapter>
</book>

The parse attribute

parse="xml"
The resource must be parsed as XML and the infosets merged. This is the default.
parse="text"
The resource must be treated as pure text and inserted as a text node. When serialized, this means that characters like < will change to &lt; and so forth.
<slide xmlns:xinclude="http://www.w3.org/1999/XML/xinclude">
  <title>The href attribute</title>
  
<ul>
  <li>Identifies the document to be included with a URI</li>
  <li>The document at the URI replaces the <code>include</code> 
      element in the including document</li>
  <li>The <code>xinclude</code> prefix is bound to the http://www.w3.org/1999/XML/xinclude
  namespace URI. 
  </li>
</ul>  

<pre><code><xinclude:include parse="text" href="processing_xml_with_java.xml"/>
</code></pre>
        
  <description>
      A slide from Elliotte Rusty Harold's XML and Hypertext seminar at
      <host_ref/>, <date_ref/>
    </description>
  <last_modified>October 26, 2000</last_modified>
</slide>


Implementation as JDOM

/*--

 Copyright 2000 Elliotte Rusty Harold.
 All rights reserved.

 I haven't yet decided on a license.
 It will be some form of open source.

 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 SUCH DAMAGE.

 */

package com.macfaq.xml;

import java.net.URL;
import java.net.MalformedURLException;
import java.util.Stack;
import java.util.Iterator;
import java.util.List;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.BufferedInputStream;
import java.io.InputStream;
import org.jdom.Namespace;
import org.jdom.Comment;
import org.jdom.CDATA;
import org.jdom.JDOMException;
import org.jdom.Attribute;
import org.jdom.Element;
import org.jdom.ProcessingInstruction;
import org.jdom.Document;
import org.jdom.input.SAXBuilder;
import org.jdom.output.XMLOutputter;

/**
 * <p><code>XIncluder</code> provides methods to
 * resolve JDOM elements and documents to produce
 * a new Document or Element with all
 * XInclude references resolved.
 * </p>
 *
 *
 * @author Elliotte Rusty Harold
 * @version 1.0d2
 */
public class XIncluder {

  /** The namespace an <code>include</code> element must be in to be processed. */
  public final static Namespace XINCLUDE_NAMESPACE
    = Namespace.getNamespace("xinclude", "http://www.w3.org/1999/XML/xinclude");

  // No instances allowed
  private XIncluder() {}

  // Single builder shared by every include that has to be parsed as XML
  private static SAXBuilder builder = new SAXBuilder();

  /**
    * <p>
    * This method resolves a JDOM <code>Document</code>
    * and merges in all XInclude references.
    * If a referenced document cannot be found it is replaced with
    * an error message. The Document object returned is a new document.
    * The original <code>Document</code> is not changed.
    * </p>
    *
    * @param original <code>Document</code> that will be processed
    * @param base     <code>String</code> form of the base URI against which
    *                 relative URLs will be resolved. This can be null if the
    *                 document includes an <code>xml:base</code> attribute.
    * @return Document new <code>Document</code> object in which all
    *                  XInclude elements have been replaced.
    * @throws NullPointerException     if <code>original</code> is null
    * @throws ClassCastException       if the root element is itself an include
    *                                  element that resolves to text rather
    *                                  than to an element
    * @throws CircularIncludeException if an included document includes
    *                                  itself, directly or indirectly
    */
    public static Document resolve(Document original, String base)
      throws CircularIncludeException, MalformedURLException {

    if (original == null) throw new NullPointerException("Document must not be null");

    Element root = original.getRootElement();
    // A root that resolves to a String (text include or error message)
    // cannot become a document root, so the cast fails in that case.
    Element resolved = (Element) resolve(root, base);

    Document result = new Document(resolved, original.getDocType());

    // Carry over the document-level comments and processing instructions
    // as clones so that the original document keeps its own nodes.
    Iterator iterator = original.getMixedContent().iterator();
    while (iterator.hasNext()) {
      Object o = iterator.next();
      if (o instanceof Comment) {
        Comment c = (Comment) o;
        result.addContent((Comment) c.clone());
      }
      else if (o instanceof ProcessingInstruction) {
        ProcessingInstruction pi =(ProcessingInstruction) o;
        result.addContent((ProcessingInstruction) pi.clone());
      }
    }

    return result;
  }

  /**
    * <p>
    * This method resolves a JDOM <code>Element</code>
    * and merges in all XInclude references. This process is recursive.
    * The element returned contains no XInclude elements.
    * If a referenced document cannot be found it is replaced with
    * an error message. The <code>Element</code> object returned is a new element.
    * The original <code>Element</code> is not changed.
    * </p>
    *
    * @param original <code>Element</code> that will be processed
    * @param base     <code>String</code> form of the base URI against which
    *                 relative URLs will be resolved. This can be null if the
    *                 element includes an <code>xml:base</code> attribute.
    * @return Object  Either an <code>Element</code> (an ordinary element, or
    *                 an include element parsed as XML) or a
    *                 <code>String</code> (an include element with
    *                 <code>parse="text"</code>, or the error message that
    *                 stands in for an include that could not be loaded)
    * @throws NullPointerException     if <code>original</code> is null
    * @throws CircularIncludeException if an included document includes
    *                                  itself, directly or indirectly
    */
   public static Object resolve(Element original, String base)
     throws CircularIncludeException, MalformedURLException {

    if (original == null) {
      throw new NullPointerException("You can't XInclude a null element.");
    }
    Stack bases = new Stack();
    if (base != null) bases.push(base);

    // The stack is local to this call, so it does not need to be unwound.
    // (Popping it unconditionally failed on an empty stack when base was null.)
    return resolve(original, bases);

  }

  // True only for an element named "include" in the XInclude namespace
  private static boolean isIncludeElement(Element element) {
    return element.getName().equals("include")
        && element.getNamespace().equals(XINCLUDE_NAMESPACE);
  }


  /**
    * <p>
    * Resolves one element against a stack of base URIs. The top of the
    * stack is the URI of the document currently being processed; the
    * entries below it are the documents that led to it, which is what
    * makes detection of circular includes possible.
    * </p>
    *
    * @param original <code>Element</code> that will be processed
    * @param bases    <code>Stack</code> of <code>String</code> base URIs;
    *                 may be empty
    * @return Object  either an <code>Element</code> or a <code>String</code>
    * @throws IllegalArgumentException if an include element has no
    *                                  <code>href</code> attribute
    * @throws CircularIncludeException if the include refers to a document
    *                                  that is already on the stack
    */
  protected static Object resolve(Element original, Stack bases)
   throws CircularIncludeException {

    if (isIncludeElement(original)) {
      return resolveInclude(original, bases);
    }
    return copyWithResolvedChildren(original, bases);

  }

  // Loads the resource an include element points at and returns either the
  // resolved root element, the raw text, or an error message string.
  private static Object resolveInclude(Element include, Stack bases)
   throws CircularIncludeException {

    Attribute href = include.getAttribute("href");
    if (href == null) { // illegal, what kind of exception????
      throw new IllegalArgumentException("Missing href attribute");
    }

    // No base at all must stay null so that the href is treated as an
    // absolute URL; an empty string would be rejected by the URL constructor.
    String base = null;
    if (bases.size() != 0) base = (String) bases.peek();
    Attribute baseAttribute
     = include.getAttribute("base", Namespace.XML_NAMESPACE);
    if (baseAttribute != null) base = baseAttribute.getValue();

    // parse="xml" is the default; only an explicit "text" switches it off
    boolean parse = true;
    Attribute parseAttribute = include.getAttribute("parse");
    if (parseAttribute != null && parseAttribute.getValue().equals("text")) {
      parse = false;
    }

    URL remote;
    if (base != null) {
      try {
        URL context = new URL(base);
        remote = new URL(context, href.getValue());
      }
      catch (MalformedURLException ex) {
        return "Unresolvable URL " + base + "/" + href.getValue();
      }
    }
    else {
      try {
        remote = new URL(href.getValue());
      }
      catch (MalformedURLException ex) {
        return "Unresolvable URL " + href.getValue();
      }
    }

    if (!parse) { // insert text
      return getURL(remote);
    }

    String location = remote.toExternalForm();
    // Stack.contains() compares with equals(), so equal URL strings match
    if (bases.contains(location)) {
      // The stack is not empty here; its top is the including document
      throw new CircularIncludeException(
        "Circular XInclude Reference to "
       + location + " in " + bases.peek());
    }

    try {
      Document doc = builder.build(remote);
      bases.push(location);
      try {
        // The included root may itself be an include that yields a String,
        // so the result is passed through without a cast.
        return resolve(doc.getRootElement(), bases);
      }
      finally {
        bases.pop();
      }
    }
    // Make this configurable
    catch (JDOMException e) {
       return "Document not found: " + location
        + "\r\n" + e.getMessage();
    }

  }

  // Builds a new element with the same name, namespace and attributes as
  // the original and with every child copied or, for elements, resolved.
  private static Element copyWithResolvedChildren(Element original, Stack bases)
   throws CircularIncludeException {

    Element result = new Element(original.getName(), original.getNamespace());
    Iterator attributes = original.getAttributes().iterator();
    while (attributes.hasNext()) {
      Attribute a = (Attribute) attributes.next();
      result.addAttribute((Attribute) a.clone());
    }

    Iterator iterator = original.getMixedContent().iterator();
    while (iterator.hasNext()) {
      Object o = iterator.next();
      if (o instanceof Element) {
        Object resolved = resolve((Element) o, bases);
        if (resolved instanceof String) {
          result.addContent((String) resolved);
        }
        else result.addContent((Element) resolved);
      }
      else if (o instanceof String) {
        result.addContent((String) o);
      }
      // The remaining node types are cloned, like the attributes above,
      // so that the original tree and the copy do not share nodes.
      else if (o instanceof Comment) {
        result.addContent((Comment) ((Comment) o).clone());
      }
      else if (o instanceof CDATA) {
        result.addContent((CDATA) ((CDATA) o).clone());
      }
      else if (o instanceof ProcessingInstruction) {
        result.addContent(
         (ProcessingInstruction) ((ProcessingInstruction) o).clone());
      }
    }

    return result;

  }

  /**
    * <p>
    * Downloads the resource at a URL and returns it as text, decoded as
    * ISO-8859-1, with each <code>&lt;</code> replaced by
    * <code>&amp;lt;</code> and each <code>&amp;</code> replaced by
    * <code>&amp;amp;</code>.
    * </p>
    *
    * @param source <code>URL</code> of the resource to read
    * @return String the escaped text of the resource, or a message beginning
    *                "Document not found: " if the resource cannot be read
    */
  public static String getURL(URL source) {
    StringBuffer s = new StringBuffer();
    InputStreamReader reader = null;
    try {
      InputStream in = new BufferedInputStream(source.openStream());
      // does XInclude give you anything to specify the character set????
      reader = new InputStreamReader(in, "8859_1");
      // NOTE(review): the text is escaped here and then added as element
      // content; if the outputter escapes text as well, these characters
      // would be escaped twice -- confirm against the JDOM version in use.
      int c;
      // Read through the reader so the declared encoding is really applied
      while ((c = reader.read()) != -1) {
        if (c == '<') s.append("&lt;");
        else if (c == '&') s.append("&amp;");
        else s.append((char) c);
      }
      return s.toString();
    }
    catch (IOException e) {
      e.printStackTrace();
      return "Document not found: " + source.toExternalForm();
    }
    finally {
      // Closing the reader also closes the underlying connection stream
      if (reader != null) {
        try {
          reader.close();
        }
        catch (IOException ignored) {
          // nothing useful can be done if closing fails
        }
      }
    }
  }

  /**
    * <p>
    * Resolves the XIncludes in each document named on the command line
    * and prints the result on <code>System.out</code>. Errors for one
    * document are reported on <code>System.err</code> and do not stop
    * the remaining documents from being processed.
    * </p>
    *
    * @param args file names or URLs of the documents to process
    */
  public static void main(String[] args) {

    SAXBuilder builder = new SAXBuilder();
    XMLOutputter outputter = new XMLOutputter();
    for (int i = 0; i < args.length; i++) {
      try {
        Document input = builder.build(args[i]);
        // absolutize URL: an argument without a colon is taken to be
        // a local file name rather than a URL
        String base = args[i];
        if (base.indexOf(':') < 0) {
          File f = new File(base);
          base = f.toURL().toExternalForm();
        }
        Document output = resolve(input, base);
        // need to set encoding on this to Latin-1 and check what
        // happens to UTF-8 curly quotes
        outputter.output(output, System.out);
      }
      catch (Exception e) {
        System.err.println(e);
        e.printStackTrace();
      }
    }

  }

}

Implementation as DOM

/*--

 Copyright 2000 Elliotte Rusty Harold.
 All rights reserved.

 I haven't yet decided on a license.
 It will be some form of open source.

 THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESSED OR IMPLIED
 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED.  IN NO EVENT SHALL ELLIOTTE RUSTY HAROLD OR ANY
 OTHER CONTRIBUTORS TO THIS PACKAGE
 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 SUCH DAMAGE.

 */

package com.macfaq.xml;

import java.net.URL;
import java.net.MalformedURLException;
import java.util.Stack;
import org.xml.sax.SAXException;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.BufferedInputStream;
import java.io.InputStream;
import org.w3c.dom.Element;
import org.w3c.dom.Document;
import org.w3c.dom.Attr;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.DocumentType;
import org.w3c.dom.DOMImplementation;
import org.apache.xerces.parsers.DOMParser;
import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;

/**
 * <p><code>DOMXIncluder</code> provides methods to
 * resolve DOM elements and documents to produce
 * a new <code>Document</code> or <code>Element</code> with all
 * XInclude references resolved.
 * </p>
 *
 * <p>
 * All methods are static. Included documents are loaded through a
 * single static <code>DOMParser</code> and no synchronization is
 * performed in this class.
 * </p>
 *
 * @author Elliotte Rusty Harold
 * @version 1.0d1
 */
public class DOMXIncluder {

  public final static String XINCLUDE_NAMESPACE
   = "http://www.w3.org/1999/XML/xinclude";

  // Namespace to which the xml: prefix (as in xml:base) is bound
  private final static String XML_NAMESPACE
   = "http://www.w3.org/XML/1998/namespace";

  // No instances allowed
  private DOMXIncluder() {}

  // Shared parser used to load every included document
  private static DOMParser parser = new DOMParser();

  /**
    * <p>
    * This method resolves a DOM <code>Document</code>
    * and merges in all XInclude references.
    * If a referenced document cannot be found it is replaced with
    * an error message. The <code>Document</code>
    * object returned is a new document.
    * The original <code>Document</code> object is not changed.
    * </p>
    *
    * <p>
    * The document type declaration (if there is one) is recreated from
    * its name, public ID and system ID. Comments and processing
    * instructions that are children of the document are imported into
    * the new document in their original position relative to the
    * DOCTYPE and the root element.
    * </p>
    *
    * @param original <code>Document</code> that will be processed
    * @param base     <code>String</code> form of the base URI against which
    *                 relative URLs will be resolved. This can be null if the
    *                 document includes an <code>xml:base</code> attribute.
    * @return Document new <code>Document</code> object in which all
    *                  XInclude elements have been replaced.
    * @throws CircularIncludeException if this document possesses a cycle of
    *                                  XIncludes.
    * @throws NullPointerException  if the original argument is null.
    */
  public static Document resolve(Document original, String base)
    throws CircularIncludeException, NullPointerException {

    if (original == null) {
      throw new NullPointerException("Document must not be null");
    }

    Element root = original.getDocumentElement();
    DOMImplementation impl = original.getImplementation();

    // A well-formed document need not have a DOCTYPE, in which
    // case getDoctype() returns null
    DocumentType oldDoctype = original.getDoctype();
    DocumentType newDoctype = null;
    if (oldDoctype != null) {
      newDoctype = impl.createDocumentType(
       oldDoctype.getName(),
       oldDoctype.getPublicId(),
       oldDoctype.getSystemId());
    }

    // The root created here is only a placeholder; it is swapped for
    // the resolved root inside the loop below.
    Document resultDocument
     = impl.createDocument(root.getNamespaceURI(),
       root.getTagName(),
       newDoctype);
    // check that tag name is qualified name

    boolean doctypeSeen = (oldDoctype == null);
    boolean rootSeen = false;
    NodeList children = original.getChildNodes();
    for (int i = 0; i < children.getLength(); i++) {
      Node n = children.item(i);
      if (n instanceof Element) { // root element
        // If the root is itself an include with parse="text" the
        // resolved node is a Text, which replaceChild() rejects
        // with a DOMException since a document cannot have text
        // as its root.
        resultDocument.replaceChild(
         resolve(root, base, resultDocument),
         resultDocument.getDocumentElement()
        );
        rootSeen = true;
      }
      else if (n instanceof DocumentType) {
        // skip it, already recreated above
        doctypeSeen = true;
      }
      else {
        // Comments and processing instructions. These belong to the
        // original document, so they must be imported (not merely
        // cloned) before they can be inserted into the new one.
        Node copy = resultDocument.importNode(n, true);
        if (rootSeen) {
          resultDocument.appendChild(copy);
        }
        else if (doctypeSeen) {
          resultDocument.insertBefore(copy,
           resultDocument.getDocumentElement());
        }
        else {
          resultDocument.insertBefore(copy, resultDocument.getDoctype());
        }
      }
    }

    return resultDocument;
  }

  /**
    * <p>
    * This method resolves a DOM <code>Element</code>
    * and merges in all XInclude references. This process is recursive.
    * The element returned contains no XInclude elements.
    * If a referenced document cannot be found it is replaced with
    * an error message. The <code>Element</code> object returned is a new element.
    * The original <code>Element</code> is not changed.
    * </p>
    *
    * @param original <code>Element</code> that will be processed
    * @param base     <code>String</code> form of the base URI against which
    *                 relative URLs will be resolved. This can be null if the
    *                 element includes an <code>xml:base</code> attribute.
    * @param resolved <code>Document</code> into which the resolved element will be placed.
    * @return Node    Either an <code>Element</code>
    *                 (<code>parse="xml"</code>) or a <code>Text</code>
    *                 (<code>parse="text"</code>, or an error message)
    * @throws CircularIncludeException if this <code>Element</code> contains an XInclude element
    *                                  that attempts to include a document in which
    *                                  this element is directly or indirectly included.
    * @throws NullPointerException  if the original argument is null.
    */
  public static Node resolve(Element original, String base, Document resolved)
   throws CircularIncludeException,  NullPointerException {

    if (original == null) {
      throw new NullPointerException(
       "You can't XInclude a null element."
      );
    }
    // The stack is private to this call, so there is nothing to pop
    // afterwards; a null base simply leaves it empty.
    Stack bases = new Stack();
    if (base != null) bases.push(base);

    return resolve(original, bases, resolved);

  }

  /**
    * <p>
    * Tests whether an element is an <code>include</code> element in
    * the XInclude namespace. Elements that have no namespace URI or
    * no local name are not include elements.
    * </p>
    *
    * @param element the element to test
    * @return true if the element's local name is <code>include</code>
    *         and its namespace URI is {@link #XINCLUDE_NAMESPACE}
    */
  private static boolean isIncludeElement(Element element) {

    // Constant-first comparisons: getLocalName() and getNamespaceURI()
    // both return null for elements that are not in a namespace
    return "include".equals(element.getLocalName())
     && XINCLUDE_NAMESPACE.equals(element.getNamespaceURI());

  }


  /**
    * <p>
    * This method resolves a DOM <code>Element</code>
    * and merges in all XInclude references. This process is recursive.
    * The node returned contains no XInclude elements.
    * If a referenced document cannot be found or parsed it is replaced
    * with a text node holding an error message.
    * The original <code>Element</code> is not changed.
    * </p>
    *
    * @param original <code>Element</code> that will be processed
    * @param bases    <code>Stack</code> containing the string forms of
    *                 all the URIs of documents which contain this element
    *                 through XIncludes. This is used to detect if a circular
    *                 reference is being used.
    * @param resolved <code>Document</code> into which the resolved element will be placed.
    * @return Node  Either an <code>Element</code>
    *                 (<code>parse="xml"</code>) or a <code>Text</code>
    *                 (<code>parse="text"</code>, or an error message)
    * @throws CircularIncludeException if this <code>Element</code> contains an XInclude element
    *                                  that attempts to include a document in which
    *                                  this element is directly or indirectly included.
    * @throws IllegalArgumentException if the href attribute is missing from an include element.
    */
  private static Node resolve(Element original, Stack bases, Document resolved)
   throws CircularIncludeException, IllegalArgumentException {

    if (!isIncludeElement(original)) {
      // Ordinary element: shallow copy it into the target document
      // and recursively process the children.
      // still need to adjust bases here????
      Element copy = (Element) resolved.importNode(original, false);
      NodeList children = original.getChildNodes();
      for (int i = 0; i < children.getLength(); i++) {
        Node n = children.item(i);
        if (n instanceof Element) {
          copy.appendChild(resolve((Element) n, bases, resolved));
        }
        else {
          copy.appendChild(resolved.importNode(n, true));
        }
      }
      return copy;
    }

    String href = original.getAttribute("href");
    if (href == null || href.equals("")) { // illegal, what kind of exception????
      throw new IllegalArgumentException("Missing href attribute");
    }

    // The base is the URL of the innermost containing document unless
    // the include element overrides it with an xml:base attribute.
    String base = "";
    if (bases.size() != 0) base = (String) bases.peek();
    String baseAttribute = original.getAttributeNS(XML_NAMESPACE, "base");
    if (baseAttribute != null && !baseAttribute.equals("")) {
      base = baseAttribute;
    }

    // parse="xml" is the default; only an explicit "text" changes it
    boolean parse = !"text".equals(original.getAttribute("parse"));

    String remote;
    if (base != null && !base.equals("")) {
      try {
        URL context = new URL(base);
        URL u = new URL(context, href);
        remote = u.toExternalForm();
      }
      catch (MalformedURLException ex) {
        return resolved.createTextNode("Unresolvable URL "
         + base + "/" + href);
      }
    }
    else {
      // no base available; use the href as given
      remote = href;
    }

    if (!parse) { // insert text
      return resolved.createTextNode(downloadTextDocument(remote));
    }

    // Stack.contains() compares with equals(), so equal URL strings
    // are detected even when they are distinct String objects.
    if (bases.contains(remote)) {
      // bases is necessarily non-empty here; its top is the document
      // in which the offending include element was found.
      // need to figure out how to get the line number where
      // bad include occurs
      throw new CircularIncludeException(
        "Circular XInclude Reference to "
       + remote + " in " + bases.peek());
    }

    try {
      parser.parse(remote);
      Document doc = parser.getDocument();
      bases.push(remote);
      try {
        // The included root may itself be a parse="text" include,
        // in which case this is a Text node rather than an Element.
        return resolve(doc.getDocumentElement(), bases, resolved);
      }
      finally {
        bases.pop();
      }
    }
    // Make this configurable
    catch (SAXException e) {
      return resolved.createTextNode("Document "
       + remote + " is not well-formed.\r\n" + e.getMessage());
    }
    catch (IOException e) {
      return resolved.createTextNode("Document not found: "
       + remote + "\r\n" + e.getMessage());
    }

  }

  /**
    * <p>
    * This utility method reads a document at a specified URL
    * and returns the contents of that document as a <code>String</code>.
    * It's used to include files with <code>parse="text"</code>
    * </p>
    *
    * <p>
    * The bytes of the document are decoded as ISO-8859-1. The characters
    * are returned exactly as read; <code>&lt;</code> and <code>&amp;</code>
    * are <em>not</em> escaped, because the result is placed in a DOM text
    * node and the serializer escapes text when the document is written.
    * </p>
    *
    * <p>
    * If the document cannot be located due to an IOException,
    * then an error message string is returned. I'm not yet convinced this
    * is the right behavior. Perhaps I should pass on the exception?
    * </p>
    *
    * @param url      URL of the document that will be stored in
    *                 <code>String</code>.
    * @return String  The document retrieved from the source <code>URL</code>
    *                 or an error message if the document can't be retrieved.
    *                 Note: throwing an exception might be better here. I should
    *                 at least allow the setting of the error message.
    */
  public static String downloadTextDocument(String url) {

    URL source;
    try {
      source = new URL(url);
    }
    catch (MalformedURLException ex) {
      return "Unresolvable URL " + url;
    }
    StringBuffer s = new StringBuffer();
    InputStream in = null;
    try {
      in = new BufferedInputStream(source.openStream());
      // does XInclude give you anything to specify the character set????
      InputStreamReader reader = new InputStreamReader(in, "8859_1");
      int c;
      while ((c = reader.read()) != -1) {
        s.append((char) c);
      }
      return s.toString();
    }
    catch (IOException e) {
      return "Document not found: " + source.toExternalForm();
    }
    finally {
      // always release the connection, even when reading failed
      if (in != null) {
        try {
          in.close();
        }
        catch (IOException ignored) {
          // nothing useful can be done if close() fails
        }
      }
    }

  }

  /**
    * <p>
    * The driver method for the XIncluder program.
    * I'll probably move this to a separate class soon.
    * </p>
    *
    * <p>
    * Each argument is parsed, resolved and serialized to
    * <code>System.out</code> in ISO-8859-1. An exception while
    * processing one argument is reported on <code>System.err</code>
    * and the remaining arguments are still processed.
    * </p>
    *
    * @param args  each element contains the URL or file name
    *              of a document to be processed.
    */
  public static void main(String[] args) {

    DOMParser parser = new DOMParser();
    for (int i = 0; i < args.length; i++) {
      try {
        parser.parse(args[i]);
        Document input = parser.getDocument();
        // absolutize URL: an argument without a ':' has no scheme
        // and is treated as a local file name
        String base = args[i];
        if (base.indexOf(':') < 0) {
          File f = new File(base);
          base = f.toURL().toExternalForm();
        }
        Document output = resolve(input, base);
        // need to set encoding on this to Latin-1 and check what
        // happens to UTF-8 curly quotes

        OutputFormat format = new OutputFormat("XML", "ISO-8859-1", false);
        format.setPreserveSpace(true);
        XMLSerializer serializer
         = new XMLSerializer(System.out, format);
        serializer.serialize(output);
      }
      catch (Exception e) {
        System.err.println(e);
        e.printStackTrace();
      }
    }

  }

}

To Learn More


Index | Cafe con Leche

Copyright 2000, 2001 Elliotte Rusty Harold
elharo@metalab.unc.edu
Last Modified March 8, 2001