View Javadoc
1   /*
2    * $Header$
3    * $Revision$
4    * $Date$
5    *
6    * ====================================================================
7    *
8    * Copyright 2000-2002 bob mcwhirter & James Strachan.
9    * All rights reserved.
10   *
11   *
12   * Redistribution and use in source and binary forms, with or without
13   * modification, are permitted provided that the following conditions are
14   * met:
15   * 
16   *   * Redistributions of source code must retain the above copyright
17   *     notice, this list of conditions and the following disclaimer.
18   * 
19   *   * Redistributions in binary form must reproduce the above copyright
20   *     notice, this list of conditions and the following disclaimer in the
21   *     documentation and/or other materials provided with the distribution.
22   * 
23   *   * Neither the name of the Jaxen Project nor the names of its
24   *     contributors may be used to endorse or promote products derived 
25   *     from this software without specific prior written permission.
26   * 
27   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
28   * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29   * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
30   * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
31   * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32   * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33   * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34   * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35   * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36   * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37   * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38   *
39   * ====================================================================
40   * This software consists of voluntary contributions made by many
41   * individuals on behalf of the Jaxen Project and was originally
42   * created by bob mcwhirter <bob@werken.com> and
43   * James Strachan <jstrachan@apache.org>.  For more information on the
44   * Jaxen Project, please see <http://www.jaxen.org/>.
45   *
46   */
47  package org.jaxen.function;
48  
49  import java.util.List;
50  
51  import org.jaxen.Context;
52  import org.jaxen.Function;
53  import org.jaxen.FunctionCallException;
54  import org.jaxen.Navigator;
55  /**
56   * <p>
57   * <b>4.2</b>
58   * <code><i>string</i> substring(<i>string</i>,<i>number</i>,<i>number?</i>)</code>
59   * </p>
60   * 
61   * <blockquote cite="http://www.w3.org/TR/xpath"> 
62   * <p>The <b>substring</b> function returns the
63   * substring of the first argument starting at the position specified in
64   * the second argument with length specified in the third argument. For
65   * example,
66   * 
67   * <code>substring("12345",2,3)</code> returns <code>"234"</code>.
68   * If the third argument is not specified, it returns the substring
69   * starting at the position specified in the second argument and
70   * continuing to the end of the string. For example,
71   * <code>substring("12345",2)</code> returns <code>"2345"</code>.
72   * </p>
73   * 
74   * <p>
75   * More precisely, each character in the string (see <a
76   * href="https://www.w3.org/TR/xpath#strings">[<b>3.6 Strings</b>]</a>) is considered to have a
77   * numeric position: the position of the first character is 1, the
78   * position of the second character is 2 and so on.
79   * </p>
80   * 
81   * <blockquote> <b>NOTE: </b>This differs from Java and ECMAScript, in
82   * which the <code>String.substring</code> method treats the position
83   * of the first character as 0.</blockquote>
84   * 
85   * <p>
86   * The returned substring contains those characters for which the
87   * position of the character is greater than or equal to the rounded
88   * value of the second argument and, if the third argument is specified,
89   * less than the sum of the rounded value of the second argument and the
90   * rounded value of the third argument; the comparisons and addition
91   * used for the above follow the standard IEEE 754 rules; rounding is
92   * done as if by a call to the <b><a href="https://www.w3.org/TR/xpath#function-round">round</a></b>
93   * function. The following examples illustrate various unusual cases:
94   * </p>
95   * 
96   * <ul>
97   * 
98   * <li>
99   * <p>
100  * <code>substring("12345", 1.5, 2.6)</code> returns
101  * <code>"234"</code>
102  * </p>
103  * </li>
104  * 
105  * <li>
106  * <p>
107  * <code>substring("12345", 0, 3)</code> returns <code>"12"</code>
108  * 
109  * </p>
110  * </li>
111  * 
112  * <li>
113  * <p>
114  * <code>substring("12345", 0 div 0, 3)</code> returns <code>""</code>
115  * </p>
116  * </li>
117  * 
118  * <li>
119  * <p>
120  * <code>substring("12345", 1, 0 div 0)</code> returns
121  * 
122  * <code>""</code>
123  * </p>
124  * </li>
125  * 
126  * <li>
127  * <p>
128  * <code>substring("12345", -42, 1 div 0)</code> returns
129  * <code>"12345"</code>
130  * </p>
131  * </li>
132  * 
133  * </ul>
134  * 
135  * <code>substring("12345", -1 div 0, 1 div 0)</code> returns
136  * <code>""</code> </blockquote>
137  * 
138  * @author bob mcwhirter (bob @ werken.com)
139  * 
140  * @see <a href="https://www.w3.org/TR/xpath#function-substring"
141  *      target="_top">Section 4.2 of the XPath Specification</a>
142  */
143 public class SubstringFunction implements Function
144 {
145 
146     /**
147      * Create a new <code>SubstringFunction</code> object.
148      */
149     public SubstringFunction() {}
150 
151     
152     /** Returns a substring of an XPath string-value by character index.
153      *
154      * @param context the context at the point in the
155      *         expression when the function is called
156      * @param args a list that contains two or three items
157      * 
158      * @return a <code>String</code> containing the specifed character subsequence of 
159      *     the original string or the string-value of the context node
160      * 
161      * @throws FunctionCallException if <code>args</code> has more than three
162      *     or less than two items
163      */
164     public Object call(Context context,
165                        List args) throws FunctionCallException
166     {
167         final int argc = args.size();
168         if (argc < 2 || argc > 3){
169             throw new FunctionCallException( "substring() requires two or three arguments." );
170         }
171 
172         final Navigator nav = context.getNavigator();
173 
174         final String str = StringFunction.evaluate(args.get(0), nav );
175         // The spec doesn't really address this case
176         if (str == null) {
177             return "";
178         }
179 
180         final int stringLength = (StringLengthFunction.evaluate(args.get(0), nav )).intValue();
181 
182         if (stringLength == 0) {
183             return "";
184         }
185 
186         Double d1 = NumberFunction.evaluate(args.get(1), nav);
187 
188         if (d1.isNaN()){
189             return "";
190         }
191         // Round the value and subtract 1 as Java strings are zero based
192         int start = RoundFunction.evaluate(d1, nav).intValue() - 1;
193 
194         int substringLength = stringLength;
195         if (argc == 3){
196             Double d2 = NumberFunction.evaluate(args.get(2), nav);
197 
198             if (!d2.isNaN()){
199                 substringLength = RoundFunction.evaluate(d2, nav ).intValue();
200             }
201             else {
202                 substringLength = 0;
203             }
204         }
205         
206         if (substringLength < 0) return "";
207 
208         int end = start + substringLength;
209         if (argc == 2) end = stringLength;
210             
211         // negative start is treated as 0
212         if ( start < 0){
213             start = 0;
214         }
215         else if (start > stringLength){
216             return "";
217         }
218 
219         if (end > stringLength){
220             end = stringLength;
221         }
222         else if (end < start) return "";
223         
224         if (stringLength == str.length()) {
225             // easy case; no surrogate pairs
226             return str.substring(start, end);
227         }
228         else {
229             return unicodeSubstring(str, start, end);
230         }
231         
232     }
233 
234     private static String unicodeSubstring(String s, int start, int end) {
235 
236         StringBuffer result = new StringBuffer(s.length());
237         for (int jChar = 0, uChar=0; uChar < end; jChar++, uChar++) {
238             char c = s.charAt(jChar);
239             if (uChar >= start) result.append(c);
240             if (c >= 0xD800) { // get the low surrogate
241                 // ???? we could check here that this is indeed a low surroagte
242                 // we could also catch StringIndexOutOfBoundsException
243                 jChar++;
244                 if (uChar >= start) result.append(s.charAt(jChar));
245             }
246         }
247         return result.toString();
248     }
249 }