1 /* 2 * $Header$ 3 * $Revision$ 4 * $Date$ 5 * 6 * ==================================================================== 7 * 8 * Copyright 2000-2002 bob mcwhirter & James Strachan. 9 * All rights reserved. 10 * 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions are 14 * met: 15 * 16 * * Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 19 * * Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 23 * * Neither the name of the Jaxen Project nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 28 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 30 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 31 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 32 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 33 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 34 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 35 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 36 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 37 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 38 * 39 * ==================================================================== 40 * This software consists of voluntary contributions made by many 41 * individuals on behalf of the Jaxen Project and was originally 42 * created by bob mcwhirter <bob@werken.com> and 43 * James Strachan <jstrachan@apache.org>. For more information on the 44 * Jaxen Project, please see <http://www.jaxen.org/>. 45 * 46 */ 47 package org.jaxen.function; 48 49 import java.util.List; 50 51 import org.jaxen.Context; 52 import org.jaxen.Function; 53 import org.jaxen.FunctionCallException; 54 import org.jaxen.Navigator; 55 /** 56 * <p> 57 * <b>4.2</b> 58 * <code><i>string</i> substring(<i>string</i>,<i>number</i>,<i>number?</i>)</code> 59 * </p> 60 * 61 * <blockquote cite="http://www.w3.org/TR/xpath"> 62 * <p>The <b>substring</b> function returns the 63 * substring of the first argument starting at the position specified in 64 * the second argument with length specified in the third argument. For 65 * example, 66 * 67 * <code>substring("12345",2,3)</code> returns <code>"234"</code>. 68 * If the third argument is not specified, it returns the substring 69 * starting at the position specified in the second argument and 70 * continuing to the end of the string. For example, 71 * <code>substring("12345",2)</code> returns <code>"2345"</code>. 72 * </p> 73 * 74 * <p> 75 * More precisely, each character in the string (see <a 76 * href="https://www.w3.org/TR/xpath#strings">[<b>3.6 Strings</b>]</a>) is considered to have a 77 * numeric position: the position of the first character is 1, the 78 * position of the second character is 2 and so on. 79 * </p> 80 * 81 * <blockquote> <b>NOTE: </b>This differs from Java and ECMAScript, in 82 * which the <code>String.substring</code> method treats the position 83 * of the first character as 0.</blockquote> 84 * 85 * <p> 86 * The returned substring contains those characters for which the 87 * position of the character is greater than or equal to the rounded 88 * value of the second argument and, if the third argument is specified, 89 * less than the sum of the rounded value of the second argument and the 90 * rounded value of the third argument; the comparisons and addition 91 * used for the above follow the standard IEEE 754 rules; rounding is 92 * done as if by a call to the <b><a href="#function-round">round</a></b> 93 * function. The following examples illustrate various unusual cases: 94 * </p> 95 * 96 * <ul> 97 * 98 * <li> 99 * <p> 100 * <code>substring("12345", 1.5, 2.6)</code> returns 101 * <code>"234"</code> 102 * </p> 103 * </li> 104 * 105 * <li> 106 * <p> 107 * <code>substring("12345", 0, 3)</code> returns <code>"12"</code> 108 * 109 * </p> 110 * </li> 111 * 112 * <li> 113 * <p> 114 * <code>substring("12345", 0 div 0, 3)</code> returns <code>""</code> 115 * </p> 116 * </li> 117 * 118 * <li> 119 * <p>. 120 * <code>substring("12345", 1, 0 div 0)</code> returns 121 * 122 * <code>""</code> 123 * </p> 124 * </li> 125 * 126 * <li> 127 * <p> 128 * <code>substring("12345", -42, 1 div 0)</code> returns 129 * <code>"12345"</code> 130 * </p> 131 * </li> 132 * 133 * </ul> 134 * 135 * <code>substring("12345", -1 div 0, 1 div 0)</code> returns 136 * <code>""</code> </blockquote> 137 * 138 * @author bob mcwhirter (bob @ werken.com) 139 * 140 * @see <a href="https://www.w3.org/TR/xpath#function-substring" 141 * target="_top">Section 4.2 of the XPath Specification</a> 142 */ 143 public class SubstringFunction implements Function 144 { 145 146 /** 147 * Create a new <code>SubstringFunction</code> object. 148 */ 149 public SubstringFunction() {} 150 151 152 /** Returns a substring of an XPath string-value by character index. 153 * 154 * @param context the context at the point in the 155 * expression when the function is called 156 * @param args a list that contains two or three items 157 * 158 * @return a <code>String</code> containing the specifed character subsequence of 159 * the original string or the string-value of the context node 160 * 161 * @throws FunctionCallException if <code>args</code> has more than three 162 * or less than two items 163 */ 164 public Object call(Context context, 165 List args) throws FunctionCallException 166 { 167 final int argc = args.size(); 168 if (argc < 2 || argc > 3){ 169 throw new FunctionCallException( "substring() requires two or three arguments." ); 170 } 171 172 final Navigator nav = context.getNavigator(); 173 174 final String str = StringFunction.evaluate(args.get(0), nav ); 175 // The spec doesn't really address this case 176 if (str == null) { 177 return ""; 178 } 179 180 final int stringLength = (StringLengthFunction.evaluate(args.get(0), nav )).intValue(); 181 182 if (stringLength == 0) { 183 return ""; 184 } 185 186 Double d1 = NumberFunction.evaluate(args.get(1), nav); 187 188 if (d1.isNaN()){ 189 return ""; 190 } 191 // Round the value and subtract 1 as Java strings are zero based 192 int start = RoundFunction.evaluate(d1, nav).intValue() - 1; 193 194 int substringLength = stringLength; 195 if (argc == 3){ 196 Double d2 = NumberFunction.evaluate(args.get(2), nav); 197 198 if (!d2.isNaN()){ 199 substringLength = RoundFunction.evaluate(d2, nav ).intValue(); 200 } 201 else { 202 substringLength = 0; 203 } 204 } 205 206 if (substringLength < 0) return ""; 207 208 int end = start + substringLength; 209 if (argc == 2) end = stringLength; 210 211 // negative start is treated as 0 212 if ( start < 0){ 213 start = 0; 214 } 215 else if (start > stringLength){ 216 return ""; 217 } 218 219 if (end > stringLength){ 220 end = stringLength; 221 } 222 else if (end < start) return ""; 223 224 if (stringLength == str.length()) { 225 // easy case; no surrogate pairs 226 return str.substring(start, end); 227 } 228 else { 229 return unicodeSubstring(str, start, end); 230 } 231 232 } 233 234 private static String unicodeSubstring(String s, int start, int end) { 235 236 StringBuffer result = new StringBuffer(s.length()); 237 for (int jChar = 0, uChar=0; uChar < end; jChar++, uChar++) { 238 char c = s.charAt(jChar); 239 if (uChar >= start) result.append(c); 240 if (c >= 0xD800) { // get the low surrogate 241 // ???? we could check here that this is indeed a low surroagte 242 // we could also catch StringIndexOutOfBoundsException 243 jChar++; 244 if (uChar >= start) result.append(s.charAt(jChar)); 245 } 246 } 247 return result.toString(); 248 } 249 }