1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49 package org.jaxen.saxpath.base;
50
51 class XPathLexer
52 {
53 private String xpath;
54 private int currentPosition;
55 private int endPosition;
56 private boolean expectOperator = false;
57
58 XPathLexer(String xpath)
59 {
60 setXPath( xpath );
61 }
62
63 private void setXPath(String xpath)
64 {
65 this.xpath = xpath;
66 this.currentPosition = 0;
67 this.endPosition = xpath.length();
68 }
69
70 String getXPath()
71 {
72 return this.xpath;
73 }
74
75 Token nextToken()
76 {
77 Token token = null;
78
79 do
80 {
81 token = null;
82
83 switch ( LA(1) )
84 {
85 case '$':
86 {
87 token = dollar();
88 break;
89 }
90
91 case '"':
92 case '\'':
93 {
94 token = literal();
95 break;
96 }
97
98 case '/':
99 {
100 token = slashes();
101 break;
102 }
103
104 case ',':
105 {
106 token = comma();
107 break;
108 }
109
110 case '(':
111 {
112 token = leftParen();
113 break;
114 }
115
116 case ')':
117 {
118 token = rightParen();
119 break;
120 }
121
122 case '[':
123 {
124 token = leftBracket();
125 break;
126 }
127
128 case ']':
129 {
130 token = rightBracket();
131 break;
132 }
133
134 case '+':
135 {
136 token = plus();
137 break;
138 }
139
140 case '-':
141 {
142 token = minus();
143 break;
144 }
145
146 case '<':
147 case '>':
148 {
149 token = relationalOperator();
150 break;
151 }
152
153 case '=':
154 {
155 token = equals();
156 break;
157 }
158
159 case '!':
160 {
161 if ( LA(2) == '=' )
162 {
163 token = notEquals();
164 }
165 break;
166 }
167
168 case '|':
169 {
170 token = pipe();
171 break;
172 }
173
174 case '@':
175 {
176 token = at();
177 break;
178 }
179
180 case ':':
181 {
182 if ( LA(2) == ':' )
183 {
184 token = doubleColon();
185 }
186 else
187 {
188 token = colon();
189 }
190 break;
191 }
192
193 case '*':
194 {
195 token = star();
196 break;
197 }
198
199 case '.':
200 {
201 switch ( LA(2) )
202 {
203 case '0':
204 case '1':
205 case '2':
206 case '3':
207 case '4':
208 case '5':
209 case '6':
210 case '7':
211 case '8':
212 case '9':
213 {
214 token = number();
215 break;
216 }
217 default:
218 {
219 token = dots();
220 break;
221 }
222 }
223 break;
224 }
225
226 case '0':
227 case '1':
228 case '2':
229 case '3':
230 case '4':
231 case '5':
232 case '6':
233 case '7':
234 case '8':
235 case '9':
236 {
237 token = number();
238 break;
239 }
240
241 case ' ':
242 case '\t':
243 case '\n':
244 case '\r':
245 {
246 token = whitespace();
247 break;
248 }
249
250 default:
251 {
252 if ( Verifier.isXMLNCNameStartCharacter( LA(1) ) )
253 {
254 token = identifierOrOperatorName();
255 }
256 }
257 }
258
259 if ( token == null )
260 {
261 if (!hasMoreChars())
262 {
263 token = new Token( TokenTypes.EOF,
264 getXPath(),
265 this.currentPosition,
266 this.endPosition );
267 }
268 else
269 {
270 token = new Token( TokenTypes.ERROR,
271 getXPath(),
272 this.currentPosition,
273 this.endPosition );
274 }
275 }
276
277 }
278 while (token.getTokenType() == TokenTypes.SKIP );
279
280
281
282
283
284
285
286
287
288
289
290
291 switch ( token.getTokenType() )
292 {
293 case TokenTypes.AT:
294 case TokenTypes.DOUBLE_COLON:
295 case TokenTypes.LEFT_PAREN:
296 case TokenTypes.LEFT_BRACKET:
297 case TokenTypes.AND:
298 case TokenTypes.OR:
299 case TokenTypes.MOD:
300 case TokenTypes.DIV:
301 case TokenTypes.COLON:
302 case TokenTypes.SLASH:
303 case TokenTypes.DOUBLE_SLASH:
304 case TokenTypes.PIPE:
305 case TokenTypes.DOLLAR:
306 case TokenTypes.PLUS:
307 case TokenTypes.MINUS:
308 case TokenTypes.STAR_OPERATOR:
309 case TokenTypes.COMMA:
310 case TokenTypes.LESS_THAN_SIGN:
311 case TokenTypes.GREATER_THAN_SIGN:
312 case TokenTypes.LESS_THAN_OR_EQUALS_SIGN:
313 case TokenTypes.GREATER_THAN_OR_EQUALS_SIGN:
314 case TokenTypes.EQUALS:
315 case TokenTypes.NOT_EQUALS:
316 {
317 expectOperator = false;
318 break;
319 }
320 default:
321 {
322 expectOperator = true;
323 break;
324 }
325 }
326
327 return token;
328 }
329
330 private Token identifierOrOperatorName()
331 {
332 Token token = null;
333 if ( expectOperator ) {
334 token = operatorName();
335 } else {
336 token = identifier();
337 }
338 return token;
339 }
340
341 private Token identifier()
342 {
343 Token token = null;
344
345 int start = this.currentPosition;
346
347 while ( hasMoreChars() )
348 {
349 if ( Verifier.isXMLNCNameCharacter( LA(1) ) )
350 {
351 consume();
352 }
353 else
354 {
355 break;
356 }
357 }
358
359 token = new Token( TokenTypes.IDENTIFIER,
360 getXPath(),
361 start,
362 this.currentPosition );
363
364 return token;
365 }
366
367 private Token operatorName()
368 {
369 Token token = null;
370
371 switch ( LA(1) )
372 {
373 case 'a':
374 {
375 token = and();
376 break;
377 }
378
379 case 'o':
380 {
381 token = or();
382 break;
383 }
384
385 case 'm':
386 {
387 token = mod();
388 break;
389 }
390
391 case 'd':
392 {
393 token = div();
394 break;
395 }
396 }
397
398 return token;
399 }
400
401 private Token mod()
402 {
403 Token token = null;
404
405 if ( ( LA(1) == 'm' )
406 &&
407 ( LA(2) == 'o' )
408 &&
409 ( LA(3) == 'd' )
410 )
411 {
412 token = new Token( TokenTypes.MOD,
413 getXPath(),
414 this.currentPosition,
415 this.currentPosition+3 );
416
417 consume();
418 consume();
419 consume();
420 }
421
422 return token;
423 }
424
425 private Token div()
426 {
427 Token token = null;
428
429 if ( ( LA(1) == 'd' )
430 &&
431 ( LA(2) == 'i' )
432 &&
433 ( LA(3) == 'v' )
434 )
435 {
436 token = new Token( TokenTypes.DIV,
437 getXPath(),
438 this.currentPosition,
439 this.currentPosition+3 );
440
441 consume();
442 consume();
443 consume();
444 }
445
446 return token;
447 }
448
449 private Token and()
450 {
451 Token token = null;
452
453 if ( ( LA(1) == 'a' )
454 &&
455 ( LA(2) == 'n' )
456 &&
457 ( LA(3) == 'd' )
458 )
459 {
460 token = new Token( TokenTypes.AND,
461 getXPath(),
462 this.currentPosition,
463 this.currentPosition+3 );
464
465 consume();
466 consume();
467 consume();
468 }
469
470 return token;
471 }
472
473 private Token or()
474 {
475 Token token = null;
476
477 if ( ( LA(1) == 'o' )
478 &&
479 ( LA(2) == 'r' )
480 )
481 {
482 token = new Token( TokenTypes.OR,
483 getXPath(),
484 this.currentPosition,
485 this.currentPosition+2 );
486
487 consume();
488 consume();
489 }
490
491 return token;
492 }
493
494 private Token number()
495 {
496 int start = this.currentPosition;
497 boolean periodAllowed = true;
498
499 loop:
500 while( true )
501 {
502 switch ( LA(1) )
503 {
504 case '.':
505 if ( periodAllowed )
506 {
507 periodAllowed = false;
508 consume();
509 }
510 else
511 {
512 break loop;
513 }
514 break;
515 case '0':
516 case '1':
517 case '2':
518 case '3':
519 case '4':
520 case '5':
521 case '6':
522 case '7':
523 case '8':
524 case '9':
525 consume();
526 break;
527 default:
528 break loop;
529 }
530 }
531
532 return new Token( TokenTypes.DOUBLE,
533 getXPath(),
534 start,
535 this.currentPosition );
536 }
537
538 private Token whitespace()
539 {
540 consume();
541
542 loop:
543 while( hasMoreChars() )
544 {
545 switch ( LA(1) )
546 {
547 case ' ':
548 case '\t':
549 case '\n':
550 case '\r':
551 {
552 consume();
553 break;
554 }
555
556 default:
557 {
558 break loop;
559 }
560 }
561 }
562
563 return new Token( TokenTypes.SKIP,
564 getXPath(),
565 0,
566 0 );
567 }
568
569 private Token comma()
570 {
571 Token token = new Token( TokenTypes.COMMA,
572 getXPath(),
573 this.currentPosition,
574 this.currentPosition+1 );
575
576 consume();
577
578 return token;
579 }
580
581 private Token equals()
582 {
583 Token token = new Token( TokenTypes.EQUALS,
584 getXPath(),
585 this.currentPosition,
586 this.currentPosition+1 );
587
588 consume();
589
590 return token;
591 }
592
593 private Token minus()
594 {
595 Token token = new Token( TokenTypes.MINUS,
596 getXPath(),
597 this.currentPosition,
598 this.currentPosition+1 );
599 consume();
600
601 return token;
602 }
603
604 private Token plus()
605 {
606 Token token = new Token( TokenTypes.PLUS,
607 getXPath(),
608 this.currentPosition,
609 this.currentPosition+1 );
610 consume();
611
612 return token;
613 }
614
615 private Token dollar()
616 {
617 Token token = new Token( TokenTypes.DOLLAR,
618 getXPath(),
619 this.currentPosition,
620 this.currentPosition+1 );
621 consume();
622
623 return token;
624 }
625
626 private Token pipe()
627 {
628 Token token = new Token( TokenTypes.PIPE,
629 getXPath(),
630 this.currentPosition,
631 this.currentPosition+1 );
632
633 consume();
634
635 return token;
636 }
637
638 private Token at()
639 {
640 Token token = new Token( TokenTypes.AT,
641 getXPath(),
642 this.currentPosition,
643 this.currentPosition+1 );
644
645 consume();
646
647 return token;
648 }
649
650 private Token colon()
651 {
652 Token token = new Token( TokenTypes.COLON,
653 getXPath(),
654 this.currentPosition,
655 this.currentPosition+1 );
656 consume();
657
658 return token;
659 }
660
661 private Token doubleColon()
662 {
663 Token token = new Token( TokenTypes.DOUBLE_COLON,
664 getXPath(),
665 this.currentPosition,
666 this.currentPosition+2 );
667
668 consume();
669 consume();
670
671 return token;
672 }
673
674 private Token notEquals()
675 {
676 Token token = new Token( TokenTypes.NOT_EQUALS,
677 getXPath(),
678 this.currentPosition,
679 this.currentPosition + 2 );
680
681 consume();
682 consume();
683
684 return token;
685 }
686
687 private Token relationalOperator()
688 {
689 Token token = null;
690
691 switch ( LA(1) )
692 {
693 case '<':
694 {
695 if ( LA(2) == '=' )
696 {
697 token = new Token( TokenTypes.LESS_THAN_OR_EQUALS_SIGN,
698 getXPath(),
699 this.currentPosition,
700 this.currentPosition + 2 );
701 consume();
702 }
703 else
704 {
705 token = new Token( TokenTypes.LESS_THAN_SIGN,
706 getXPath(),
707 this.currentPosition,
708 this.currentPosition + 1);
709 }
710
711 consume();
712 break;
713 }
714 case '>':
715 {
716 if ( LA(2) == '=' )
717 {
718 token = new Token( TokenTypes.GREATER_THAN_OR_EQUALS_SIGN,
719 getXPath(),
720 this.currentPosition,
721 this.currentPosition + 2 );
722 consume();
723 }
724 else
725 {
726 token = new Token( TokenTypes.GREATER_THAN_SIGN,
727 getXPath(),
728 this.currentPosition,
729 this.currentPosition + 1 );
730 }
731
732 consume();
733 break;
734 }
735 }
736
737 return token;
738
739 }
740
741
742 private Token star()
743 {
744 int tokenType = expectOperator ? TokenTypes.STAR_OPERATOR : TokenTypes.STAR;
745 Token token = new Token( tokenType,
746 getXPath(),
747 this.currentPosition,
748 this.currentPosition+1 );
749
750 consume();
751
752 return token;
753 }
754
755 private Token literal()
756 {
757 Token token = null;
758
759 char match = LA(1);
760
761 consume();
762
763 int start = this.currentPosition;
764
765 while ( ( token == null )
766 &&
767 hasMoreChars() )
768 {
769 if ( LA(1) == match )
770 {
771 token = new Token( TokenTypes.LITERAL,
772 getXPath(),
773 start,
774 this.currentPosition );
775 }
776 consume();
777 }
778
779 return token;
780 }
781
782 private Token dots()
783 {
784 Token token = null;
785
786 switch ( LA(2) )
787 {
788 case '.':
789 {
790 token = new Token( TokenTypes.DOT_DOT,
791 getXPath(),
792 this.currentPosition,
793 this.currentPosition+2 ) ;
794 consume();
795 consume();
796 break;
797 }
798 default:
799 {
800 token = new Token( TokenTypes.DOT,
801 getXPath(),
802 this.currentPosition,
803 this.currentPosition+1 );
804 consume();
805 break;
806 }
807 }
808
809 return token;
810 }
811
812 private Token leftBracket()
813 {
814 Token token = new Token( TokenTypes.LEFT_BRACKET,
815 getXPath(),
816 this.currentPosition,
817 this.currentPosition+1 );
818
819 consume();
820
821 return token;
822 }
823
824 private Token rightBracket()
825 {
826 Token token = new Token( TokenTypes.RIGHT_BRACKET,
827 getXPath(),
828 this.currentPosition,
829 this.currentPosition+1 );
830
831 consume();
832
833 return token;
834 }
835
836 private Token leftParen()
837 {
838 Token token = new Token( TokenTypes.LEFT_PAREN,
839 getXPath(),
840 this.currentPosition,
841 this.currentPosition+1 );
842
843 consume();
844
845 return token;
846 }
847
848 private Token rightParen()
849 {
850 Token token = new Token( TokenTypes.RIGHT_PAREN,
851 getXPath(),
852 this.currentPosition,
853 this.currentPosition+1 );
854
855 consume();
856
857 return token;
858 }
859
860 private Token slashes()
861 {
862 Token token = null;
863
864 switch ( LA(2) )
865 {
866 case '/':
867 {
868 token = new Token( TokenTypes.DOUBLE_SLASH,
869 getXPath(),
870 this.currentPosition,
871 this.currentPosition+2 );
872 consume();
873 consume();
874 break;
875 }
876 default:
877 {
878 token = new Token( TokenTypes.SLASH,
879 getXPath(),
880 this.currentPosition,
881 this.currentPosition+1 );
882 consume();
883 }
884 }
885
886 return token;
887 }
888
889 private char LA(int i)
890 {
891 if ( currentPosition + ( i - 1 ) >= this.endPosition )
892 {
893 return (char) -1;
894 }
895
896 return getXPath().charAt( this.currentPosition + (i - 1) );
897 }
898
899 private void consume()
900 {
901 ++this.currentPosition;
902 }
903
904 private boolean hasMoreChars()
905 {
906 return this.currentPosition < this.endPosition;
907 }
908
909 }