1 # Copyright 2007-2014 Greg Hurrell. All rights reserved.
2 # Licensed under the terms of the BSD 2-clause license.
6 describe Walrat::RegexpParslet do
# Fixture used by the examples that reference @parslet: a parslet built from a
# pattern that accepts a letter or underscore followed by any number of
# letters, digits or underscores.
@parslet = Walrat::RegexpParslet.new(/[a-zA-Z_][a-zA-Z0-9_]*/)
# Building a parslet from nil must be rejected with an ArgumentError whose
# message matches /nil regexp/.
it 'raises an ArgumentError if initialized with nil' do
  expect { Walrat::RegexpParslet.new(nil) }.to raise_error(ArgumentError, /nil regexp/)
end
# Each sample consists solely of letters, digits and underscores and does not
# start with a digit; parsing any of them with @parslet must not raise.
# Samples are checked in order and the first failure aborts the example.
it 'parse should succeed if the input string matches' do
  %w[
    an_identifier An_Identifier AN_IDENTIFIER
    an_identifier1 An_Identifier1 AN_IDENTIFIER1
    a A a9 A9
    _identifier _Identifier _IDENTIFIER _9Identifier
    _
  ].each do |identifier|
    expect { @parslet.parse(identifier) }.not_to raise_error
  end
end
# Same identifier samples as a plain match, but each is followed by ", more".
# Trailing input after the identifier must not make the parse raise.
it 'parse should succeed if the input string matches, even if it continues after the match' do
  %w[
    an_identifier An_Identifier AN_IDENTIFIER
    an_identifier1 An_Identifier1 AN_IDENTIFIER1
    a A a9 A9
    _identifier _Identifier _IDENTIFIER _9Identifier
    _
  ].each do |identifier|
    expect { @parslet.parse("#{identifier}, more") }.not_to raise_error
  end
end
# The parse result must compare equal (via ==) to the matched text, whether or
# not the input continues after the match.
# NOTE(review): the example name mentions MatchDataWrapper, but only equality
# with a String is asserted here -- consider also asserting the result's class.
it 'parse should return a MatchDataWrapper object' do
  expect(@parslet.parse('an_identifier')).to eq('an_identifier')
  expect(@parslet.parse('an_identifier, more')).to eq('an_identifier')
end
# Expects an ArgumentError whose message matches /nil string/.
# NOTE(review): the expression being exercised is not shown on these lines;
# presumably it calls parse with nil, as the example name says -- confirm.
it 'parse should raise an ArgumentError if passed nil' do
end.to raise_error(ArgumentError, /nil string/)
# Inputs that @parslet must reject with Walrat::ParseError; the trailing
# comment on each entry says why it is not acceptable.
it 'parse should raise a ParseError if the input string does not match' do
  [
    '9',           # a number is not a valid identifier
    '9fff',        # identifiers must not start with numbers
    ' identifier', # note the leading whitespace
    ''             # empty strings can't match
  ].each do |input|
    expect { @parslet.parse(input) }.to raise_error(Walrat::ParseError)
  end
end
# Checks eql? between parslets obtained through Regexp#to_parseable: the same
# pattern compares equal, while a different pattern, a pattern differing only
# in letter case, and a plain String do not.
it 'should be able to compare parslets for equality' do
  foo = /foo/.to_parseable
  expect(foo).to eql(/foo/.to_parseable)     # equal
  expect(foo).not_to eql(/bar/.to_parseable) # different
  expect(foo).not_to eql(/Foo/.to_parseable) # differing only in case
  expect(foo).not_to eql('foo')              # totally different classes
end
# Verifies the line_end / column_end values carried by a successful parse
# result. The pattern uses the /m flag so that "." also consumes line
# terminators and every input is matched in full.
# Each row is: input, expected line_end, expected column_end.
it 'should accurately pack line and column ends into whatever gets returned from "parse"' do
  parslet = /.+/m.to_parseable
  [
    ['hello',          0, 5], # single word
    ["hello\n",        1, 0], # single word with newline at end (UNIX style)
    ["hello\r",        1, 0], # single word with newline at end (Classic Mac style)
    ["hello\r\n",      1, 0], # single word with newline at end (Windows style)
    ["hello\nworld",   1, 5], # two lines (UNIX style)
    ["hello\rworld",   1, 5], # two lines (Classic Mac style)
    ["hello\r\nworld", 1, 5]  # two lines (Windows style)
  ].each do |input, expected_line_end, expected_column_end|
    result = parslet.parse(input)
    expect(result.line_end).to eq(expected_line_end)
    expect(result.column_end).to eq(expected_column_end)
  end
end
# For RegexpParslets these examples expect the "last successfully scanned
# position" reported on the error to be line 0, column 0, no matter how much
# of the pattern's prefix the input happens to share.
#
# The block form of raise_error both requires that a Walrat::ParseError is
# raised and yields it for inspection, so an input that unexpectedly parses
# fails the example outright instead of skipping the position checks.
it 'line and column end should reflect last succesfully scanned position prior to failure' do
  parslet = /hello\r\nworld/.to_parseable
  [
    'foobar',         # fail right at start
    'hfoobar',        # fail after 1 character
    "hello\r\nfoobar" # fail after end-of-line
  ].each do |input|
    expect { parslet.parse(input) }.to raise_error(Walrat::ParseError) { |error|
      expect(error.line_end).to eq(0)
      expect(error.column_end).to eq(0)
    }
  end
end
# Sequencing two regexp parslets with the & operator.
describe 'chaining two regexp parslets together' do
  # A successful sequence yields the individual matches, in order.
  it 'parslets should work in specified order' do
    first = Walrat::RegexpParslet.new(/foo.\d/)
    second = Walrat::RegexpParslet.new(/bar.\d/)
    sequence = first & second
    expect(sequence.parse('foo_1bar_2')).to eq(['foo_1', 'bar_2'])
  end

  # Parser Expression Grammars match greedily
  it 'parslets should match greedily' do
    # the first parslet should gobble up the entire string, preventing the
    # second parslet from succeeding
    greedy = Walrat::RegexpParslet.new(/foo.+\d/)
    starved = Walrat::RegexpParslet.new(/bar.+\d/)
    sequence = greedy & starved
    expect { sequence.parse('foo_1bar_2') }.to raise_error(Walrat::ParseError)
  end
end
# Ordered choice between two regexp parslets with the | operator.
describe 'alternating two regexp parslets' do
  it 'either parslet should apply to generate a match' do
    choice = Walrat::RegexpParslet.new(/\d+/) | Walrat::RegexpParslet.new(/[A-Z]+/)
    expect(choice.parse('ABC')).to eq('ABC')
    expect(choice.parse('123')).to eq('123')
  end

  it 'should fail if no parslet generates a match' do
    choice = Walrat::RegexpParslet.new(/\d+/) | Walrat::RegexpParslet.new(/[A-Z]+/)
    expect { choice.parse('abc') }.to raise_error(Walrat::ParseError)
  end

  # Both alternatives can match 'abc'; the capture groups reveal which one was
  # actually used.
  it 'parslets should be tried in left-to-right order' do
    # in this case the first parslet should win even though the second one is also a valid match
    choice = Walrat::RegexpParslet.new(/(.)(..)/) | Walrat::RegexpParslet.new(/(..)(.)/)
    captures = choice.parse('abc').match_data
    expect(captures[1]).to eq('a')
    expect(captures[2]).to eq('bc')

    # here we swap the order; again the first parslet should win
    choice = Walrat::RegexpParslet.new(/(..)(.)/) | Walrat::RegexpParslet.new(/(.)(..)/)
    captures = choice.parse('abc').match_data
    expect(captures[1]).to eq('ab')
    expect(captures[2]).to eq('c')
  end
end
# Sequencing three regexp parslets with the & operator.
describe 'chaining three regexp parslets' do
  # The result is expected to be one flat array of the three matches, in order.
  it 'parslets should work in specified order' do
    sequence = Walrat::RegexpParslet.new(/foo.\d/) &
               Walrat::RegexpParslet.new(/bar.\d/) &
               Walrat::RegexpParslet.new(/.../)
    expect(sequence.parse('foo_1bar_2ABC')).to eq(['foo_1', 'bar_2', 'ABC'])
  end
end
# Ordered choice between three regexp parslets with the | operator.
describe 'alternating three regexp parslets' do
  it 'any parslet should apply to generate a match' do
    choice = Walrat::RegexpParslet.new(/\d+/) |
             Walrat::RegexpParslet.new(/[A-Z]+/) |
             Walrat::RegexpParslet.new(/[a-z]+/)
    %w[ABC 123 abc].each do |input|
      expect(choice.parse(input)).to eq(input)
    end
  end

  it 'should fail if no parslet generates a match' do
    choice = Walrat::RegexpParslet.new(/\d+/) |
             Walrat::RegexpParslet.new(/[A-Z]+/) |
             Walrat::RegexpParslet.new(/[a-z]+/)
    expect { choice.parse(':::') }.to raise_error(Walrat::ParseError)
  end

  # The capture groups reveal which alternative produced the match.
  it 'parslets should be tried in left-to-right order' do
    # in this case the first parslet should win even though the others also produce valid matches
    choice = Walrat::RegexpParslet.new(/(.)(..)/) |
             Walrat::RegexpParslet.new(/(..)(.)/) |
             Walrat::RegexpParslet.new(/(...)/)
    captures = choice.parse('abc').match_data
    expect(captures[1]).to eq('a')
    expect(captures[2]).to eq('bc')

    # here we swap the order; again the first parslet should win
    choice = Walrat::RegexpParslet.new(/(..)(.)/) |
             Walrat::RegexpParslet.new(/(.)(..)/) |
             Walrat::RegexpParslet.new(/(...)/)
    captures = choice.parse('abc').match_data
    expect(captures[1]).to eq('ab')
    expect(captures[2]).to eq('c')

    # similar test but this time the first parslet can't win (doesn't match)
    choice = Walrat::RegexpParslet.new(/foo/) |
             Walrat::RegexpParslet.new(/(...)/) |
             Walrat::RegexpParslet.new(/(.)(..)/)
    captures = choice.parse('abc').match_data
    expect(captures[1]).to eq('abc')
  end
end
# Mixing the sequence (&) and choice (|) operators in a single expression.
# Ruby's own operator precedence makes & bind more tightly than |, which is
# what the unparenthesised examples rely on.
describe 'combining chaining and alternation' do
  it 'chaining should having higher precedence than alternation' do
    # equivalent to /foo/ | ( /bar/ & /abc/ )
    parslet = Walrat::RegexpParslet.new(/foo/) |
              Walrat::RegexpParslet.new(/bar/) &
              Walrat::RegexpParslet.new(/abc/)
    expect(parslet.parse('foo')).to eq('foo')                                 # succeed on first choice
    expect(parslet.parse('barabc')).to eq(['bar', 'abc'])                     # succeed on alternate path
    expect { parslet.parse('bar...') }.to raise_error(Walrat::ParseError)     # fail half-way down alternate path
    expect { parslet.parse('lemon') }.to raise_error(Walrat::ParseError)      # fail immediately

    # swap the order, now equivalent to: ( /bar/ & /abc/ ) | /foo/
    parslet = Walrat::RegexpParslet.new(/bar/) &
              Walrat::RegexpParslet.new(/abc/) |
              Walrat::RegexpParslet.new(/foo/)
    expect(parslet.parse('barabc')).to eq(['bar', 'abc'])                     # succeed on first choice
    expect(parslet.parse('foo')).to eq('foo')                                 # succeed on alternate path
    expect { parslet.parse('bar...') }.to raise_error(Walrat::ParseError)     # fail half-way down first path
    expect { parslet.parse('lemon') }.to raise_error(Walrat::ParseError)      # fail immediately
  end

  it 'should be able to override precedence using parentheses' do
    # take first example above and make it ( /foo/ | /bar/ ) & /abc/
    parslet = (Walrat::RegexpParslet.new(/foo/) |
               Walrat::RegexpParslet.new(/bar/)) &
              Walrat::RegexpParslet.new(/abc/)
    expect(parslet.parse('fooabc')).to eq(['foo', 'abc'])                     # first choice
    expect(parslet.parse('barabc')).to eq(['bar', 'abc'])                     # second choice
    [
      'foo...', # fail in second half
      'bar...', # another way of failing in second half
      'foo',    # another way of failing in second half
      'bar',    # another way of failing in second half
      'lemon',  # fail immediately
      'abcfoo'  # order matters
    ].each do |input|
      expect { parslet.parse(input) }.to raise_error(Walrat::ParseError)
    end

    # take second example above and make it /bar/ & ( /abc/ | /foo/ )
    parslet = Walrat::RegexpParslet.new(/bar/) &
              (Walrat::RegexpParslet.new(/abc/) | Walrat::RegexpParslet.new(/foo/))
    expect(parslet.parse('barabc')).to eq(['bar', 'abc'])                     # succeed on first choice
    expect(parslet.parse('barfoo')).to eq(['bar', 'foo'])                     # second choice
    [
      'bar...', # fail in second part
      'bar',    # another way to fail in second part
      'lemon',  # fail immediately
      'abcbar'  # order matters
    ].each do |input|
      expect { parslet.parse(input) }.to raise_error(Walrat::ParseError)
    end
  end

  it 'should be able to include long runs of sequences' do
    # a & b & c & d | e
    a, b, c, d, e = [/a/, /b/, /c/, /d/, /e/].map { |regexp| Walrat::RegexpParslet.new(regexp) }
    parslet = a & b & c & d | e
    expect(parslet.parse('abcd')).to eq(['a', 'b', 'c', 'd'])
    expect(parslet.parse('e')).to eq('e')
    expect { parslet.parse('f') }.to raise_error(Walrat::ParseError)
  end

  it 'should be able to include long runs of options' do
    # a | b | c | d & e
    a, b, c, d, e = [/a/, /b/, /c/, /d/, /e/].map { |regexp| Walrat::RegexpParslet.new(regexp) }
    parslet = a | b | c | d & e
    expect(parslet.parse('a')).to eq('a')
    expect(parslet.parse('b')).to eq('b')
    expect(parslet.parse('c')).to eq('c')
    expect(parslet.parse('de')).to eq(['d', 'e'])
    expect { parslet.parse('f') }.to raise_error(Walrat::ParseError)
  end

  it 'should be able to alternate repeatedly between sequences and choices' do
    # a & b | c & d | e
    a, b, c, d, e = [/a/, /b/, /c/, /d/, /e/].map { |regexp| Walrat::RegexpParslet.new(regexp) }
    parslet = a & b | c & d | e
    expect(parslet.parse('ab')).to eq(['a', 'b'])
    expect(parslet.parse('cd')).to eq(['c', 'd'])
    expect(parslet.parse('e')).to eq('e')
    expect { parslet.parse('f') }.to raise_error(Walrat::ParseError)
  end

  it 'should be able to combine long runs with alternation' do
    # A & B & C | D | E | F & G & H
    a, b, c, d, e, f, g, h =
      [/a/, /b/, /c/, /d/, /e/, /f/, /g/, /h/].map { |regexp| Walrat::RegexpParslet.new(regexp) }
    parslet = a & b & c | d | e | f & g & h
    expect(parslet.parse('abc')).to eq(['a', 'b', 'c'])
    expect(parslet.parse('d')).to eq('d')
    expect(parslet.parse('e')).to eq('e')
    expect(parslet.parse('fgh')).to eq(['f', 'g', 'h'])
    expect { parslet.parse('i') }.to raise_error(Walrat::ParseError)
  end
end