1 # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are met:
5 # 1. Redistributions of source code must retain the above copyright notice,
6 # this list of conditions and the following disclaimer.
7 # 2. Redistributions in binary form must reproduce the above copyright notice,
8 # this list of conditions and the following disclaimer in the documentation
9 # and/or other materials provided with the distribution.
11 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21 # POSSIBILITY OF SUCH DAMAGE.
23 require File.expand_path('../spec_helper', File.dirname(__FILE__))
25 describe Walrat::Grammar do
27 it 'complains if either parameter is nil' do
29 class AxeGrammar < Walrat::Grammar
30 rule nil, 'expression'
32 end.to raise_error(ArgumentError, /nil symbol/)
35 class BoneGrammar < Walrat::Grammar
38 end.to raise_error(ArgumentError, /nil parseable/)
41 class CatGrammar < Walrat::Grammar
44 end.to raise_error(ArgumentError, /nil/)
47 it 'complains if an attempt is made to define a rule a second time' do
49 class DogGrammar < Walrat::Grammar
53 end.to raise_error(ArgumentError, /already defined/)
57 describe 'defining productions in a grammar' do
58 it '"node" method should complain if new class name is nil' do
60 class NodeComplainingGrammar < Walrat::Grammar
63 end.to raise_error(ArgumentError, /nil new_class_name/)
66 it 'should be able to define a simple Node subclass using the "node" function' do
67 class NodeGrammar1 < Walrat::Grammar
68 node :my_node_subclass
69 node :my_subclass_of_a_subclass, :my_node_subclass
72 NodeGrammar1::MyNodeSubclass.superclass.should == Walrat::Node
73 NodeGrammar1::MySubclassOfASubclass.superclass.should == NodeGrammar1::MyNodeSubclass
76 it 'should complain if an attempt is made to create the same production class twice' do
78 class HowToGetControlOfJavaAwayFromSun < Walrat::Grammar
84 end.to raise_error(ArgumentError, /production already defined/)
87 it 'should complain if an attempt is made to create a production for a rule that does not exist yet' do
89 class GettingControlOfJavaAwayFromSun < Walrat::Grammar
93 end.to raise_error(ArgumentError, /non-existent rule/)
97 describe 'parsing using a grammar' do
98 it 'should complain if asked to parse a nil string' do
99 class BobGrammar < Walrat::Grammar; end
101 BobGrammar.new.parse(nil)
102 end.to raise_error(ArgumentError, /nil string/)
105 it 'should complain if trying to parse without first defining a start symbol' do
106 class RoyalGrammar < Walrat::Grammar; end
108 RoyalGrammar.new.parse('foo')
109 end.to raise_error(RuntimeError, /starting symbol not defined/)
112 it 'should parse starting with the start symbol' do
113 class AliceGrammar < Walrat::Grammar
115 starting_symbol :expr
118 grammar = AliceGrammar.new
119 grammar.parse('foo').should == 'foo'
120 lambda { grammar.parse('') }.should raise_error(Walrat::ParseError)
123 it 'should complain if reference is made to an undefined symbol' do
124 class RoyGrammar < Walrat::Grammar
125 starting_symbol :expr # :expr is not defined
129 RoyGrammar.new.parse('foo')
130 end.should raise_error(/no rule for key/)
133 it 'should be able to parse using a simple grammar (one rule)' do
134 class SimpleGrammar < Walrat::Grammar
139 grammar = SimpleGrammar.new
140 grammar.parse('foo!').should == 'foo!'
141 lambda { grammar.parse('---') }.should raise_error(Walrat::ParseError)
144 it 'should be able to parse using a simple grammar (two rules)' do
145 class AlmostAsSimpleGrammar < Walrat::Grammar
147 rule :foo, 'foo!' | :bar
151 grammar = AlmostAsSimpleGrammar.new
152 grammar.parse('foo!').should == 'foo!'
153 grammar.parse('bar').should == 'bar'
154 lambda { grammar.parse('---') }.should raise_error(Walrat::ParseError)
157 it 'should be able to parse using a simple grammar (three rules)' do
158 # a basic version written using intermediary parslets
159 # (really two parslets and one rule)
160 class MacGrammar < Walrat::Grammar
161 starting_symbol :comment
164 comment_marker = '##'
168 rule :comment, comment_marker & comment_body.optional
171 grammar = MacGrammar.new
172 grammar.parse('## hello!').should == ['##', ' hello!']
173 grammar.parse('##').should == '##'
174 lambda { grammar.parse('foobar') }.should raise_error(Walrat::ParseError)
176 # the same grammar rewritten without intermediary parslets
177 # (three rules, no standalone parslets)
178 class MacAltGrammar < Walrat::Grammar
179 starting_symbol :comment
180 rule :comment, :comment_marker & :comment_body.optional
181 rule :comment_marker, '##'
182 rule :comment_body, /.+/
185 grammar = MacAltGrammar.new
186 grammar.parse('## hello!').should == ['##', ' hello!']
187 grammar.parse('##').should == '##'
188 lambda { grammar.parse('foobar') }.should raise_error(Walrat::ParseError)
191 it 'should be able to parse using recursive rules (nested parentheses)' do
193 class NestedGrammar < Walrat::Grammar
194 starting_symbol :bracket_expression
195 rule :left_bracket, '('
196 rule :right_bracket, ')'
197 rule :bracket_content, (/[^()]+/ | :bracket_expression).zero_or_more
198 rule :bracket_expression, :left_bracket & :bracket_content.optional & :right_bracket
201 grammar = NestedGrammar.new
202 grammar.parse('()').should == ['(', ')']
203 grammar.parse('(content)').should == ['(', 'content', ')']
204 grammar.parse('(content (and more content))').should == ['(', ['content ', ['(', 'and more content', ')']], ')']
205 lambda { grammar.parse('(') }.should raise_error(Walrat::ParseError)
207 # same example but automatically skipping the delimiting braces for clearer output
208 class NestedSkippingGrammar < Walrat::Grammar
209 starting_symbol :bracket_expression
210 rule :bracket_expression, '('.skip & (/[^()]+/ | :bracket_expression).zero_or_more & ')'.skip
213 grammar = NestedSkippingGrammar.new
214 grammar.parse('()').should == []
215 grammar.parse('(content)').should == 'content'
216 grammar.parse('(content (and more content))').should == ['content ', 'and more content']
217 grammar.parse('(content (and more content)(and more))').should == ['content ', 'and more content', 'and more']
218 grammar.parse('(content (and more content)(and more)(more still))').should == ['content ', 'and more content', 'and more', 'more still']
219 grammar.parse('(content (and more content)(and more(more still)))').should == ['content ', 'and more content', ['and more', 'more still']]
220 lambda { grammar.parse('(') }.should raise_error(Walrat::ParseError)
222 # note that this confusing (possible even misleading) nesting goes away if you use a proper AST
223 class NestedBracketsWithAST < Walrat::Grammar
224 starting_symbol :bracket_expression
225 rule :text_expression, /[^()]+/
226 rule :bracket_expression,
228 (:text_expression | :bracket_expression).zero_or_more &
230 node :bracket_expression
231 production :bracket_expression, :children
235 grammar = NestedBracketsWithAST.new
236 grammar.parse('()').children.should == []
237 grammar.parse('(content)').children.to_s.should == 'content'
239 # nested test: two expressions at the first level, one of them nested
240 results = grammar.parse('(content (and more content))')
241 results.children[0].should == 'content '
242 results.children[1].children.to_s.should == 'and more content'
244 # nested test: three expressions at first level, two of them nested
245 results = grammar.parse('(content (and more content)(and more))')#.should == ['content ', 'and more content', 'and more']
246 results.children[0].should == 'content '
247 results.children[1].children.should == 'and more content'
248 results.children[2].children.should == 'and more'
250 # nested test: four expressions at the first level, three of them nested
251 results = grammar.parse('(content (and more content)(and more)(more still))')
252 results.children[0].should == 'content '
253 results.children[1].children.should == 'and more content'
254 results.children[2].children.should == 'and more'
255 results.children[3].children.should == 'more still'
257 # nested test: three expressions at the first level, one nested and another not only nested but containing another level of nesting
258 results = grammar.parse('(content (and more content)(and more(more still)))')
259 results.children[0].should == 'content '
260 results.children[1].children.should == 'and more content'
261 results.children[2].children[0].should == 'and more'
262 results.children[2].children[1].children.should == 'more still'
265 lambda { grammar.parse('(') }.should raise_error(Walrat::ParseError)
268 it 'should be able to parse using recursive rules (nested comments)' do
269 class NestedCommentsGrammar < Walrat::Grammar
270 starting_symbol :comment
271 rule :comment_start, '/*'
272 rule :comment_end, '*/'
273 rule :comment_content, (:comment | /\/+/ | ('*' & '/'.not!) | /[^*\/]+/).zero_or_more
274 rule :comment, '/*' & :comment_content.optional & '*/'
277 grammar = NestedCommentsGrammar.new
278 grammar.parse('/**/').should == ['/*', '*/']
279 grammar.parse('/*comment*/').should == ['/*', 'comment', '*/']
280 grammar.parse('/* comment /* nested */*/').should == ['/*', [' comment ', ['/*', ' nested ', '*/']], '*/']
281 lambda { grammar.parse('/*') }.should raise_error(Walrat::ParseError)
284 it 'should be able to write a grammar that produces an AST for a simple language that supports addition and assignment' do
285 class SimpleASTLanguage < Walrat::Grammar
286 starting_symbol :expression
289 rule :identifier, /[a-zA-Z_][a-zA-Z0-9_]*/
291 production :identifier
292 rule :integer_literal, /[0-9]+/
293 node :integer_literal
294 production :integer_literal
297 rule :expression, :assignment_expression | :addition_expression | :identifier | :integer_literal
299 rule :assignment_expression, :identifier & '='.skip & :expression
300 node :assignment_expression, :expression
301 production :assignment_expression, :target, :value
302 rule :addition_expression, (:identifier | :integer_literal) & '+'.skip & :expression
303 node :addition_expression, :expression
304 production :addition_expression, :summee, :summor
307 grammar = SimpleASTLanguage.new
308 results = grammar.parse('hello')
309 results.should be_kind_of(SimpleASTLanguage::Identifier)
310 results.lexeme.should == 'hello'
312 results = grammar.parse('1234')
313 results.should be_kind_of(SimpleASTLanguage::IntegerLiteral)
314 results.lexeme.should == '1234'
316 results = grammar.parse('foo=bar')
317 results.should be_kind_of(SimpleASTLanguage::Expression)
318 results.should be_kind_of(SimpleASTLanguage::AssignmentExpression)
319 results.target.should be_kind_of(SimpleASTLanguage::Identifier)
320 results.target.lexeme.should == 'foo'
321 results.value.should be_kind_of(SimpleASTLanguage::Identifier)
322 results.value.lexeme.should == 'bar'
324 results = grammar.parse('baz+123')
325 results.should be_kind_of(SimpleASTLanguage::Expression)
326 results.should be_kind_of(SimpleASTLanguage::AdditionExpression)
327 results.summee.should be_kind_of(SimpleASTLanguage::Identifier)
328 results.summee.lexeme.should == 'baz'
329 results.summor.should be_kind_of(SimpleASTLanguage::IntegerLiteral)
330 results.summor.lexeme.should == '123'
332 results = grammar.parse('foo=abc+123')
333 results.should be_kind_of(SimpleASTLanguage::Expression)
334 results.should be_kind_of(SimpleASTLanguage::AssignmentExpression)
335 results.target.should be_kind_of(SimpleASTLanguage::Identifier)
336 results.target.lexeme.should == 'foo'
337 results.value.should be_kind_of(SimpleASTLanguage::AdditionExpression)
338 results.value.summee.should be_kind_of(SimpleASTLanguage::Identifier)
339 results.value.summee.lexeme.should == 'abc'
340 results.value.summor.should be_kind_of(SimpleASTLanguage::IntegerLiteral)
341 results.value.summor.lexeme.should == '123'
343 results = grammar.parse('a+b+2')
344 results.should be_kind_of(SimpleASTLanguage::Expression)
345 results.should be_kind_of(SimpleASTLanguage::AdditionExpression)
346 results.summee.should be_kind_of(SimpleASTLanguage::Identifier)
347 results.summee.lexeme.should == 'a'
348 results.summor.should be_kind_of(SimpleASTLanguage::AdditionExpression)
349 results.summor.summee.should be_kind_of(SimpleASTLanguage::Identifier)
350 results.summor.summee.lexeme.should == 'b'
351 results.summor.summor.should be_kind_of(SimpleASTLanguage::IntegerLiteral)
352 results.summor.summor.lexeme.should == '2'
355 it 'should be able to write a grammar that complains if all the input is not consumed' do
356 class ComplainingGrammar < Walrat::Grammar
357 starting_symbol :translation_unit
358 rule :translation_unit, :word_list & :end_of_string.and? | :end_of_string
359 rule :end_of_string, /\z/
360 rule :whitespace, /\s+/
362 rule :word_list, :word >> (:whitespace.skip & :word).zero_or_more
365 grammar = ComplainingGrammar.new
366 grammar.parse('').should == ''
367 grammar.parse('foo').should == 'foo'
368 grammar.parse('foo bar').should == ['foo', 'bar']
369 lambda { grammar.parse('...') }.should raise_error(Walrat::ParseError)
370 lambda { grammar.parse('foo...') }.should raise_error(Walrat::ParseError)
371 lambda { grammar.parse('foo bar...') }.should raise_error(Walrat::ParseError)
374 it 'should be able to define a default parslet for intertoken skipping' do
376 class SkippingGrammar < Walrat::Grammar
377 starting_symbol :translation_unit
378 skipping :whitespace_and_newlines
379 rule :whitespace_and_newlines, /[\s\n\r]+/
380 rule :translation_unit, :word_list & :end_of_string.and? | :end_of_string
381 rule :end_of_string, /\z/
382 rule :word_list, :word.zero_or_more
383 rule :word, /[a-z0-9_]+/
386 # not sure if I can justify the difference in behaviour here compared with the previous grammar
387 # if I catch these throws at the grammar level I can return nil
388 # but note that the previous grammar returns an empty array, which to_s is just ""
389 grammar = SkippingGrammar.new
390 lambda { grammar.parse('') }.should throw_symbol(:AndPredicateSuccess)
392 grammar.parse('foo').should == 'foo'
393 grammar.parse('foo bar').should == ['foo', 'bar'] # intervening whitespace
394 grammar.parse('foo bar ').should == ['foo', 'bar'] # trailing whitespace
395 grammar.parse(' foo bar').should == ['foo', 'bar'] # leading whitespace
397 # additional example, this time involving the ">>" pseudo-operator
398 class SkippingAndMergingGrammar < Walrat::Grammar
399 starting_symbol :translation_unit
400 skipping :whitespace_and_newlines
401 rule :whitespace_and_newlines, /[\s\n\r]+/
402 rule :translation_unit, :word_list & :end_of_string.and? | :end_of_string
403 rule :end_of_string, /\z/
404 rule :word_list, :word >> (','.skip & :word).zero_or_more
405 rule :word, /[a-z0-9_]+/
409 grammar = SkippingAndMergingGrammar.new
410 grammar.parse('foo').should == 'foo'
413 grammar.parse('foo,bar').should == ['foo', 'bar'] # no whitespace
414 grammar.parse('foo, bar').should == ['foo', 'bar'] # whitespace after
415 grammar.parse('foo ,bar').should == ['foo', 'bar'] # whitespace before
416 grammar.parse('foo , bar').should == ['foo', 'bar'] # whitespace before and after
417 grammar.parse('foo , bar ').should == ['foo', 'bar'] # trailing and embedded whitespace
418 grammar.parse(' foo , bar').should == ['foo', 'bar'] # leading and embedded whitespace
420 # three or four words
421 grammar.parse('foo , bar, baz').should == ['foo', 'bar', 'baz']
422 grammar.parse(' foo , bar, baz ,bin').should == ['foo', 'bar', 'baz', 'bin']
425 it 'should complain if trying to set default skipping parslet more than once' do
427 class SetSkipperTwice < Walrat::Grammar
428 skipping :first # fine
429 skipping :again # should raise here
431 end.should raise_error(/default skipping parslet already set/)
434 it 'should complain if passed nil' do
436 class PassNilToSkipping < Walrat::Grammar
439 end.should raise_error(ArgumentError, /nil rule_or_parslet/)
442 it 'should be able to override default skipping parslet on a per-rule basis' do
443 # the example grammar parses word lists and number lists
444 class OverrideDefaultSkippingParslet < Walrat::Grammar
445 starting_symbol :translation_unit
446 skipping :whitespace_and_newlines
447 rule :whitespace_and_newlines, /\s+/ # any whitespace including newlines
448 rule :whitespace, /[ \t\v]+/ # literally only spaces, tabs, not newlines etc
449 rule :translation_unit, :component.one_or_more & :end_of_string.and? | :end_of_string
450 rule :end_of_string, /\z/
451 rule :component, :word_list | :number_list
452 rule :word_list, :word.one_or_more
454 rule :number, /[0-9]+/
456 # the interesting bit: we override the skipping rule for number lists
457 rule :number_list, :number.one_or_more
458 skipping :number_list, :whitespace # only whitespace, no newlines
461 # words in word lists can be separated by whitespace or newlines
462 grammar = OverrideDefaultSkippingParslet.new
463 grammar.parse('hello world').should == ['hello', 'world']
464 grammar.parse("hello\nworld").should == ['hello', 'world']
465 grammar.parse("hello world\nworld hello").should == ['hello', 'world', 'world', 'hello']
467 # numbers in number lists may be separated only by whitespace, not newlines
468 grammar.parse('123 456').should == ['123', '456']
469 grammar.parse("123\n456").should == ['123', '456'] # this succeeds because parser treats them as two separate number lists
470 grammar.parse("123 456\n456 123").should == [['123', '456'], ['456', '123']]
472 # intermixing word lists and number lists
473 grammar.parse("bar\n123").should == ['bar', '123']
474 grammar.parse("123\n456\nbar").should == ['123', '456', 'bar']
476 # these were buggy at one point: "123\n456" was getting mashed into "123456" due to misguided use of String#delete! to delete first newline
477 grammar.parse("\n123\n456").should == ['123', '456']
478 grammar.parse("bar\n123\n456").should == ['bar', '123', '456']
479 grammar.parse("baz bar\n123\n456").should == [['baz', 'bar'], '123', '456']
480 grammar.parse("hello world\nfoo\n123 456 baz bar\n123\n456").should == [['hello', 'world', 'foo'], ['123', '456'], ['baz', 'bar'], '123', '456']
483 it 'should complain if trying to override the default for the same rule twice' do
485 class OverrideSameRuleTwice < Walrat::Grammar
486 rule :the_rule, 'foo'
487 skipping :the_rule, :the_override # fine
488 skipping :the_rule, :the_override # should raise
490 end.to raise_error(ArgumentError, /skipping override already set for rule/)
493 it "should complain if trying to set an override for a rule that hasn't been defined yet" do
495 class OverrideUndefinedRule < Walrat::Grammar
496 skipping :non_existent_rule, :the_override
498 end.to raise_error(ArgumentError, /non-existent rule/)
501 it 'use of the "skipping" directive should play nicely with predicates' do
502 # example 1: word + predicate
503 class NicePlayer < Walrat::Grammar
506 rule :whitespace, /[ \t\v]+/
507 rule :foo, 'hello' & 'world'.and?
510 grammar = NicePlayer.new
511 grammar.parse('hello world').should == 'hello'
512 grammar.parse('hello world').should == 'hello'
513 grammar.parse('helloworld').should == 'hello'
514 lambda { grammar.parse('hello') }.should raise_error(Walrat::ParseError)
515 lambda { grammar.parse('hello buddy') }.should raise_error(Walrat::ParseError)
516 lambda { grammar.parse("hello\nbuddy") }.should raise_error(Walrat::ParseError)
518 # example 2: word + predicate + other word
519 class NicePlayer2 < Walrat::Grammar
522 rule :whitespace, /[ \t\v]+/
523 rule :foo, /hel../ & 'world'.and? & /\w+/
526 grammar = NicePlayer2.new
527 grammar.parse('hello world').should == ['hello', 'world']
528 grammar.parse('hello world').should == ['hello', 'world']
529 grammar.parse('helloworld').should == ['hello', 'world']
530 lambda { grammar.parse('hello') }.should raise_error(Walrat::ParseError)
531 lambda { grammar.parse('hello buddy') }.should raise_error(Walrat::ParseError)
532 lambda { grammar.parse("hello\nbuddy") }.should raise_error(Walrat::ParseError)