1 # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are met:
5 # 1. Redistributions of source code must retain the above copyright notice,
6 # this list of conditions and the following disclaimer.
7 # 2. Redistributions in binary form must reproduce the above copyright notice,
8 # this list of conditions and the following disclaimer in the documentation
9 # and/or other materials provided with the distribution.
11 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21 # POSSIBILITY OF SUCH DAMAGE.
24 require 'walrat/additions/string.rb'
29 # Lazy reader for the rules hash.
31 # Initializes the hash the first time it is accessed.
33 @rules or @rules = Hash.new do |hash, key|
34 raise "no rule for key '#{key}'"
38 # Lazy reader for the productions hash.
40 # Initializes the hash the first time it is accessed.
42 @productions or @productions = Hash.new do |hash, key|
43 raise "no production for key '#{key}'"
47 # Lazy reader for the skipping overrides hash.
49 # Initializes the hash the first time it is accessed.
50 def skipping_overrides
51 @skipping_overrides or @skipping_overrides = Hash.new do |hash, key|
52 raise "no skipping override for key '#{key}'"
56 # Sets the starting symbol.
58 # @param [Symbol] symbol a symbol which refers to a rule
59 def starting_symbol symbol
60 raise ArgumentError, 'starting symbol already set' if @starting_symbol
61 @starting_symbol = symbol
64 # Returns the starting symbol.
66 # Note that the "starting_symbol" method can't be used as an accessor
67 # because it is already used as part of the grammar-definition DSL.
72 # Sets the default parslet that is used for skipping inter-token
73 # whitespace, and can be used to override the default on a rule-by-rule
76 # This allows for simpler grammars which do not need to explicitly put
77 # optional whitespace parslets (or any other kind of parslet) between
80 # There are two modes of operation for this method. In the first mode
81 # (when only one parameter is passed) the rule_or_parslet parameter is
82 # used to define the default parslet for inter-token skipping.
83 # rule_or_parslet must refer to a rule which itself is a Parslet or
84 # ParsletCombination and which is responsible for skipping. Note that the
85 # ability to pass an arbitrary parslet means that the notion of what
86 # consitutes the "whitespace" that should be skipped is completely
87 # flexible. Raises if a default skipping parslet has already been set.
89 # In the second mode of operation (when two parameters are passed) the
90 # rule_or_parslet parameter is interpreted to be the rule to which an
91 # override should be applied, where the parslet parameter specifies the
92 # parslet to be used in this case. If nil is explicitly passed then this
93 # overrides the default parslet; no parslet will be used for the purposes
94 # of inter-token skipping. Raises if an override has already been set for
97 # The inter-token parslet is passed inside the "options" hash when
98 # invoking the "parse" methods. Any parser which fails will retry after
99 # giving this inter-token parslet a chance to consume and discard
100 # intervening whitespace.
102 # The initial, conservative implementation only performs this fallback
103 # skipping for ParsletSequence and ParsletRepetition combinations.
105 # Raises if rule_or_parslet is nil.
106 def skipping rule_or_parslet, parslet = NoParameterMarker.instance
107 raise ArgumentError, 'nil rule_or_parslet' if rule_or_parslet.nil?
108 if parslet == NoParameterMarker.instance
109 # first mode of operation: set default parslet
110 raise 'default skipping parslet already set' if @skipping
111 @skipping = rule_or_parslet
113 # second mode of operation: override default case
115 "skipping override already set for rule '#{rule_or_parslet}'" if
116 skipping_overrides.has_key? rule_or_parslet
118 "non-existent rule '#{rule_or_parslet}'" unless
119 rules.has_key? rule_or_parslet
120 skipping_overrides[rule_or_parslet] = parslet
124 # Returns the default skipping rule.
126 # Note that we can't use "skipping" as the accessor method here because
127 # it is already used as part of the grammar-definition DSL.
128 def default_skipping_rule
132 # Defines a rule and stores it
134 # Expects an object that responds to the parse message, such as a Parslet
135 # or ParsletCombination. As this is intended to work with Parsing
136 # Expression Grammars, each rule may only be defined once. Defining a
137 # rule more than once will raise an ArgumentError.
138 def rule symbol, parseable
139 raise ArgumentError, 'nil symbol' if symbol.nil?
140 raise ArgumentError, 'nil parseable' if parseable.nil?
142 "rule '#{symbol}' already defined" if rules.has_key? symbol
143 rules[symbol] = parseable
146 # Dynamically creates a Node subclass inside the namespace of the current
149 # This is used to create classes in a class hierarchy where no custom
150 # behavior is required and therefore no actual file with an impementation
151 # need be provided; an example from the Walrus grammar:
154 # class Grammar < Walrat::Grammar
155 # class Literal < Walrat::Node
156 # class StringLiteral < Literal
157 # class DoubleQuotedStringLiteral < StringLiteral
159 # In this example hiearchy the "Literal" class has custom behavior which
160 # is shared by all subclasses, and the custom behavior is implemented in
161 # the file "walrus/grammar/literal". The subclasses, however, have no
162 # custom behavior and no associated file. They are dynamically
163 # synthesized when the Walrus::Grammar class is first evaluated.
164 def node new_class_name, parent_class = Node
165 raise ArgumentError, 'nil new_class_name' if new_class_name.nil?
166 new_class_name = new_class_name.to_s.to_class_name # camel-case
167 unless parent_class.kind_of? Class
168 parent_class = const_get parent_class.to_s.to_class_name
170 const_set new_class_name, Class.new(parent_class)
173 # Specifies that a Node subclass will be used to encapsulate results
174 # for the rule identified by the symbol, rule_name. The class name is
175 # derived by converting the rule_name to camel-case.
177 # If no additional params are supplied then the class is assumed to
178 # accept a single parameter named "lexeme" in its initialize method.
180 # If additional params are supplied then the class is expected to
181 # accept the named params in its initialize method.
183 # As a convenience, the params will be sent to the specified class using
184 # the "production" method, which sets up an appropriate initializer.
188 # # accepts a single parameter, "lexeme"
189 # production :symbol_literal
191 # # accepts a single parameter, "content"
192 # production :multiline_comment, :content
194 # # accepts three parameters, "identifier", "params" and "content"
195 # production :block_directive, :identifier, :params, :content
197 def production rule_name, *results
198 raise ArgumentError, 'nil rule_name' if rule_name.nil?
200 "production already defined for rule '#{rule_name}'" if
201 productions.has_key?(rule_name)
202 raise ArgumentError, "non-existent rule '#{rule_name}'" unless
203 rules.has_key?(rule_name)
204 results = results.empty? ? [:lexeme] : results
205 const_get(rule_name.to_s.to_class_name).production *results
206 productions[rule_name] = results
209 # This method is called by the ParsletSequence and SymbolParslet classes
210 # to possibly wrap a parse result in a production node.
211 def wrap result, rule_name
212 if productions.has_key? rule_name.to_sym
213 node_class = const_get rule_name.to_s.to_class_name
214 param_count = productions[rule_name.to_sym].length
216 node = node_class.new result
218 node = node_class.new *result
220 node.start = (result.outer_start or result.start) # propagate the start information
221 node.end = (result.outer_end or result.end) # and the end information
222 node.source_text = (result.outer_source_text or result.source_text) # and the original source text
225 result.start = result.outer_start if result.outer_start
226 result.end = result.outer_end if result.outer_end
227 result.source_text = result.source_text if result.outer_source_text
233 attr_accessor :memoizing
239 # TODO: consider making grammars copiable (could be used in threaded context then)
240 #def initialize_copy(from); end
244 # Starts with starting_symbol.
245 def parse string, options = {}
246 raise ArgumentError, 'nil string' if string.nil?
247 raise 'starting symbol not defined' if self.class.start_rule.nil?
248 options[:grammar] = self.class
249 options[:rule_name] = self.class.start_rule
250 options[:skipping] = self.class.default_skipping_rule
251 options[:line_start] = 0 # "richer" information (more human-friendly) than that provided in "location"
252 options[:column_start] = 0 # "richer" information (more human-friendly) than that provided in "location"
253 options[:memoizer] = MemoizingCache.new if @memoizing
254 self.class.start_rule.to_parseable.memoizing_parse string, options
257 # TODO: pretty print method?