From c67548841213fc1c57ebfa579db424f81347efae Mon Sep 17 00:00:00 2001 From: Wincent Colaiuta Date: Sat, 21 Aug 2010 12:35:26 +0200 Subject: [PATCH] Initial import (extraction from Walrus repo, commit 0c9d44c) Signed-off-by: Wincent Colaiuta --- Gemfile | 25 + Rakefile | 71 +++ lib/walrat.rb | 70 +++ lib/walrat/additions/proc.rb | 32 ++ lib/walrat/additions/regexp.rb | 33 ++ lib/walrat/additions/string.rb | 99 ++++ lib/walrat/additions/symbol.rb | 42 ++ lib/walrat/and_predicate.rb | 49 ++ lib/walrat/array_result.rb | 29 + lib/walrat/continuation_wrapper_exception.rb | 35 ++ lib/walrat/grammar.rb | 259 +++++++++ lib/walrat/left_recursion_exception.rb | 34 ++ lib/walrat/location_tracking.rb | 126 +++++ lib/walrat/match_data_wrapper.rb | 84 +++ lib/walrat/memoizing.rb | 55 ++ lib/walrat/memoizing_cache.rb | 126 +++++ lib/walrat/no_parameter_marker.rb | 30 + lib/walrat/node.rb | 63 +++ lib/walrat/not_predicate.rb | 49 ++ lib/walrat/parse_error.rb | 48 ++ lib/walrat/parser_state.rb | 205 +++++++ lib/walrat/parslet.rb | 38 ++ lib/walrat/parslet_choice.rb | 155 +++++ lib/walrat/parslet_combination.rb | 34 ++ lib/walrat/parslet_combining.rb | 190 +++++++ lib/walrat/parslet_merge.rb | 96 ++++ lib/walrat/parslet_omission.rb | 74 +++ lib/walrat/parslet_repetition.rb | 114 ++++ lib/walrat/parslet_repetition_default.rb | 77 +++ lib/walrat/parslet_sequence.rb | 241 ++++++++ lib/walrat/predicate.rb | 68 +++ lib/walrat/proc_parslet.rb | 60 ++ lib/walrat/regexp_parslet.rb | 84 +++ lib/walrat/skipped_substring_exception.rb | 46 ++ lib/walrat/string_enumerator.rb | 47 ++ lib/walrat/string_parslet.rb | 89 +++ lib/walrat/string_result.rb | 34 ++ lib/walrat/symbol_parslet.rb | 82 +++ spec/spec_helper.rb | 43 ++ spec/walrat/additions/proc_spec.rb | 31 + spec/walrat/additions/regexp_spec.rb | 52 ++ spec/walrat/additions/string_spec.rb | 112 ++++ spec/walrat/and_predicate_spec.rb | 39 ++ .../continuation_wrapper_exception_spec.rb | 31 + spec/walrat/grammar_spec.rb | 535 ++++++++++++++++++ 
spec/walrat/match_data_wrapper_spec.rb | 49 ++ spec/walrat/memoizing_cache_spec.rb | 109 ++++ spec/walrat/node_spec.rb | 27 + spec/walrat/not_predicate_spec.rb | 40 ++ spec/walrat/parser_state_spec.rb | 173 ++++++ spec/walrat/parslet_choice_spec.rb | 55 ++ spec/walrat/parslet_combining_spec.rb | 265 +++++++++ spec/walrat/parslet_merge_spec.rb | 39 ++ spec/walrat/parslet_omission_spec.rb | 74 +++ spec/walrat/parslet_repetition_spec.rb | 103 ++++ spec/walrat/parslet_sequence_spec.rb | 61 ++ spec/walrat/parslet_spec.rb | 33 ++ spec/walrat/predicate_spec.rb | 59 ++ spec/walrat/proc_parslet_spec.rb | 65 +++ spec/walrat/regexp_parslet_spec.rb | 369 ++++++++++++ spec/walrat/string_enumerator_spec.rb | 88 +++ spec/walrat/string_parslet_spec.rb | 145 +++++ spec/walrat/symbol_parslet_spec.rb | 38 ++ walrat.gemspec | 50 ++ 64 files changed, 5778 insertions(+) create mode 100644 Gemfile create mode 100644 Rakefile create mode 100644 lib/walrat.rb create mode 100755 lib/walrat/additions/proc.rb create mode 100755 lib/walrat/additions/regexp.rb create mode 100644 lib/walrat/additions/string.rb create mode 100755 lib/walrat/additions/symbol.rb create mode 100755 lib/walrat/and_predicate.rb create mode 100644 lib/walrat/array_result.rb create mode 100755 lib/walrat/continuation_wrapper_exception.rb create mode 100755 lib/walrat/grammar.rb create mode 100755 lib/walrat/left_recursion_exception.rb create mode 100755 lib/walrat/location_tracking.rb create mode 100755 lib/walrat/match_data_wrapper.rb create mode 100755 lib/walrat/memoizing.rb create mode 100755 lib/walrat/memoizing_cache.rb create mode 100644 lib/walrat/no_parameter_marker.rb create mode 100755 lib/walrat/node.rb create mode 100755 lib/walrat/not_predicate.rb create mode 100755 lib/walrat/parse_error.rb create mode 100755 lib/walrat/parser_state.rb create mode 100755 lib/walrat/parslet.rb create mode 100755 lib/walrat/parslet_choice.rb create mode 100755 lib/walrat/parslet_combination.rb create mode 100755 
lib/walrat/parslet_combining.rb create mode 100755 lib/walrat/parslet_merge.rb create mode 100755 lib/walrat/parslet_omission.rb create mode 100755 lib/walrat/parslet_repetition.rb create mode 100755 lib/walrat/parslet_repetition_default.rb create mode 100755 lib/walrat/parslet_sequence.rb create mode 100755 lib/walrat/predicate.rb create mode 100755 lib/walrat/proc_parslet.rb create mode 100755 lib/walrat/regexp_parslet.rb create mode 100755 lib/walrat/skipped_substring_exception.rb create mode 100755 lib/walrat/string_enumerator.rb create mode 100755 lib/walrat/string_parslet.rb create mode 100644 lib/walrat/string_result.rb create mode 100755 lib/walrat/symbol_parslet.rb create mode 100755 spec/spec_helper.rb create mode 100755 spec/walrat/additions/proc_spec.rb create mode 100755 spec/walrat/additions/regexp_spec.rb create mode 100755 spec/walrat/additions/string_spec.rb create mode 100755 spec/walrat/and_predicate_spec.rb create mode 100755 spec/walrat/continuation_wrapper_exception_spec.rb create mode 100755 spec/walrat/grammar_spec.rb create mode 100755 spec/walrat/match_data_wrapper_spec.rb create mode 100755 spec/walrat/memoizing_cache_spec.rb create mode 100755 spec/walrat/node_spec.rb create mode 100755 spec/walrat/not_predicate_spec.rb create mode 100755 spec/walrat/parser_state_spec.rb create mode 100755 spec/walrat/parslet_choice_spec.rb create mode 100755 spec/walrat/parslet_combining_spec.rb create mode 100755 spec/walrat/parslet_merge_spec.rb create mode 100755 spec/walrat/parslet_omission_spec.rb create mode 100755 spec/walrat/parslet_repetition_spec.rb create mode 100755 spec/walrat/parslet_sequence_spec.rb create mode 100755 spec/walrat/parslet_spec.rb create mode 100755 spec/walrat/predicate_spec.rb create mode 100755 spec/walrat/proc_parslet_spec.rb create mode 100755 spec/walrat/regexp_parslet_spec.rb create mode 100755 spec/walrat/string_enumerator_spec.rb create mode 100755 spec/walrat/string_parslet_spec.rb create mode 100755 
spec/walrat/symbol_parslet_spec.rb create mode 100644 walrat.gemspec diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..e8b24db --- /dev/null +++ b/Gemfile @@ -0,0 +1,25 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +source :gemcutter + +gemspec diff --git a/Rakefile b/Rakefile new file mode 100644 index 0000000..778a7e4 --- /dev/null +++ b/Rakefile @@ -0,0 +1,71 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. 
Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+ +require 'rake' +require 'rubygems' +require 'spec/rake/spectask' +require 'spec/rake/verify_rcov' +require File.expand_path('lib/walrus/version', File.dirname(__FILE__)) + +desc 'Run specs with coverage' +Spec::Rake::SpecTask.new('coverage') do |t| + t.spec_files = FileList['spec/**/*_spec.rb'] + t.rcov = true + t.rcov_opts = ['--exclude', "spec"] +end + +desc 'Run specs' +task :spec do + sh 'bin/spec spec' +end + +desc 'Verify that test coverage is above minimum threshold' +RCov::VerifyTask.new(:verify => :spec) do |t| + t.threshold = 99.2 # never adjust expected coverage down, only up + t.index_html = 'coverage/index.html' +end + +desc 'Generate specdocs for inclusions in RDoc' +Spec::Rake::SpecTask.new('specdoc') do |t| + t.spec_files = FileList['spec/**/*_spec.rb'] + t.spec_opts = ['--format', 'rdoc'] + t.out = 'specdoc.rd' +end + +BUILT_GEM_DEPENDENCIES = Dir[ + 'walrus.gemspec', + 'bin/walrus', + 'lib/**/*.rb' +] + +BUILT_GEM = "walrus-#{Walrus::VERSION}.gem" +file BUILT_GEM => BUILT_GEM_DEPENDENCIES do + sh 'gem build walrus.gemspec' +end + +desc 'Build gem ("gem build")' +task :build => BUILT_GEM + +desc 'Publish gem ("gem push")' +task :push => :build do + sh "gem push #{BUILT_GEM}" +end diff --git a/lib/walrat.rb b/lib/walrat.rb new file mode 100644 index 0000000..fe55b2c --- /dev/null +++ b/lib/walrat.rb @@ -0,0 +1,70 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +# Ruby 1.9 support +require 'continuation' unless Kernel.respond_to?(:callcc) + +module Walrat + major, minor = RUBY_VERSION.split '.' + if major == '1' and minor == '8' + $KCODE = 'U' # UTF-8 (necessary for Unicode support) + end + + autoload :AndPredicate, 'walrat/and_predicate' + autoload :ArrayResult, 'walrat/array_result' + autoload :ContinuationWrapperException, 'walrat/continuation_wrapper_exception' + autoload :Grammar, 'walrat/grammar' + autoload :LeftRecursionException, 'walrat/left_recursion_exception' + autoload :LocationTracking, 'walrat/location_tracking' + autoload :MatchDataWrapper, 'walrat/match_data_wrapper' + autoload :Memoizing, 'walrat/memoizing' + autoload :MemoizingCache, 'walrat/memoizing_cache' + autoload :Node, 'walrat/node' + autoload :NoParameterMarker, 'walrat/no_parameter_marker' + autoload :NotPredicate, 'walrat/not_predicate' + autoload :ParseError, 'walrat/parse_error' + autoload :ParserState, 'walrat/parser_state' + + # TODO: move these into subdirectory? directory for predicates also? 
+ autoload :Parslet, 'walrat/parslet' + autoload :ParsletChoice, 'walrat/parslet_choice' + autoload :ParsletCombination, 'walrat/parslet_combination' + autoload :ParsletCombining, 'walrat/parslet_combining' + autoload :ParsletMerge, 'walrat/parslet_merge' + autoload :ParsletOmission, 'walrat/parslet_omission' + autoload :ParsletRepetition, 'walrat/parslet_repetition' + autoload :ParsletRepetitionDefault, 'walrat/parslet_repetition_default' + autoload :ParsletSequence, 'walrat/parslet_sequence' + autoload :Predicate, 'walrat/predicate' + autoload :ProcParslet, 'walrat/proc_parslet' + autoload :RegexpParslet, 'walrat/regexp_parslet' + autoload :SkippedSubstringException, 'walrat/skipped_substring_exception' + autoload :StringEnumerator, 'walrat/string_enumerator' + autoload :StringParslet, 'walrat/string_parslet' + autoload :StringResult, 'walrat/string_result' + autoload :SymbolParslet, 'walrat/symbol_parslet' +end # module Walrat + +require 'walrat/additions/proc' +require 'walrat/additions/regexp' +require 'walrat/additions/string' +require 'walrat/additions/symbol' diff --git a/lib/walrat/additions/proc.rb b/lib/walrat/additions/proc.rb new file mode 100755 index 0000000..c3b9a4a --- /dev/null +++ b/lib/walrat/additions/proc.rb @@ -0,0 +1,32 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require 'walrat' + +class Proc + include Walrat::ParsletCombining + + # Returns a ProcParslet based on the receiver + def to_parseable + Walrat::ProcParslet.new self + end +end # class Proc diff --git a/lib/walrat/additions/regexp.rb b/lib/walrat/additions/regexp.rb new file mode 100755 index 0000000..6390e8e --- /dev/null +++ b/lib/walrat/additions/regexp.rb @@ -0,0 +1,33 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require 'walrat' + +class Regexp + require 'walrat/parslet_combining' + include Walrat::ParsletCombining + + # Returns a RegexpParslet based on the receiver + def to_parseable + Walrat::RegexpParslet.new self + end +end # class Regexp diff --git a/lib/walrat/additions/string.rb b/lib/walrat/additions/string.rb new file mode 100644 index 0000000..fe6d18e --- /dev/null +++ b/lib/walrat/additions/string.rb @@ -0,0 +1,99 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require 'walrat' + +# Additions to String class for Unicode support. +# Parslet combining methods. +# Convenience methods (to_parseable). +# Conversion utility methods. +class String + alias old_range [] + + def jlength + chars.to_a.length + end + + # NOTE: this is a totally Walrat-specific implementation that is + # unlikely to be of use anywhere else. It is used in only 1 place + # in the codebase, and works around the fact that the MatchData + # made available by the index method gets clobbered by the + # "chars.to_a" call. The same thing happens for alternative + # methods of counting the chars, such as using jlength or a manual + # scan. + # + # One workaround is for the caller to re-perform the index call just + # to get the MatchData again, but that is inefficient. So here we + # just do the addition before returning the result to the caller. + def jindex_plus_length arg + if i = index(arg) + $~[0].length + unpack('C*')[0...i].pack('C*').chars.to_a.length + end + end + + # Unlike the normal rindex method, the MatchData in $~ set by the inner + # rindex call gets clobbered (by the "chars.to_a" call) and is not visible to + # the caller of this method. + def jrindex arg, offset = Walrat::NoParameterMarker.instance + if offset == Walrat::NoParameterMarker.instance + i = rindex arg + else + i = rindex arg, offset + end + i ? 
unpack('C*')[0...i].pack('C*').chars.to_a.length : nil + end + + # multi-byte friendly [] implementation + def [](range, other = Walrat::NoParameterMarker.instance) + if other == Walrat::NoParameterMarker.instance + if range.kind_of? Range + chars.to_a[range].join + else + old_range range + end + else + old_range range, other + end + end + + # Returns a character-level enumerator for the receiver. + def enumerator + Walrat::StringEnumerator.new self + end + + # Rationale: it's ok to add "&" and "|" methods to string because they don't + # exist yet (they're not overrides). + include Walrat::ParsletCombining + + # Returns a StringParslet based on the receiver + def to_parseable + Walrat::StringParslet.new self + end + + # Converts the receiver of the form "foo_bar" to "FooBar". Specifically, the + # receiver is split into pieces delimited by underscores, each component is + # then converted to capital case (the first letter is capitalized and the + # remaining letters are lowercased) and finally the components are joined. + def to_class_name + self.split('_').collect { |component| component.capitalize}.join + end +end # class String diff --git a/lib/walrat/additions/symbol.rb b/lib/walrat/additions/symbol.rb new file mode 100755 index 0000000..b59fc94 --- /dev/null +++ b/lib/walrat/additions/symbol.rb @@ -0,0 +1,42 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require 'walrat' + +class Symbol + include Walrat::ParsletCombining + + # Returns a SymbolParslet based on the receiver. + # Symbols can be used in Grammars when specifying rules and productions to + # refer to other rules and productions that have not been defined yet. + # They can also be used to allow self-references within rules and productions + # (recursion); for example: + # + # rule :thing & :thing.optional & :other_thing + # + # Basically these SymbolParslets allow deferred evaluation of a rule or + # production (deferred until parsing takes place) rather than being evaluated + # at the time a rule or production is defined. + def to_parseable + Walrat::SymbolParslet.new self + end +end # class Symbol diff --git a/lib/walrat/and_predicate.rb b/lib/walrat/and_predicate.rb new file mode 100755 index 0000000..ebc5f76 --- /dev/null +++ b/lib/walrat/and_predicate.rb @@ -0,0 +1,49 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. 
Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require 'walrat' + +module Walrat + class AndPredicate < Predicate + def parse string, options = {} + raise ArgumentError if string.nil? + catch :ZeroWidthParseSuccess do + begin + parsed = @parseable.memoizing_parse string, options + rescue ParseError + raise ParseError.new('predicate not satisfied (expected "%s") while parsing "%s"' % [@parseable.to_s, string], + :line_end => options[:line_start], + :column_end => options[:column_start]) + end + end + + # getting this far means that parsing succeeded + throw :AndPredicateSuccess + end + + private + + def hash_offset + 12 + end + end +end # module Walrat diff --git a/lib/walrat/array_result.rb b/lib/walrat/array_result.rb new file mode 100644 index 0000000..47ad012 --- /dev/null +++ b/lib/walrat/array_result.rb @@ -0,0 +1,29 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. 
+# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require 'walrat' + +module Walrat + class ArrayResult < Array + include LocationTracking + end # class ArrayResult +end # module Walrat diff --git a/lib/walrat/continuation_wrapper_exception.rb b/lib/walrat/continuation_wrapper_exception.rb new file mode 100755 index 0000000..3b484ae --- /dev/null +++ b/lib/walrat/continuation_wrapper_exception.rb @@ -0,0 +1,35 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. 
Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require 'walrat' + +module Walrat + class ContinuationWrapperException < Exception + attr_reader :continuation + + def initialize continuation + raise ArgumentError, 'nil continuation' if continuation.nil? + super self.class.to_s + @continuation = continuation + end + end # class ContinuationWrapperException +end # module Walrat diff --git a/lib/walrat/grammar.rb b/lib/walrat/grammar.rb new file mode 100755 index 0000000..1dde2ef --- /dev/null +++ b/lib/walrat/grammar.rb @@ -0,0 +1,259 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. 
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require 'walrat' +require 'walrat/additions/string.rb' + +module Walrat + class Grammar + class << self + # Lazy reader for the rules hash. + # + # Initializes the hash the first time it is accessed. + def rules + @rules or @rules = Hash.new do |hash, key| + raise "no rule for key '#{key}'" + end + end + + # Lazy reader for the productions hash. + # + # Initializes the hash the first time it is accessed. + def productions + @productions or @productions = Hash.new do |hash, key| + raise "no production for key '#{key}'" + end + end + + # Lazy reader for the skipping overrides hash. + # + # Initializes the hash the first time it is accessed. + def skipping_overrides + @skipping_overrides or @skipping_overrides = Hash.new do |hash, key| + raise "no skipping override for key '#{key}'" + end + end + + # Sets the starting symbol. 
+ # + # @param [Symbol] symbol a symbol which refers to a rule + def starting_symbol symbol + raise ArgumentError, 'starting symbol already set' if @starting_symbol + @starting_symbol = symbol + end + + # Returns the starting symbol. + # + # Note that the "starting_symbol" method can't be used as an accessor + # because it is already used as part of the grammar-definition DSL. + def start_rule + @starting_symbol + end + + # Sets the default parslet that is used for skipping inter-token + # whitespace, and can be used to override the default on a rule-by-rule + # basis. + # + # This allows for simpler grammars which do not need to explicitly put + # optional whitespace parslets (or any other kind of parslet) between + # elements. + # + # There are two modes of operation for this method. In the first mode + # (when only one parameter is passed) the rule_or_parslet parameter is + # used to define the default parslet for inter-token skipping. + # rule_or_parslet must refer to a rule which itself is a Parslet or + # ParsletCombination and which is responsible for skipping. Note that the + # ability to pass an arbitrary parslet means that the notion of what + # constitutes the "whitespace" that should be skipped is completely + # flexible. Raises if a default skipping parslet has already been set. + # + # In the second mode of operation (when two parameters are passed) the + # rule_or_parslet parameter is interpreted to be the rule to which an + # override should be applied, where the parslet parameter specifies the + # parslet to be used in this case. If nil is explicitly passed then this + # overrides the default parslet; no parslet will be used for the purposes + # of inter-token skipping. Raises if an override has already been set for + # the named rule. + # + # The inter-token parslet is passed inside the "options" hash when + # invoking the "parse" methods. 
Any parser which fails will retry after + # giving this inter-token parslet a chance to consume and discard + # intervening whitespace. + # + # The initial, conservative implementation only performs this fallback + # skipping for ParsletSequence and ParsletRepetition combinations. + # + # Raises if rule_or_parslet is nil. + def skipping rule_or_parslet, parslet = NoParameterMarker.instance + raise ArgumentError, 'nil rule_or_parslet' if rule_or_parslet.nil? + if parslet == NoParameterMarker.instance + # first mode of operation: set default parslet + raise 'default skipping parslet already set' if @skipping + @skipping = rule_or_parslet + else + # second mode of operation: override default case + raise ArgumentError, + "skipping override already set for rule '#{rule_or_parslet}'" if + skipping_overrides.has_key? rule_or_parslet + raise ArgumentError, + "non-existent rule '#{rule_or_parslet}'" unless + rules.has_key? rule_or_parslet + skipping_overrides[rule_or_parslet] = parslet + end + end + + # Returns the default skipping rule. + # + # Note that we can't use "skipping" as the accessor method here because + # it is already used as part of the grammar-definition DSL. + def default_skipping_rule + @skipping + end + + # Defines a rule and stores it + # + # Expects an object that responds to the parse message, such as a Parslet + # or ParsletCombination. As this is intended to work with Parsing + # Expression Grammars, each rule may only be defined once. Defining a + # rule more than once will raise an ArgumentError. + def rule symbol, parseable + raise ArgumentError, 'nil symbol' if symbol.nil? + raise ArgumentError, 'nil parseable' if parseable.nil? + raise ArgumentError, + "rule '#{symbol}' already defined" if rules.has_key? symbol + rules[symbol] = parseable + end + + # Dynamically creates a Node subclass inside the namespace of the current + # grammar. 
+ # + # This is used to create classes in a class hierarchy where no custom + # behavior is required and therefore no actual file with an implementation + # need be provided; an example from the Walrus grammar: + # + # module Walrus + # class Grammar < Walrat::Grammar + # class Literal < Walrat::Node + # class StringLiteral < Literal + # class DoubleQuotedStringLiteral < StringLiteral + # + # In this example hierarchy the "Literal" class has custom behavior which + # is shared by all subclasses, and the custom behavior is implemented in + # the file "walrus/grammar/literal". The subclasses, however, have no + # custom behavior and no associated file. They are dynamically + # synthesized when the Walrus::Grammar class is first evaluated. + def node new_class_name, parent_class = Node + raise ArgumentError, 'nil new_class_name' if new_class_name.nil? + new_class_name = new_class_name.to_s.to_class_name # camel-case + unless parent_class.kind_of? Class + parent_class = const_get parent_class.to_s.to_class_name + end + const_set new_class_name, Class.new(parent_class) + end + + # Specifies that a Node subclass will be used to encapsulate results + # for the rule identified by the symbol, rule_name. The class name is + # derived by converting the rule_name to camel-case. + # + # If no additional params are supplied then the class is assumed to + # accept a single parameter named "lexeme" in its initialize method. + # + # If additional params are supplied then the class is expected to + # accept the named params in its initialize method. + # + # As a convenience, the params will be sent to the specified class using + # the "production" method, which sets up an appropriate initializer.
+ # + # For example: + # + # # accepts a single parameter, "lexeme" + # production :symbol_literal + # + # # accepts a single parameter, "content" + # production :multiline_comment, :content + # + # # accepts three parameters, "identifier", "params" and "content" + # production :block_directive, :identifier, :params, :content + # + def production rule_name, *results + raise ArgumentError, 'nil rule_name' if rule_name.nil? + raise ArgumentError, + "production already defined for rule '#{rule_name}'" if + productions.has_key?(rule_name) + raise ArgumentError, "non-existent rule '#{rule_name}'" unless + rules.has_key?(rule_name) + results = results.empty? ? [:lexeme] : results + const_get(rule_name.to_s.to_class_name).production *results + productions[rule_name] = results + end + + # This method is called by the ParsletSequence and SymbolParslet classes + # to possibly wrap a parse result in a production node. + def wrap result, rule_name + if productions.has_key? rule_name.to_sym + node_class = const_get rule_name.to_s.to_class_name + param_count = productions[rule_name.to_sym].length + if param_count == 1 + node = node_class.new result + else + node = node_class.new *result + end + node.start = (result.outer_start or result.start) # propagate the start information + node.end = (result.outer_end or result.end) # and the end information + node.source_text = (result.outer_source_text or result.source_text) # and the original source text + node + else + result.start = result.outer_start if result.outer_start + result.end = result.outer_end if result.outer_end + result.source_text = result.source_text if result.outer_source_text + result + end + end + end + + attr_accessor :memoizing + + def initialize + @memoizing = true + end + + # TODO: consider making grammars copiable (could be used in threaded context then) + #def initialize_copy(from); end + #def clone; end + #def dupe; end + + # Starts with starting_symbol. 
+ def parse string, options = {} + raise ArgumentError, 'nil string' if string.nil? + raise 'starting symbol not defined' if self.class.start_rule.nil? + options[:grammar] = self.class + options[:rule_name] = self.class.start_rule + options[:skipping] = self.class.default_skipping_rule + options[:line_start] = 0 # "richer" information (more human-friendly) than that provided in "location" + options[:column_start] = 0 # "richer" information (more human-friendly) than that provided in "location" + options[:memoizer] = MemoizingCache.new if @memoizing + self.class.start_rule.to_parseable.memoizing_parse string, options + end + + # TODO: pretty print method? + end # class Grammar +end # module Walrat diff --git a/lib/walrat/left_recursion_exception.rb b/lib/walrat/left_recursion_exception.rb new file mode 100755 index 0000000..0ca9b0d --- /dev/null +++ b/lib/walrat/left_recursion_exception.rb @@ -0,0 +1,34 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require 'walrat' + +module Walrat + class LeftRecursionException < Exception + attr_accessor :continuation + + def initialize continuation = nil + super self.class.to_s + @continuation = continuation + end + end # class LeftRecursionException +end # module Walrat diff --git a/lib/walrat/location_tracking.rb b/lib/walrat/location_tracking.rb new file mode 100755 index 0000000..284de96 --- /dev/null +++ b/lib/walrat/location_tracking.rb @@ -0,0 +1,126 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require 'walrat' + +module Walrat + # Methods for embedding location information in objects returned (or + # exceptions raised) from parse methods. + module LocationTracking + attr_reader :source_text + + # For occasions where a single item must serve as a carrier for array-like + # information (that is, its own start, end and source_text, as well as the + # "outer" equivalents). This can happen where a single node appears in a + # list context surrounded only by skipped content. + attr_accessor :outer_start, :outer_end, :outer_source_text + + def source_text=(string) + @source_text = string.to_s.clone + end + + # Sets @column_start to col. + # Sets @column_start to 0 if passed nil (for ease of use, users of classes + # that mix-in this module don't have to worry about special casing nil + # values). + def column_start=(column_start) + @column_start = column_start.to_i + end + + # Returns 0 if @column_start is nil (for ease of use, users of classes that + # mix-in this module don't have to worry about special casing nil values). + def column_start + @column_start || 0 + end + + # Sets @line_start to line. + # Sets @line_start to 0 if passed nil (for ease of use, users of classes + # that mix-in this module don't have to worry about special casing nil + # values). 
+ def line_start=(line_start) + @line_start = line_start.to_i + end + + # Returns 0 if @line_start is nil (for ease of use, users of classes that + # mix-in this module don't have to worry about special casing nil values). + def line_start + @line_start || 0 + end + + # Convenience method for getting both line_start and column_start at once. + def start + [self.line_start, self.column_start] + end + + # Convenience method for setting both line_start and column_start at once. + def start=(array) + raise ArgumentError if array.nil? + raise ArgumentError if array.length != 2 + self.line_start = array[0] + self.column_start = array[1] + end + + def line_end=(line_end) + @line_end = line_end.to_i + end + + def line_end + @line_end || 0 + end + + def column_end=(column_end) + @column_end = column_end.to_i + end + + def column_end + @column_end || 0 + end + + # Convenience method for getting both line_end and column_end at once. + def end + [self.line_end, self.column_end] + end + + # Convenience method for setting both line_end and column_end at once. + def end=(array) + raise ArgumentError if array.nil? + raise ArgumentError if array.length != 2 + self.line_end = array[0] + self.column_end = array[1] + end + + # Given another object that responds to column_end and line_end, returns + # true if the receiver is rightmost or equal. + # If the other object is farther to the right returns false. + def rightmost? other + if self.line_end > other.line_end + true + elsif other.line_end > self.line_end + false + elsif self.column_end >= other.column_end + true + else + false + end + end + end # module LocationTracking +end # module Walrat diff --git a/lib/walrat/match_data_wrapper.rb b/lib/walrat/match_data_wrapper.rb new file mode 100755 index 0000000..148e67b --- /dev/null +++ b/lib/walrat/match_data_wrapper.rb @@ -0,0 +1,84 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. 
+# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require 'walrat' + +module Walrat + # Simple wrapper for MatchData objects that implements length, to_s and + # to_str methods. + # + # By implementing to_str, MatchDataWrappers can be directly compared with + # Strings using the == method. The original MatchData instance can be + # obtained using the match_data accessor. Upon creation a clone of the passed + # in MatchData object is stored; this means that the $~ global variable can + # be conveniently wrapped without having to worry that subsequent operations + # will alter the contents of the variable. + class MatchDataWrapper + include Walrat::LocationTracking + + attr_reader :match_data + + # Raises if data is nil. 
+ def initialize data + raise ArgumentError, 'nil data' if data.nil? + self.match_data = data + end + + # The definition of this method, in conjunction with the == method, allows + # automatic comparisons with String objects using the == method. + # This is because in a parser matches essentially are Strings (just like + # Exceptions and Pathnames); it's just that this class encapsulates a + # little more information (the match data) for those who want it. + def to_str + self.to_s + end + + # Although this method explicitly allows for MatchDataWrapper to + # MatchDataWrapper comparisons, note that all such comparisons will return + # false except for those between instances which were initialized with + # exactly the same match data instance; this is because the MatchData class + # itself always returns false when compared with other MatchData instances. + def ==(other) + if other.kind_of? MatchDataWrapper + self.match_data == other.match_data + elsif other.respond_to? :to_str + self.to_str == other.to_str + else + false + end + end + + def to_s + @match_data[0] + end + + def jlength + self.to_s.jlength + end + + private + + def match_data=(data) + @match_data = (data.clone rescue data) + end + end # class MatchDataWrapper +end # module Walrat diff --git a/lib/walrat/memoizing.rb b/lib/walrat/memoizing.rb new file mode 100755 index 0000000..56c33d1 --- /dev/null +++ b/lib/walrat/memoizing.rb @@ -0,0 +1,55 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require 'walrat' + +module Walrat + module Memoizing + # This method provides a clean, optional implementation of memoizing by + # serving as a wrapper for all parse invocations. Rather than calling the + # parse methods directly, this method should be called; if it is + # appropriate to use a memoizer then it will be invoked, otherwise control + # will fall through to the real parse method. Turning off memoizing is as + # simple as not passing a value with the :memoizer key in the options hash. + # This method is defined in a separate module so that it can easily be + # mixed in with all Parslets, ParsletCombinations and Predicates. + def memoizing_parse(string, options = {}) + # will use memoizer if available and not instructed to ignore it + if options.has_key?(:memoizer) and not + (options.has_key?(:ignore_memoizer) and options[:ignore_memoizer]) + options[:parseable] = self + options[:memoizer].parse string, options + else # otherwise will proceed as normal + options[:ignore_memoizer] = false + parse string, options + end + end + + # Can only check for left recursion if memoizing is turned on (the help of + # the memoizer is needed).
+ def check_left_recursion parseable, options = {} + return unless options.has_key?(:memoizer) + options[:memoizer].check_left_recursion parseable, options + end + end # module Memoizing +end # module Walrat + diff --git a/lib/walrat/memoizing_cache.rb b/lib/walrat/memoizing_cache.rb new file mode 100755 index 0000000..3e8cc4e --- /dev/null +++ b/lib/walrat/memoizing_cache.rb @@ -0,0 +1,126 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require 'walrat' + +module Walrat + # The MemoizingCache class memoizes the outcomes of parse operations. 
The + # functionality is implemented as a separate class so as to minimize the + # amount of "contamination" of other classes by memoizing code, and to allow + # memoizing to be cleanly turned on or off at will. If a MemoizingCache is + # passed to a Parslet, ParsletCombination or Predicate as a value for the + # :memoizer key in the options hash passed to a parse method, the class + # implementing that method will call the parse method on the cache rather + # than proceeding normally. The cache will either propagate the previously + # memoized result, or will defer back to the original class to obtain the + # result. A circular dependency is avoided by setting the :ignore_memoizer flag + # in the options hash. If no MemoizingCache is passed then normal + # program flow takes place. + class MemoizingCache + # Singleton class that serves as a default value for unset keys in a Hash. + class NoValueForKey + require 'singleton' + include Singleton + end + + def initialize + # The results of parse operations are stored (memoized) in a cache, keyed + # on a unique identifier comprising the Parslet, ParsletCombination or + # Predicate used in the parse operation, the location of the operation + # (the line_start and column_start), and the skipping override (if any).
+ # The values may be: + # + # - ParseErrors raised during parsing + # - SkippedSubstringExceptions raised during parsing + # - :ZeroWidthParseSuccess symbols thrown during parsing + # - :AndPredicateSuccess symbols thrown during parsing + # - :NotPredicateSuccess symbols thrown during parsing + # - String instances returned as parse results + # - MatchDataWrapper instances returned as parse results + # - Array instances containing ordered collections of parse results + # - Node subclass instances containing AST productions + @cache = Hash.new NoValueForKey.instance + end + + # The receiver checks whether there is already a stored result + # corresponding to a unique identifier that specifies the + # "coordinates" of a parsing operation (location, parseable, skipping + # override). If found propagates the result directly to the caller rather + # than performing the parse method all over again. Here "propagation" means + # re-raising parse errors, re-throwing symbols, and returning object + # references. If not found, performs the parsing operation and stores the + # result in the cache before propagating it. + def parse string, options = {} + raise ArgumentError if string.nil? + + # construct a unique identifier + identifier = [options[:parseable], options[:line_start], options[:column_start]] + identifier << options[:origin] if options.has_key? :origin + identifier << options[:skipping_override] if options.has_key? :skipping_override + + if (result = @cache[identifier]) != NoValueForKey.instance + if result.kind_of? Symbol + throw result + elsif result.kind_of?
Exception + raise result + else + return result + end + else + # first time for this parseable/location/skipping_override (etc) + # combination; capture result and propagate + catch :NotPredicateSuccess do + catch :AndPredicateSuccess do + catch :ZeroWidthParseSuccess do + begin + options[:ignore_memoizer] = true + + # short-circuit left recursion here rather than infinite + # looping + if options[:parseable].kind_of? SymbolParslet + check_left_recursion(options[:parseable], options) + @last_seen_symbol_parslet = options[:parseable] + @last_seen_symbol_parslet_location = [options[:line_start], options[:column_start]] + end + + return @cache[identifier] = options[:parseable].memoizing_parse(string, options) # store and return + rescue Exception => e + raise @cache[identifier] = e # store and re-raise + end + end + throw @cache[identifier] = :ZeroWidthParseSuccess # store and re-throw + end + throw @cache[identifier] = :AndPredicateSuccess # store and re-throw + end + throw @cache[identifier] = :NotPredicateSuccess # store and re-throw + end + end + + def check_left_recursion parseable, options = {} + if parseable.kind_of? SymbolParslet and + @last_seen_symbol_parslet == parseable and + @last_seen_symbol_parslet_location == [options[:line_start], options[:column_start]] + raise LeftRecursionException + end + end + end # class MemoizingCache +end # module Walrat diff --git a/lib/walrat/no_parameter_marker.rb b/lib/walrat/no_parameter_marker.rb new file mode 100644 index 0000000..d237bed --- /dev/null +++ b/lib/walrat/no_parameter_marker.rb @@ -0,0 +1,30 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. 
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require 'walrat' + +module Walrat + class NoParameterMarker + require 'singleton' + include Singleton + end # class NoParameterMarker +end # module Walrat diff --git a/lib/walrat/node.rb b/lib/walrat/node.rb new file mode 100755 index 0000000..4769b9f --- /dev/null +++ b/lib/walrat/node.rb @@ -0,0 +1,63 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require 'walrat' + +module Walrat + # Make subclasses of this for use in Abstract Syntax Trees (ASTs). + class Node + include Walrat::LocationTracking + + attr_reader :lexeme + + def initialize lexeme + @string_value = lexeme.to_s + @lexeme = lexeme + end + + def to_s + @string_value + end + + # Overrides the default initialize method to accept the defined + # attributes and sets up a read accessor for each. + # + # Raises an error if called directly on Node itself rather than + # a subclass.
+ def self.production *results + raise 'Node#production called directly on Node' if self == Node + + # set up accessors + results.each { |result| attr_reader result } + + # set up initializer + initialize_body = "def initialize #{results.map { |symbol| symbol.to_s}.join(', ')}\n" + initialize_body << %Q{ @string_value = ""\n} + results.each do |result| + initialize_body << " @#{result} = #{result}\n" + initialize_body << " @string_value << #{result}.to_s\n" + end + initialize_body << "end\n" + class_eval initialize_body + end + end # class Node +end # module Walrat diff --git a/lib/walrat/not_predicate.rb b/lib/walrat/not_predicate.rb new file mode 100755 index 0000000..66cfa51 --- /dev/null +++ b/lib/walrat/not_predicate.rb @@ -0,0 +1,49 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require 'walrat' + +module Walrat + class NotPredicate < Predicate + def parse string, options = {} + raise ArgumentError, 'nil string' if string.nil? + catch :ZeroWidthParseSuccess do + begin + @parseable.memoizing_parse(string, options) + rescue ParseError # failed to pass (which is just what we wanted) + throw :NotPredicateSuccess + end + end + + # getting this far means that parsing succeeded (not what we wanted) + raise ParseError.new('predicate not satisfied ("%s" not allowed) while parsing "%s"' % [@parseable.to_s, string], + :line_end => options[:line_start], + :column_end => options[:column_start]) + end + + private + + def hash_offset + 11 + end + end +end # module Walrat diff --git a/lib/walrat/parse_error.rb b/lib/walrat/parse_error.rb new file mode 100755 index 0000000..d57e5a2 --- /dev/null +++ b/lib/walrat/parse_error.rb @@ -0,0 +1,48 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

require 'walrat'

module Walrat
  # Exception raised when parsing fails; carries start/end location data via
  # the LocationTracking mixin.
  class ParseError < Exception
    include Walrat::LocationTracking

    # Keys copied from the info hash onto the exception, in assignment order.
    LOCATION_KEYS = [:line_start, :column_start, :line_end, :column_end]

    # message:: the human-readable error message
    # info::    optional hash for packing extra info into the exception; the
    #           :line_start, :column_start, :line_end and :column_end entries
    #           are copied (missing entries are assigned as nil)
    def initialize(message, info = {})
      super(message)
      LOCATION_KEYS.each { |key| send("#{key}=", info[key]) }
    end

    # Returns a string showing the class, the message and the end position.
    def inspect
      # TODO: also return filename if available
      format('#<%s: %s @line_end=%d, @column_end=%d>',
             self.class.to_s, to_s, line_end, column_end)
    end
  end # class ParseError
end # module Walrat

# diff --git a/lib/walrat/parser_state.rb b/lib/walrat/parser_state.rb
# new file mode 100755
# index 0000000..b15bf96
# --- /dev/null
# +++ b/lib/walrat/parser_state.rb
# @@ -0,0 +1,205 @@
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1.
# Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

require 'walrat'

module Walrat
  # Simple class for maintaining state during a parse operation: it tracks the
  # accumulated results, the text scanned so far, the unparsed remainder and
  # the current line/column position (kept in the options hash).
  class ParserState
    # The (cloned) options hash; its :line_start, :column_start, :line_end and
    # :column_end entries are updated as parsing advances.
    attr_reader :options

    # Returns the remainder (the unparsed portion) of the string. Will return
    # an empty string if already at the end of the string.
    attr_reader :remainder

    # Sets up a new state for parsing string. The options hash is cloned, so
    # the caller's hash is not modified by position updates.
    #
    # Raises an ArgumentError if string is nil.
    def initialize string, options = {}
      raise ArgumentError, 'nil string' if string.nil?
      self.base_string = string
      @results = ArrayResult.new # for accumulating results
      @remainder = @base_string.clone
      @scanned = ''
      @options = options.clone

      # start wherever we last finished (doesn't seem to behave different to
      # the alternative)
      @options[:line_start] = (@options[:line_end] or @options[:line_start] or 0)
      @options[:column_start] = (@options[:column_end] or @options[:column_start] or 0)
      #@options[:line_start] = 0 if @options[:line_start].nil?
      #@options[:column_start] = 0 if @options[:column_start].nil?

      # before parsing begins, end point is equal to start point
      @options[:line_end] = @options[:line_start]
      @options[:column_end] = @options[:column_start]
      @original_line_start = @options[:line_start]
      @original_column_start = @options[:column_start]
    end

    # The parsed method is used to inform the receiver of a successful parsing
    # event.
    #
    # Note that substring need not actually be a String but it must respond to
    # the following messages:
    #   - "line_end" and "column_end" so that the end position of the receiver
    #     can be updated
    # As a convenience returns the remainder.
    # Raises an ArgumentError if substring is nil.
    def parsed substring
      raise ArgumentError if substring.nil?
      update_and_return_remainder_for_string substring, true
    end

    # The skipped method is used to inform the receiver of a successful parsing
    # event where the parsed substring should be consumed but not included in
    # the accumulated results.
    # The substring should respond to "line_end" and "column_end".
    # In all other respects this method behaves exactly like the parsed method.
    def skipped substring
      raise ArgumentError if substring.nil?
      update_and_return_remainder_for_string substring
    end

    # The auto_skipped method is used to inform the receiver of a successful
    # parsing event where the parsed substring should be consumed but not
    # included in the accumulated results and furthermore the parse event
    # should not affect the overall bounds of the parse result. In reality this
    # means that the method is only ever called upon the successful use of an
    # automatic intertoken "skipping" parslet. By definition this method should
    # only be called for intertoken skipping otherwise incorrect results will
    # be produced.
    #
    # Returns the remainder. Raises an ArgumentError if substring is nil.
    def auto_skipped substring
      raise ArgumentError if substring.nil?
      a, b, c, d = @options[:line_start], @options[:column_start],
        @options[:line_end], @options[:column_end] # save
      remainder = update_and_return_remainder_for_string(substring)
      @options[:line_start], @options[:column_start],
        @options[:line_end], @options[:column_end] = a, b, c, d # restore
      remainder
    end

    # Returns the results accumulated so far.
    # Returns an empty array if no results have been accumulated.
    # Returns a single object if only one result has been accumulated.
    # Returns an array of objects if multiple results have been accumulated.
    #
    # Before returning, the start, end and source_text of the returned object
    # are overwritten with the bounds and scanned text tracked by the receiver.
    def results
      updated_start = [@original_line_start, @original_column_start]
      updated_end = [@options[:line_end], @options[:column_end]]
      updated_source_text = @scanned.clone

      if @results.length == 1
        # here we ask the single result to exhibit container-like properties
        # use the "outer" variants so as to not overwrite any data internal to
        # the result itself
        # this can happen where a lone result is surrounded only by skipped
        # elements
        # the result has to convey data about its own limits, plus those of the
        # context just around it
        results = @results[0]
        results.outer_start = updated_start if results.start != updated_start
        results.outer_end = updated_end if results.end != updated_end
        results.outer_source_text = updated_source_text if results.source_text != updated_source_text

        # the above trick fixes some of the location tracking issues but opens
        # up another can of worms
        # uncomment this line to see
        #return results

        # need some way of handling unwrapped results (raw results, not AST
        # nodes) as well
        # note: these assignments replace the inner values unconditionally,
        # after the "outer" values have been recorded above
        results.start = updated_start
        results.end = updated_end
        results.source_text = updated_source_text
      else
        results = @results
        results.start = updated_start
        results.end = updated_end
        results.source_text = updated_source_text
      end
      results
    end

    # Returns the number of results accumulated so far.
    def length
      @results.length
    end

    # TODO: possibly implement "undo/rollback" and "reset" methods
    # if I implement "undo" will probably do it as a stack
    # will have the option of implementing "redo" as well but I'd only do that if I could think of a use for it

    private

    # Advances the tracked end position past input, moves the consumed text
    # from the remainder to the scanned buffer, and returns the new remainder.
    #
    # input:: a literal String (wrapped in a StringResult here, with its start
    #         and end computed from the current end position) or any object
    #         responding to "line_end" and "column_end"
    # store:: when true, input is also appended to the accumulated results
    #
    # NOTE(review): jlength, jrindex and jindex_plus_length are not defined in
    # this class; presumably they are the String additions shipped with the
    # library -- confirm their exact semantics there.
    def update_and_return_remainder_for_string input, store = false
      previous_line_end = @options[:line_end] # remember old end point
      previous_column_end = @options[:column_end] # remember old end point

      # special case handling for literal String objects
      if input.instance_of? String
        input = StringResult.new(input)
        input.start = [previous_line_end, previous_column_end]
        if (line_count = input.scan(/\r\n|\r|\n/).length) != 0 # count number of newlines in receiver
          column_end = input.jlength - input.jrindex(/\r|\n/) - 1 # calculate characters on last line
        else # no newlines in match
          column_end = input.jlength + previous_column_end
        end
        input.end = [previous_line_end + line_count, column_end]
      end

      @results << input if store

      if input.line_end > previous_line_end # end line has advanced
        @options[:line_end] = input.line_end
        @options[:column_end] = 0
      end

      if input.column_end > @options[:column_end] # end column has advanced
        @options[:column_end] = input.column_end
      end

      @options[:line_start] = @options[:line_end] # new start point is old end point
      @options[:column_start] = @options[:column_end] # new start point is old end point

      # calculate remainder
      line_delta = @options[:line_end] - previous_line_end
      if line_delta > 0 # have consumed newline(s)
        line_delta.times do # remove them from remainder
          newline_location = @remainder.jindex_plus_length /\r\n|\r|\n/ # find the location of the next newline
          @scanned << @remainder[0...newline_location] # record scanned text
          @remainder = @remainder[newline_location..-1] # strip everything up to and including the newline
        end
        @scanned << @remainder[0...@options[:column_end]]
        @remainder = @remainder[@options[:column_end]..-1] # delete up to the current column
      else # no newlines consumed
        column_delta = @options[:column_end] - previous_column_end
        if column_delta > 0 # there was movement within current line
          @scanned << @remainder[0...column_delta]
          @remainder = @remainder[column_delta..-1] # delete up to the current column
        end
      end
      @remainder
    end

    # Stores a clone of string as the base string, falling back to the object
    # itself if cloning raises.
    def base_string=(string)
      @base_string = (string.clone rescue string)
    end
  end # class ParserState
end # module Walrat
# diff --git a/lib/walrat/parslet.rb b/lib/walrat/parslet.rb
# new file
# mode 100755
# index 0000000..9b214af
# --- /dev/null
# +++ b/lib/walrat/parslet.rb
# @@ -0,0 +1,38 @@
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

require 'walrat'

module Walrat
  # Abstract base class for parslets. Mixes in the combining operators and
  # memoization; concrete subclasses must supply their own parse method.
  class Parslet
    include Walrat::ParsletCombining
    include Walrat::Memoizing

    # A Parslet is already parseable, so the receiver itself is returned.
    def to_parseable
      self
    end

    # Always raises NotImplementedError: parsing is the responsibility of
    # subclasses.
    def parse(string, options = {})
      raise NotImplementedError # subclass responsibility
    end
  end # class Parslet
end # module Walrat
# diff --git a/lib/walrat/parslet_choice.rb b/lib/walrat/parslet_choice.rb
# new file mode 100755
# index 0000000..76a4082
# --- /dev/null
# +++ b/lib/walrat/parslet_choice.rb
# @@ -0,0 +1,155 @@
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

require 'walrat'

module Walrat
  # An ordered choice between two or more alternatives: the first alternative
  # that parses successfully wins.
  class ParsletChoice < ParsletCombination
    # Precomputed hash value; recalculated whenever an alternative is added.
    attr_reader :hash

    # Either parameter may be a Parslet or a ParsletCombination.
    # Neither parameter may be nil (an ArgumentError is raised otherwise).
    # Any additional alternatives may be passed in others.
    def initialize(left, right, *others)
      raise ArgumentError if left.nil?
      raise ArgumentError if right.nil?
      @alternatives = [left, right] + others
      update_hash
    end

    # Override so that alternatives are appended to an existing choice.
    # Consider the following example:
    #
    #     A | B
    #
    # This constitutes a single choice:
    #
    #     (A | B)
    #
    # If we then make this a three-element choice:
    #
    #     A | B | C
    #
    # We are effectively creating a nested choice containing the original
    # choice and an additional element:
    #
    #     ((A | B) | C)
    #
    # Although such a nested choice is correctly parsed it is not as
    # architecturally clean as a single choice without nesting:
    #
    #     (A | B | C)
    #
    # This method allows us to use the architecturally cleaner format. Note
    # that the receiver itself is extended and returned.
    def |(next_parslet)
      append next_parslet
    end

    # First tries to parse the left option, falling back and trying the right
    # option and then any subsequent options on failure. If no option
    # successfully completes parsing then a ParseError is raised. Any
    # zero-width parse successes thrown by alternative parsers will flow on to
    # a higher level.
    #
    # The raised ParseError reports the end position of the rightmost failure.
    # If no alternative raised a ParseError at all (for example because every
    # alternative raised a LeftRecursionException) the start position from
    # options is reported instead.
    #
    # Raises an ArgumentError if string is nil.
    def parse(string, options = {})
      raise ArgumentError if string.nil?
      error = nil           # for error reporting purposes will track which parseable gets farthest to the right before failing
      left_recursion = nil  # will also track any left recursion that we detect
      @alternatives.each do |parseable|
        begin
          result = parseable.memoizing_parse(string, options) # successful parse
          if left_recursion && left_recursion.continuation     # and we have a continuation
            continuation = left_recursion.continuation         # continuations are once-only, one-way tickets
            left_recursion = nil                               # set this to nil so as not to call it again without meaning to
            continuation.call(result)                          # so jump back to where we were before
          end
          return result
        rescue LeftRecursionException => e
          left_recursion = e

          # TODO:
          # it's not enough to just catch this kind of exception and remember
          # the last one
          # may need to accumulate these in an array
          # consider the example rule:
          #   :a, :a & :b | :a & :c | :a & :d | :b
          # the first option will raise a LeftRecursionException
          # the next option will raise for the same reason
          # the third likewise
          # finally we get to the fourth option, the first which might succeed
          # at that point we should have three continuations
          # we should try the first, falling back to the second and third if
          # necessary
          # on successfully retrying, need to start all over again and try all
          # the options again, just in case further recursion is possible
          # so it is quite complicated
          # the question is, is it more complicated than the other ways of
          # getting right-associativity into Walrat-generated parsers?
        rescue ParseError => e
          # keep whichever error got farthest to the right
          error = e if error.nil? || !error.rightmost?(e)
        end
      end

      # should generally report the rightmost error; error is still nil when
      # no alternative raised a ParseError, so guard against calling location
      # accessors on nil (which would surface as a NoMethodError)
      line_end, column_end =
        if error
          [error.line_end, error.column_end]
        else
          [options[:line_start], options[:column_start]]
        end
      raise ParseError.new('no valid alternatives while parsing "%s" (%s)' % [string, error.to_s],
                           :line_end => line_end,
                           :column_end => column_end)
    end

    # Two choices are equal when other is exactly a ParsletChoice holding the
    # same number of alternatives, pairwise eql? and in the same order.
    def eql?(other)
      return false unless other.instance_of? ParsletChoice
      other_alternatives = other.alternatives
      return false if @alternatives.length != other_alternatives.length
      @alternatives.zip(other_alternatives).all? { |mine, theirs| mine.eql? theirs }
    end

    protected

    # For determining equality.
    attr_reader :alternatives

    private

    # Recomputes the cached hash: the sum of the alternatives' hashes plus a
    # fixed offset to avoid unwanted collisions with similar classes.
    def update_hash
      @hash = @alternatives.inject(30) { |sum, parseable| sum + parseable.hash }
    end

    # Appends another Parslet (or ParsletCombination) to the receiver and
    # returns the receiver.
    # Raises if parslet is nil.
    # Cannot use << as a method name because Ruby cannot parse it without
    # the self, and self is not allowed as an explicit receiver for private
    # messages.
    def append(next_parslet)
      raise ArgumentError if next_parslet.nil?
      @alternatives << next_parslet.to_parseable
      update_hash
      self
    end
  end # class ParsletChoice
end # module Walrat
# diff --git a/lib/walrat/parslet_combination.rb b/lib/walrat/parslet_combination.rb
# new file mode 100755
# index 0000000..eda38a4
# --- /dev/null
# +++ b/lib/walrat/parslet_combination.rb
# @@ -0,0 +1,34 @@
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

require 'walrat'

module Walrat
  # Base class for objects that combine other parseables. It mixes in the
  # combining operators (ParsletCombining) and memoization (Memoizing).
  class ParsletCombination
    include Walrat::ParsletCombining
    include Walrat::Memoizing

    # A combination is already parseable, so the receiver itself is returned.
    def to_parseable
      self
    end
  end # class ParsletCombination
end # module Walrat
# diff --git a/lib/walrat/parslet_combining.rb b/lib/walrat/parslet_combining.rb
# new file mode 100755
# index 0000000..629aa0c
# --- /dev/null
# +++ b/lib/walrat/parslet_combining.rb
# @@ -0,0 +1,190 @@
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

require 'walrat'

module Walrat
  # The ParsletCombining module, together with the ParsletCombination class and
  # its subclasses, provides simple container classes for encapsulating
  # relationships among Parslets. By storing this information outside of the
  # Parslet objects themselves their design is kept clean and they can become
  # immutable objects which are much more easily copied and shared among
  # multiple rules in a Grammar.
  #
  # Every method here relies on the including object answering to_parseable.
  module ParsletCombining
    # Convenience method: converts the receiver with to_parseable and forwards
    # memoizing_parse to the result.
    def memoizing_parse(string, options = {})
      to_parseable.memoizing_parse(string, options)
    end

    # Convenience method: converts the receiver with to_parseable and forwards
    # parse to the result.
    def parse(string, options = {})
      to_parseable.parse(string, options)
    end

    # Defines a sequence of Parslets (or ParsletCombinations).
    # Returns a ParsletSequence instance.
    def sequence(first, second, *others)
      Walrat::ParsletSequence.new(first.to_parseable, second.to_parseable, *others)
    end

    # Shorthand for ParsletCombining.sequence(self, next_parslet).
    def &(next_parslet)
      sequence(self, next_parslet)
    end

    # Defines a sequence of Parslets similar to the sequence method but with
    # the difference that the contents of array results from the component
    # parslets will be merged into a single array rather than being added as
    # arrays. To illustrate:
    #
    #     'foo' & 'bar'.one_or_more   # returns results like ['foo', ['bar', 'bar', 'bar']]
    #     'foo' >> 'bar'.one_or_more  # returns results like ['foo', 'bar', 'bar', 'bar']
    #
    # Returns a ParsletMerge instance.
    def merge(first, second, *others)
      Walrat::ParsletMerge.new(first.to_parseable, second.to_parseable, *others)
    end

    # Shorthand for ParsletCombining.merge(self, next_parslet).
    def >>(next_parslet)
      merge(self, next_parslet)
    end

    # Defines a choice of Parslets (or ParsletCombinations).
    # Returns a ParsletChoice instance.
    def choice(left, right, *others)
      Walrat::ParsletChoice.new(left.to_parseable, right.to_parseable, *others)
    end

    # Shorthand for ParsletCombining.choice(self, alternative_parslet).
    def |(alternative_parslet)
      choice(self, alternative_parslet)
    end

    # Defines a repetition of the supplied Parslet (or ParsletCombination).
    # Returns a ParsletRepetition instance.
    def repetition(parslet, min, max)
      Walrat::ParsletRepetition.new(parslet.to_parseable, min, max)
    end

    # Shorthand for ParsletCombining.repetition(self, min, max).
    def repeat(min = nil, max = nil)
      repetition(self, min, max)
    end

    # Like repetition, but additionally passes default on to the resulting
    # ParsletRepetitionDefault instance.
    def repetition_with_default(parslet, min, max, default)
      Walrat::ParsletRepetitionDefault.new(parslet.to_parseable, min, max, default)
    end

    # Shorthand for ParsletCombining.repetition_with_default(self, ...).
    def repeat_with_default(min = nil, max = nil, default = nil)
      repetition_with_default(self, min, max, default)
    end

    # Shorthand for a repetition of between 0 and 1 matches.
    # This method optionally takes a single parameter specifying what object
    # should be returned as a placeholder when there are no matches; this is
    # useful for packing into ASTs where it may be better to parse an empty
    # Array rather than nil. The specified object is cloned and returned in the
    # event that there are no matches. As a convenience, the specified object
    # is automatically extended using the LocationTracking module (this is a
    # convenience so that you can specify empty Arrays, "[]", rather than
    # explicitly passing an "ArrayResult.new")
    def optional(default_return_value = NoParameterMarker.instance)
      # default behaviour: no placeholder was supplied
      return repeat(0, 1) if default_return_value == NoParameterMarker.instance

      repeat_with_default(0, 1, default_return_value)
    end

    # Alternative to optional (without a placeholder).
    def zero_or_one
      optional
    end

    # Matches the receiver any number of times, including none; accepts the
    # same optional placeholder as the optional method.
    # possible synonym "star"
    def zero_or_more(default_return_value = NoParameterMarker.instance)
      # default behaviour: no placeholder was supplied
      return repeat(0) if default_return_value == NoParameterMarker.instance

      repeat_with_default(0, nil, default_return_value)
    end

    # Matches the receiver at least once.
    # possible synonym "plus"
    def one_or_more
      repeat(1)
    end

    # Parsing Expression Grammar support.
    # Succeeds if parslet succeeds but consumes no input (throws an
    # :AndPredicateSuccess symbol).
    # Returns an AndPredicate instance.
    def and_predicate(parslet)
      Walrat::AndPredicate.new(parslet.to_parseable)
    end

    # Shorthand for and_predicate(self).
    # Strictly speaking, this shorthand breaks with established Ruby practice
    # that "?" at the end of a method name should indicate a method that
    # returns true or false.
    def and?
      and_predicate(self)
    end

    # Parsing Expression Grammar support.
    # Succeeds if parslet fails (throws a :NotPredicateSuccess symbol).
    # Fails if parslet succeeds (raises a ParseError).
    # Consumes no input.
    # This method will almost invariably be used in conjunction with the &
    # operator, like this:
    #
    #     rule :foo, :p1 & :p2.not_predicate
    #     rule :foo, :p1 & :p2.not!
    #
    # Returns a NotPredicate instance.
    def not_predicate(parslet)
      Walrat::NotPredicate.new(parslet.to_parseable)
    end

    # Shorthand for not_predicate(self).
    # Strictly speaking, this shorthand breaks with established Ruby practice
    # that "!" at the end of a method name should indicate a destructive
    # behaviour on (mutation of) the receiver.
    def not!
      not_predicate(self)
    end

    # Succeeds if parsing succeeds, consuming the input, but doesn't actually
    # return anything.
    #
    # This is for elements which are required but which shouldn't appear in the
    # final AST.
    # Returns a ParsletOmission instance.
    def omission(parslet)
      Walrat::ParsletOmission.new(parslet.to_parseable)
    end

    # Shorthand for ParsletCombining.omission(self).
    def skip
      omission(self)
    end
  end # module ParsletCombining
end # module Walrat
# diff --git a/lib/walrat/parslet_merge.rb b/lib/walrat/parslet_merge.rb
# new file mode 100755
# index 0000000..9b68337
# --- /dev/null
# +++ b/lib/walrat/parslet_merge.rb
# @@ -0,0 +1,96 @@
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

require 'walrat'

module Walrat
  # A sequence whose component results are merged: when a component returns
  # something that responds to "each", its elements are recorded one by one
  # instead of being recorded as a single nested result.
  #
  # NOTE(review): @components and the "components" reader are not defined in
  # this class; presumably they come from ParsletSequence -- confirm there.
  class ParsletMerge < ParsletSequence
    # Parses each component in turn against the remainder left by the previous
    # one, accumulating results in a ParserState.
    #
    # Returns the accumulated results. If the results respond to "empty?" and
    # are empty, the last symbol caught from a component is re-thrown instead.
    #
    # Raises an ArgumentError if string is nil.
    def parse string, options = {}
      raise ArgumentError if string.nil?
      state = ParserState.new string, options
      last_caught = nil # keep track of the last kind of throw to be caught
      @components.each do |parseable|
        # the catch blocks are nested so that code placed after an inner block
        # runs only when that block's symbol was thrown: a normal completion
        # always leaves via the throw of :ProcessNextComponent
        catch :ProcessNextComponent do
          catch :NotPredicateSuccess do
            catch :AndPredicateSuccess do
              catch :ZeroWidthParseSuccess do
                begin
                  parsed = parseable.memoizing_parse state.remainder, state.options
                  if parsed.respond_to? :each
                    # merge: record each element rather than the container
                    parsed.each { |element| state.parsed element }
                  else
                    state.parsed(parsed)
                  end
                rescue SkippedSubstringException => e
                  # consume the skipped text without recording a result
                  state.skipped(e)
                # rescue ParseError => e # failed, will try to skip; save original error in case skipping fails
                #   if options.has_key?(:skipping_override) : skipping_parslet = options[:skipping_override]
                #   elsif options.has_key?(:skipping) : skipping_parslet = options[:skipping]
                #   else skipping_parslet = nil
                #   end
                #   raise e if skipping_parslet.nil? # no skipper defined, raise original error
                #   begin
                #     # guard against self references (possible infinite recursion) here?
                #     parsed = skipping_parslet.memoizing_parse(state.remainder, state.options)
                #     state.skipped(parsed)
                #     redo # skipping succeeded, try to redo
                #   rescue ParseError
                #     raise e # skipping didn't help either, raise original error
                #   end
                end
                last_caught = nil
                throw :ProcessNextComponent # can't use "next" here because it will only break out of innermost "do"
              end
              last_caught = :ZeroWidthParseSuccess
              throw :ProcessNextComponent
            end
            last_caught = :AndPredicateSuccess
            throw :ProcessNextComponent
          end
          last_caught = :NotPredicateSuccess
        end
      end

      # the trailing "and" continues the condition onto the next line, so the
      # throw is the last operand of the condition and only executes when the
      # results are empty; the "then" branch is intentionally empty
      # NOTE(review): if the results are empty while last_caught is still nil
      # (for example when every component was skipped) this throws nil, and
      # nothing in this method catches that -- confirm the intended behaviour
      if state.results.respond_to? :empty? and state.results.empty? and
        throw last_caught
      else
        state.results
      end
    end

    # Two merges are equal when other is exactly a ParsletMerge holding the
    # same number of components, pairwise eql? and in the same order.
    def eql?(other)
      return false if not other.instance_of? ParsletMerge
      other_components = other.components
      return false if @components.length != other_components.length
      for i in 0..(@components.length - 1)
        return false unless @components[i].eql? other_components[i]
      end
      true
    end

    private

    # Fixed per-class constant returned to whoever computes the hash.
    def hash_offset
      53
    end
  end # class ParsletMerge
end # module Walrat
# diff --git a/lib/walrat/parslet_omission.rb b/lib/walrat/parslet_omission.rb
# new file mode 100755
# index 0000000..fe8c14a
# --- /dev/null
# +++ b/lib/walrat/parslet_omission.rb
# @@ -0,0 +1,74 @@
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

require 'walrat'

module Walrat
  # Wraps a parseable whose match must be consumed but left out of the
  # results: a successful parse is reported by raising a
  # SkippedSubstringException that carries the matched material.
  class ParsletOmission < ParsletCombination
    # Precomputed hash value, fixed at construction time.
    attr_reader :hash

    # Raises an ArgumentError if parseable is nil.
    def initialize(parseable)
      raise ArgumentError, 'nil parseable' if parseable.nil?

      @parseable = parseable

      # fixed offset to avoid unwanted collisions with similar classes
      @hash = @parseable.hash + 46
    end

    # Parses string with the wrapped parseable and always finishes by raising
    # a SkippedSubstringException describing what was matched. If the wrapped
    # parseable throws :ZeroWidthParseSuccess, an empty StringResult located
    # at the start position from options is reported instead.
    #
    # Raises an ArgumentError if string is nil.
    def parse(string, options = {})
      raise ArgumentError, 'nil string' if string.nil?

      # placeholder used when the wrapped parseable matches zero width
      skipped = StringResult.new
      skipped.start = [options[:line_start], options[:column_start]]
      skipped.end   = [options[:line_start], options[:column_start]]

      # possibly should catch these here as well
      #catch :NotPredicateSuccess do
      #catch :AndPredicateSuccess do
      # one of the fundamental problems is that if a parslet throws such a
      # symbol any info about already skipped material is lost (because the
      # symbols contain nothing)
      # this may be one reason to change these to exceptions...
      catch(:ZeroWidthParseSuccess) do
        skipped = @parseable.memoizing_parse(string, options)
      end

      # not enough to just return a ZeroWidthParseSuccess here; that could
      # cause higher levels to stop parsing and in any case there'd be no
      # clean way to embed the scanned substring in the symbol
      location = {
        :line_start   => options[:line_start],
        :column_start => options[:column_start],
        :line_end     => skipped.line_end,
        :column_end   => skipped.column_end
      }
      raise SkippedSubstringException.new(skipped, location)
    end

    # Equal when other is exactly a ParsletOmission wrapping an eql? parseable.
    def eql?(other)
      other.instance_of?(ParsletOmission) && other.parseable.eql?(@parseable)
    end

    protected

    # For determining equality.
    attr_reader :parseable
  end # class ParsletOmission
end # module Walrat
# diff --git a/lib/walrat/parslet_repetition.rb b/lib/walrat/parslet_repetition.rb
# new file mode 100755
# index 0000000..b2ecfb7
# --- /dev/null
# +++ b/lib/walrat/parslet_repetition.rb
# @@ -0,0 +1,114 @@
# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.
module Walrat
  # Applies a wrapped parseable repeatedly, requiring at least "min" matches
  # and attempting at most "max" matches (no upper bound when max is nil).
  class ParsletRepetition < ParsletCombination
    attr_reader :hash

    # Stores the parseable and the repetition bounds; min and max go through
    # the private setters so that the hash is recomputed.
    #
    # Raises an ArgumentError if parseable or min is nil.
    def initialize parseable, min, max = nil
      raise ArgumentError, 'nil parseable' if parseable.nil?
      raise ArgumentError, 'nil min' if min.nil?
      @parseable = parseable
      self.min = min
      self.max = max
    end

    # Parses string by applying the wrapped parseable over and over.
    #
    # When an attempt raises ParseError a skipping parslet is looked up in
    # options (:skipping_override takes precedence over :skipping); if it
    # succeeds the attempt is repeated, otherwise the loop ends.
    #
    # Outcome, based on the number of matches recorded in the parser state:
    # - zero matches and min == 0: throws :ZeroWidthParseSuccess
    # - fewer matches than min: raises ParseError
    # - otherwise: returns state.results
    #
    # Raises an ArgumentError if string is nil.
    def parse string, options = {}
      raise ArgumentError, 'nil string' if string.nil?
      state = ParserState.new string, options
      catch :ZeroWidthParseSuccess do # a zero-width match is grounds for immediate abort
        while @max.nil? or state.length < @max # try forever if max is nil; otherwise keep trying while match count < max
          begin
            parsed = @parseable.memoizing_parse state.remainder, state.options
            state.parsed parsed
          rescue SkippedSubstringException => e
            # the wrapped parseable reported skipped material: record it
            state.skipped e
          rescue ParseError => e # failed, will try to skip (note: e is not re-raised; giving up just ends the loop)
            if options.has_key?(:skipping_override)
              skipping_parslet = options[:skipping_override]
            elsif options.has_key?(:skipping)
              skipping_parslet = options[:skipping]
            else
              skipping_parslet = nil
            end
            break if skipping_parslet.nil?
            begin
              # guard against self references (possible infinite recursion) here?
              parsed = skipping_parslet.memoizing_parse state.remainder, state.options
              state.skipped parsed
              redo # skipping succeeded, try to redo
            rescue ParseError
              break # skipping didn't help either, give up
            end
          end
        end
      end

      # now assess whether our tries met the requirements
      if state.length == 0 and @min == 0 # success (special case)
        throw :ZeroWidthParseSuccess
      elsif state.length < @min # matches < min (failure)
        raise ParseError.new('required %d matches but obtained %d while parsing "%s"' % [@min, state.length, string],
          :line_end => state.options[:line_end],
          :column_end => state.options[:column_end])
      else # success (general case)
        state.results # returns multiple matches as an array, single matches as a single object
      end
    end

    # Equal when other is exactly a ParsletRepetition with the same bounds and
    # an equal parseable.
    def eql?(other)
      other.instance_of? ParsletRepetition and
        @min == other.min and
        @max == other.max and
        @parseable.eql? other.parseable
    end

    protected

    # For determining equality.
    attr_reader :parseable, :min, :max

    private

    # Constant mixed into the hash; overridable by subclasses.
    def hash_offset
      87
    end

    # Recomputes @hash from the bounds, the parseable and the offset; called
    # by the setters below.
    def update_hash
      # fixed offset to minimize risk of collisions
      @hash = @min.hash + @max.hash + @parseable.hash + hash_offset
    end

    # Stores a clone of min (or min itself if it cannot be cloned) and
    # refreshes the hash.
    def min=(min)
      @min = (min.clone rescue min)
      update_hash
    end

    # Stores a clone of max (or max itself if it cannot be cloned) and
    # refreshes the hash.
    def max=(max)
      @max = (max.clone rescue max)
      update_hash
    end
  end # class ParsletRepetition
end # module Walrat
module Walrat
  # A ParsletRepetition that never lets :ZeroWidthParseSuccess escape: when
  # the parent's parse throws that symbol it is caught here and a copy of the
  # default value supplied at initialization time is returned instead.
  class ParsletRepetitionDefault < ParsletRepetition
    # Kept as a separate class so that ParsletRepetition itself stays simple;
    # folding this behaviour into the parent remains a possible refactoring.
    def initialize parseable, min, max = nil, default = nil
      super(parseable, min, max)
      self.default = default
    end

    # Returns the parent's parse result, or a clone of the default (the
    # default itself if cloning fails) when the parent throws
    # :ZeroWidthParseSuccess.
    def parse string, options = {}
      catch :ZeroWidthParseSuccess do
        return super(string, options)
      end

      # only reached when the symbol was thrown
      begin
        @default.clone
      rescue
        @default
      end
    end

    # Equal when other is exactly a ParsletRepetitionDefault with the same
    # bounds, an equal parseable and an equal default.
    def eql?(other)
      return false unless other.instance_of?(ParsletRepetitionDefault)

      @min == other.min &&
        @max == other.max &&
        @parseable.eql?(other.parseable) &&
        @default == other.default
    end

    protected

    # Exposed to other instances for equality checks.
    attr_reader :default

    private

    def hash_offset
      69
    end

    # Adds the default's hash on top of the value computed by the parent.
    def update_hash
      inherited = super
      @hash = inherited + @default.hash
    end

    # Stores a clone of default (or default itself if it cannot be cloned),
    # extends the stored object with LocationTracking and refreshes the hash.
    def default=(default)
      copy = begin
        default.clone
      rescue
        default
      end
      @default = copy
      @default.extend LocationTracking
      update_hash
    end
  end # class ParsletRepetitionDefault
end # module Walrat
module Walrat
  # Matches a list of components one after another against the input and
  # collects their results.
  class ParsletSequence < ParsletCombination
    attr_reader :hash

    # Builds a sequence from at least two components.
    #
    # Raises an ArgumentError if first or second is nil.
    def initialize first, second, *others
      raise ArgumentError if first.nil?
      raise ArgumentError if second.nil?
      @components = [first, second] + others
      update_hash
    end

    # Override so that sequences are appended to an existing sequence:
    # Consider the following example:
    #
    #     A & B
    #
    # This constitutes a single sequence:
    #
    #     (A & B)
    #
    # If we then make this a three-element sequence:
    #
    #     A & B & C
    #
    # We are effectively creating a nested sequence containing the original
    # sequence and an additional element:
    #
    #     ((A & B) & C)
    #
    # Although such a nested sequence is correctly parsed it produces unwanted
    # nesting in the results because instead of returning a one-dimensional
    # array of results:
    #
    #     [a, b, c]
    #
    # It returns a nested array:
    #
    #     [[a, b], c]
    #
    # The solution to this unwanted nesting is to allow appending to an
    # existing sequence by using the private "append" method.
    #
    # This ensures that:
    #
    #     A & B & C
    #
    # Translates to a single sequence:
    #
    #     (A & B & C)
    #
    # And a single, uni-dimensional results array:
    #
    #     [a, b, c]
    def &(next_parslet)
      append next_parslet
    end

    # Values for the skip_first argument of parse_common.
    SKIP_FIRST  = true
    NO_SKIP     = false

    # Parses string using every component of the sequence.
    def parse string, options = {}
      parse_common NO_SKIP, string, options
    end

    # Parses string using every component except the first one; used by the
    # left-recursion helper below.
    def parse_remainder string, options = {}
      parse_common SKIP_FIRST, string, options
    end

    # Shared implementation behind parse and parse_remainder.
    #
    # Each component is applied to the remainder held by a ParserState.
    # Symbols thrown by components (:ZeroWidthParseSuccess,
    # :AndPredicateSuccess, :NotPredicateSuccess) are caught and the most
    # recent one is remembered; if the final results are empty and such a
    # symbol was remembered it is re-thrown, otherwise the results are
    # returned. When skip_first is true the results are returned without that
    # check.
    #
    # On ParseError a skipping parslet from options (:skipping_override first,
    # then :skipping) is tried; if there is none, or it fails too, the
    # original error is raised.
    #
    # Raises an ArgumentError if string is nil.
    def parse_common skip_first, string, options = {}
      raise ArgumentError if string.nil?
      state           = ParserState.new(string, options)
      last_caught     = nil   # keep track of the last kind of throw to be caught
      left_recursion  = false # keep track of whether left recursion was detected

      @components.each_with_index do |parseable, index|
        if index == 0 # for first component only
          if skip_first
            next
          end
          begin
            check_left_recursion(parseable, options)
          rescue LeftRecursionException => e
            left_recursion  = true
            continuation    = nil
            value           = callcc { |c| continuation = c }
            if value == continuation        # first time that we're here
              e.continuation = continuation # pack continuation into exception
              raise e                       # and propagate
            else
              # re-entered through the continuation: value is the result
              # handed back for the first component
              grammar   = state.options[:grammar]
              rule_name = state.options[:rule_name]
              state.parsed grammar.wrap(value, rule_name)
              next
            end
          end
        end

        catch :ProcessNextComponent do
          catch :NotPredicateSuccess do
            catch :AndPredicateSuccess do
              catch :ZeroWidthParseSuccess do
                begin
                  parsed = parseable.memoizing_parse state.remainder, state.options
                  state.parsed parsed
                rescue SkippedSubstringException => e
                  state.skipped e
                rescue ParseError => e
                  # failed, will try to skip; save original error in case
                  # skipping fails
                  if options.has_key?(:skipping_override)
                    skipping_parslet = options[:skipping_override]
                  elsif options.has_key?(:skipping)
                    skipping_parslet = options[:skipping]
                  else
                    skipping_parslet = nil
                  end
                  raise e if skipping_parslet.nil? # no skipper defined, raise original error
                  begin
                    # guard against self references (possible infinite recursion) here?
                    parsed = skipping_parslet.memoizing_parse state.remainder, state.options
                    state.skipped(parsed)
                    redo      # skipping succeeded, try to redo
                  rescue ParseError
                    raise e   # skipping didn't help either, raise original error
                  end
                end
                last_caught = nil

                # can't use "next" here because it would only break out of
                # innermost "do" rather than continuing the iteration
                throw :ProcessNextComponent
              end
              last_caught = :ZeroWidthParseSuccess
              throw :ProcessNextComponent
            end
            last_caught = :AndPredicateSuccess
            throw :ProcessNextComponent
          end
          last_caught = :NotPredicateSuccess
        end
      end

      if left_recursion
        results = recurse(state)
      else
        results = state.results
      end

      return results if skip_first

      if results.respond_to? :empty? and results.empty? and last_caught
        throw last_caught
      else
        results
      end
    end

    # Left-recursion helper.
    #
    # Starting from the remainder left in state, repeatedly parses the
    # non-first components (parse_remainder) and nests each successful result
    # around the previous one in an ArrayResult. Stops when the input is
    # exhausted or a ParseError is raised, and returns the last successful
    # nesting (or state.results if there was none).
    def recurse state
      return state.results if state.remainder == '' # further recursion is not possible
      new_state               = ParserState.new state.remainder, state.options
      last_successful_result  = nil

      # the loop must watch new_state, the state that actually advances;
      # state itself is never modified here, so testing state.remainder would
      # always be true after the early return above
      while new_state.remainder != ''
        begin
          new_results = parse_remainder new_state.remainder, new_state.options
          new_state.parsed new_results
          last_successful_result = ArrayResult[last_successful_result || state.results, new_results]
        rescue ParseError
          break
        end
      end
      last_successful_result || state.results
    end

    # Equal when other is exactly a ParsletSequence with the same number of
    # components and each pair of components is eql?.
    def eql?(other)
      return false if not other.instance_of? ParsletSequence
      other_components = other.components
      return false if @components.length != other_components.length
      for i in 0..(@components.length - 1)
        return false unless @components[i].eql? other_components[i]
      end
      true
    end

    protected

    # For determining equality.
    attr_reader :components

    private

    # Constant mixed into the hash; overridable by subclasses.
    def hash_offset
      40
    end

    # Recomputes @hash as the offset plus the sum of the component hashes.
    def update_hash
      # fixed offset to avoid unwanted collisions with similar classes
      @hash = hash_offset
      @components.each { |parseable| @hash += parseable.hash }
    end

    # Appends another Parslet, ParsletCombination or Predicate to the receiver
    # and returns the receiver.
    #
    # Raises if next_parslet is nil.
    # Cannot use << as a method name because Ruby cannot parse it without the
    # self, and self is not allowed as an explicit receiver for private
    # messages.
    def append next_parslet
      raise ArgumentError if next_parslet.nil?
      @components << next_parslet.to_parseable
      update_hash
      self
    end
  end # class ParsletSequence
end # module Walrat
module Walrat
  # Base class for predicates, which inspect input without consuming it.
  # A successful predicate throws a symbol specific to its subclass (see
  # AndPredicate and NotPredicate); a failed one raises a ParseError.
  class Predicate
    include Walrat::ParsletCombining
    include Walrat::Memoizing

    attr_reader :hash

    # Stores the wrapped parseable and precomputes the hash.
    #
    # Raises an ArgumentError if parseable is nil.
    def initialize(parseable)
      if parseable.nil?
        raise ArgumentError, 'nil parseable'
      end

      @parseable = parseable

      # the offset keeps the predicate's hash distinct from its parseable's
      @hash = @parseable.hash + hash_offset
    end

    # A predicate is already parseable, so it returns itself.
    def to_parseable
      self
    end

    # Must be implemented by subclasses.
    def parse(string, options = {})
      raise NotImplementedError
    end

    # Equal when other is an instance of exactly the receiver's class and
    # wraps an equal parseable.
    def eql?(other)
      other.instance_of?(self.class) && other.parseable.eql?(@parseable)
    end

    protected

    # Exposed to other instances for equality checks.
    attr_reader :parseable

    private

    # Constant mixed into the hash.
    def hash_offset
      10
    end
  end
end # module Walrat
module Walrat
  # A parslet whose parsing logic is supplied by the caller as a callable.
  class ProcParslet < Parslet
    attr_reader :hash

    # Raises an ArgumentError if proc is nil.
    def initialize(proc)
      raise ArgumentError, 'nil proc' if proc.nil?

      self.expected_proc = proc
    end

    # Delegates to the stored callable, passing string and options through,
    # and returns whatever it returns.
    #
    # Raises an ArgumentError if string is nil.
    def parse(string, options = {})
      raise ArgumentError, 'nil string' if string.nil?

      @expected_proc.call(string, options)
    end

    # Equal when other is exactly a ProcParslet holding an == callable.
    def eql?(other)
      other.instance_of?(ProcParslet) && other.expected_proc == @expected_proc
    end

    protected

    # Exposed to other instances for equality checks.
    attr_reader :expected_proc

    private

    # Stores a clone of proc (or proc itself if it cannot be cloned) and
    # refreshes the hash.
    def expected_proc=(proc)
      @expected_proc = begin
        proc.clone
      rescue
        proc
      end
      update_hash
    end

    # The constant keeps the parslet's hash distinct from the callable's.
    def update_hash
      @hash = @expected_proc.hash + 105
    end
  end # class ProcParslet
end # module Walrat
module Walrat
  # A parslet that matches a regular expression at the start of the input.
  class RegexpParslet < Parslet
    attr_reader :hash

    # Wraps regexp in a new expression anchored at the start of the string.
    #
    # Raises an ArgumentError if regexp is nil.
    def initialize(regexp)
      raise ArgumentError, 'nil regexp' if regexp.nil?

      # anchoring every regexp avoids scanning beyond the start of the input
      self.expected_regexp = /\A#{regexp}/
    end

    # Matches string against the anchored regexp.
    #
    # On success returns a MatchDataWrapper whose start comes from options,
    # whose end reflects the newlines and characters in the matched text and
    # whose source_text is a copy of the matched text. On failure raises a
    # ParseError located at the start position (0 when not given in options).
    #
    # Raises an ArgumentError if string is nil.
    def parse(string, options = {})
      raise ArgumentError, 'nil string' if string.nil?

      unless string =~ @expected_regexp
        raise ParseError.new('non-matching characters "%s" while parsing regular expression "%s"' % [string, @expected_regexp.inspect],
          :line_end   => (options[:line_start] || 0),
          :column_end => (options[:column_start] || 0))
      end

      # grab the match data right away, before any further regexp operation
      match_data = $~
      result     = MatchDataWrapper.new match_data
      matched    = match_data[0]

      # number of line breaks inside the matched text
      newlines = matched.scan(/\r\n|\r|\n/).length
      if newlines != 0
        # characters following the last line break
        final_column = matched.jlength - matched.jrindex(/\r|\n/) - 1
      else
        # same line: advance from the starting column
        final_column = matched.jlength + (options[:column_start] || 0)
      end

      result.start       = [options[:line_start], options[:column_start]]
      result.end         = [result.line_start + newlines, final_column]
      result.source_text = matched.to_s.clone
      result
    end

    # Equal when other is exactly a RegexpParslet with an == regexp.
    def eql?(other)
      other.instance_of?(RegexpParslet) &&
        other.expected_regexp == @expected_regexp
    end

    # Shows the class, object id and the anchored regexp.
    def inspect
      details = [self.class.to_s, self.object_id, @expected_regexp.inspect]
      '#<%s:0x%x @expected_regexp=%s>' % details
    end

    protected

    # Exposed to other instances for equality checks.
    attr_reader :expected_regexp

    private

    # Stores a clone of regexp (or regexp itself if it cannot be cloned) and
    # refreshes the hash.
    def expected_regexp=(regexp)
      @expected_regexp = begin
        regexp.clone
      rescue
        regexp
      end
      update_hash
    end

    # The constant keeps the parslet's hash distinct from the regexp's.
    def update_hash
      @hash = @expected_regexp.hash + 15
    end
  end # class RegexpParslet
end # module Walrat
module Walrat
  # Signals that a substring was skipped rather than parsed. An exception is
  # used for this non-error situation because a thrown symbol (throw/catch)
  # cannot carry the skipped material along with it.
  class SkippedSubstringException < Exception
    include Walrat::LocationTracking

    # substring is passed to the superclass as the exception message; info
    # may supply :line_start, :column_start, :line_end and :column_end, which
    # are copied to the corresponding location attributes (nil when absent).
    def initialize(substring, info = {})
      super(substring)

      # TODO: this duplicates the location handling in ParseError; sharing it
      # through inheritance would require care with the ordering of rescue
      # blocks and changing many "kind_of" checks in the specs to
      # "instance_of"; a mix-in is the other option
      [:line_start, :column_start, :line_end, :column_end].each do |attribute|
        send "#{attribute}=", info[attribute]
      end
    end
  end # class SkippedSubstringException
end # module Walrat
module Walrat
  # Walks a string one character at a time using a StringScanner.
  # The original notes state that Unicode (UTF-8) support requires $KCODE to
  # be set to 'U'.
  class StringEnumerator
    # The character returned by the call to "next" before the most recent
    # one, or nil if there was no such call.
    attr_reader :last

    # Raises an ArgumentError if string is nil.
    def initialize(string)
      raise ArgumentError, 'nil string' if string.nil?

      @scanner = StringScanner.new(string)
      @last    = nil
      @current = nil
    end

    # Returns the next character, or nil once the string is exhausted, and
    # moves the previously returned character into "last".
    # Per the original notes this only works as expected with $KCODE set to
    # 'U' (UTF-8).
    def next
      @last = @current

      # multiline mode is required, otherwise "." would not match newlines
      @current = @scanner.scan(/./m)
    end
  end # class StringEnumerator
end # module Walrat
module Walrat
  # A parslet that matches a fixed string, character by character.
  class StringParslet < Parslet
    attr_reader :hash

    # Raises an ArgumentError if string is nil.
    def initialize(string)
      raise ArgumentError if string.nil?

      self.expected_string = string
    end

    # Compares the start of string with the expected string one character at
    # a time, tracking line and column as it goes.
    #
    # Returns a StringResult holding the matched characters, with start taken
    # from options, end advanced past the match and source_text set to a copy
    # of the matched text. Raises a ParseError at the current position when
    # the input ends early or a character differs.
    #
    # Raises an ArgumentError if string is nil.
    def parse(string, options = {})
      raise ArgumentError if string.nil?

      chars        = StringEnumerator.new string
      parsed       = StringResult.new
      parsed.start = [options[:line_start], options[:column_start]]
      parsed.end   = parsed.start

      expected_string.each_char do |expected_char|
        actual_char = chars.next

        if actual_char.nil?
          raise ParseError.new('unexpected end-of-string (expected "%s") while parsing "%s"' %
                               [ expected_char, expected_string ],
                               :line_end    => parsed.line_end,
                               :column_end  => parsed.column_end)
        end

        unless actual_char == expected_char
          raise ParseError.new('unexpected character "%s" (expected "%s") while parsing "%s"' %
                               [ actual_char, expected_char, expected_string],
                               :line_end    => parsed.line_end,
                               :column_end  => parsed.column_end)
        end

        # "\r" always ends a line; "\n" does so only when it does not follow
        # an "\r" (covers Mac, Windows and UNIX end-of-line markers)
        line_break = actual_char == "\r" ||
          (actual_char == "\n" && chars.last != "\r")

        if line_break
          parsed.column_end = 0
          parsed.line_end   = parsed.line_end + 1
        elsif actual_char != "\n"
          # an "\n" right after "\r" was already counted; all else advances
          parsed.column_end = parsed.column_end + 1
        end

        parsed << actual_char
      end

      parsed.source_text = parsed.to_s.clone
      parsed
    end

    # Equal when other is exactly a StringParslet with an == expected string.
    def eql?(other)
      other.instance_of?(StringParslet) &&
        other.expected_string == @expected_string
    end

    protected

    # Exposed to other instances for equality checks.
    attr_reader :expected_string

    private

    # Stores a clone of string (or string itself if it cannot be cloned) and
    # refreshes the hash.
    def expected_string=(string)
      @expected_string = begin
        string.clone
      rescue
        string
      end
      update_hash
    end

    # The constant keeps the parslet's hash distinct from the string's.
    def update_hash
      @hash = @expected_string.hash + 20
    end
  end # class StringParslet
end # module Walrat
module Walrat
  # A String subclass that also carries location information through the
  # LocationTracking mix-in.
  class StringResult < String
    include Walrat::LocationTracking

    # Records string as the source text, then initializes the underlying
    # String with the same value.
    def initialize(string = "")
      self.source_text = string
      super(string)
    end
  end # class StringResult
end # module Walrat
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+require 'walrat'
+
+module Walrat
+  # A SymbolParslet allows for evaluation of a parslet to be deferred until
+  # runtime (or parse time, to be more precise).
+  class SymbolParslet < Parslet
+    attr_reader :hash
+
+    # Wraps +symbol+, the key under which the rule is looked up at parse time.
+    # Raises an ArgumentError if +symbol+ is nil.
+    def initialize symbol
+      raise ArgumentError, 'nil symbol' if symbol.nil?
+      @symbol = symbol
+
+      # fixed offset to avoid collisions with @parseable objects
+      @hash = @symbol.hash + 20
+    end
+
+    # SymbolParslets don't actually know what Grammar they are associated with
+    # at the time of their definition. They expect the Grammar to be passed in
+    # with the options hash under the ":grammar" key.
+    #
+    # The rule stored in the grammar under the wrapped symbol is parsed (via
+    # memoizing_parse) with a copy of +options+ that additionally carries
+    # :rule_name and, when the grammar has one for this symbol,
+    # :skipping_override. The result is passed through grammar.wrap before
+    # being returned.
+    #
+    # Raises an ArgumentError (with a descriptive message) if string is nil,
+    # or if the options hash does not include a :grammar key.
+    def parse string, options = {}
+      raise ArgumentError, 'nil string' if string.nil?
+      raise ArgumentError, 'no :grammar option' unless options.key?(:grammar)
+
+      grammar = options[:grammar]
+
+      # work on a copy so that the caller's options hash is left untouched
+      augmented_options = options.clone
+      augmented_options[:rule_name] = @symbol
+      if grammar.skipping_overrides.key?(@symbol)
+        augmented_options[:skipping_override] = grammar.skipping_overrides[@symbol]
+      end
+
+      result = grammar.rules[@symbol].memoizing_parse(string, augmented_options)
+      grammar.wrap(result, @symbol)
+    end
+
+    # We override the to_s method as it can make parsing error messages more
+    # readable. Instead of messages like this:
+    #
+    #   predicate not satisfied (expected "#<Walrat::SymbolParslet:0x...>")
+    #   while parsing "hello world"
+    #
+    # We can print messages like this:
+    #
+    #   predicate not satisfied (expected "rule: end_of_input") while parsing
+    #   "hello world"
+    def to_s
+      "rule: #{@symbol}"
+    end
+
+    # Same comparison as eql?.
+    def ==(other)
+      eql?(other)
+    end
+
+    # True only for another SymbolParslet (exact class) wrapping an equal
+    # symbol.
+    def eql?(other)
+      other.instance_of?(SymbolParslet) && other.symbol == @symbol
+    end
+
+  protected
+
+    # For equality comparisons.
+    attr_reader :symbol
+  end # class SymbolParslet
+end # module Walrat
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
new file mode 100755
index 0000000..76c7dbc
--- /dev/null
+++ b/spec/spec_helper.rb
@@ -0,0 +1,43 @@
+# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+require 'pathname'
+require 'rubygems'
+require 'spec'
+
+module Walrat
+  module SpecHelper
+    # appends the local "lib" directory to the load path if not already
+    # present ("bin/walrus" is not part of this import, so no TOOL constant)
+    base      = File.expand_path '..', File.dirname(__FILE__)
+    LIBDIR    = Pathname.new(File.join base, 'lib').realpath
+
+    # normalize all paths in the load path; entries that cannot be resolved
+    # are kept exactly as they are
+    normalized = $:.collect { |path| Pathname.new(path).realpath rescue path }
+
+    # only add the directory if it does not appear to be present already
+    $:.push(LIBDIR) unless normalized.include?(LIBDIR)
+  end # module SpecHelper
+end # module Walrat
+
+require 'walrat'
diff --git a/spec/walrat/additions/proc_spec.rb b/spec/walrat/additions/proc_spec.rb
new file mode 100755
index 0000000..aff4952
--- /dev/null
+++ b/spec/walrat/additions/proc_spec.rb
@@ -0,0 +1,31 @@
+# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1.
Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+require File.expand_path('../../spec_helper', File.dirname(__FILE__))
+
+describe 'proc additions' do
+  it 'responds to "to_parseable", "parse" and "memoizing_parse"' do
+    # a lambda that ignores its arguments and always answers 'foo'
+    parseable = lambda { |_string, _options| 'foo' }.to_parseable
+    parseable.parse('bar').should == 'foo'
+    parseable.memoizing_parse('bar').should == 'foo'
+  end
+end
diff --git a/spec/walrat/additions/regexp_spec.rb b/spec/walrat/additions/regexp_spec.rb
new file mode 100755
index 0000000..de344c6
--- /dev/null
+++ b/spec/walrat/additions/regexp_spec.rb
@@ -0,0 +1,52 @@
+# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1.
Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require File.expand_path('../../spec_helper', File.dirname(__FILE__)) + +# For more detailed specification of the RegexpParslet behaviour see +# regexp_parslet_spec.rb. 
+describe 'using shorthand to get RegexpParslets from Regexp instances' do
+  context 'chaining two Regexps with the "&" operator' do
+    it 'yields a two-element sequence' do
+      matches = (/foo/ & /bar/).parse('foobar')
+      matches.map { |match| match.to_s }.should == %w[foo bar]
+    end
+  end
+
+  context 'chaining three Regexps with the "&" operator' do
+    it 'yields a three-element sequence' do
+      matches = (/foo/ & /bar/ & /\.\.\./).parse('foobar...')
+      matches.map { |match| match.to_s }.should == %w[foo bar ...]
+    end
+  end
+
+  context 'alternating two Regexps with the "|" operator' do
+    it 'yields a MatchDataWrapper' do
+      # the first alternative that matches wins; no match at all is an error
+      choice = /foo/ | /bar/
+      choice.parse('foobar').to_s.should == 'foo'
+      choice.parse('bar...').to_s.should == 'bar'
+      lambda {
+        choice.parse('no match')
+      }.should raise_error(Walrat::ParseError)
+    end
+  end
+end
diff --git a/spec/walrat/additions/string_spec.rb b/spec/walrat/additions/string_spec.rb
new file mode 100755
index 0000000..2b5aa4d
--- /dev/null
+++ b/spec/walrat/additions/string_spec.rb
@@ -0,0 +1,112 @@
+# encoding: utf-8
+# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+require File.expand_path('../../spec_helper', File.dirname(__FILE__))
+
+describe String do
+  describe '#to_class_name' do
+    it 'works with require names' do
+      'foo_bar'.to_class_name.should == 'FooBar'
+    end
+
+    it 'works with a single-letter' do
+      'f'.to_class_name.should == 'F'
+    end
+
+    it 'works with double-underscores' do
+      'foo__bar'.to_class_name.should == 'FooBar'
+    end
+
+    it 'works with terminating double-underscores' do
+      'foo__'.to_class_name.should == 'Foo'
+    end
+  end
+end
+
+describe 'iterating over a string' do
+  # formerly a bug: the StringScanner used under the covers was returning nil
+  # (stopping) on hitting a newline
+  it 'should be able to iterate over strings containing newlines' do
+    seen = []
+    "hello\nworld".each_char { |char| seen << char }
+    seen.should == %w[h e l l o] + ["\n"] + %w[w o r l d]
+  end
+end
+
+describe 'working with Unicode strings' do
+  before do
+    # € (Euro) is a three-byte UTF-8 glyph: "\342\202\254"
+    @string = 'Unicode €!'
+  end
+
+  it 'the "each_char" method should work with multibyte characters' do
+    seen = []
+    @string.each_char { |char| seen << char }
+    seen.should == ['U', 'n', 'i', 'c', 'o', 'd', 'e', ' ', '€', '!']
+  end
+
+  it 'the "chars" method should work with multibyte characters' do
+    @string.chars.to_a.should == ['U', 'n', 'i', 'c', 'o', 'd', 'e', ' ', '€', '!']
+  end
+
+  it 'should be able to use "enumerator" convenience method to get a string enumerator' do
+    enumerator = 'hello€'.enumerator
+
+    # one call to "next" per character, in order
+    %w[h e l l o €].each do |expected|
+      enumerator.next.should == expected
+    end
+
+    # and nil once the string is exhausted
+    enumerator.next.should be_nil
+  end
+
+  it 'the "jlength" method should correctly report the number of characters in a string' do
+    @string.jlength.should == 10
+    "€".jlength.should == 1 # three bytes long, but one character
+  end
+end
+
+# For more detailed specification of the StringParslet behaviour see
+# string_parslet_spec.rb.
+describe 'using shorthand to get StringParslets from String instances' do
+  it 'chaining two Strings with the "&" operator should yield a two-element sequence' do
+    sequence = 'foo' & 'bar'
+    sequence.parse('foobar').should == %w[foo bar]
+    expect { sequence.parse('no match') }.to raise_error(Walrat::ParseError)
+  end
+
+  it 'chaining three Strings with the "&" operator should yield a three-element sequence' do
+    sequence = 'foo' & 'bar' & '...'
+    sequence.parse('foobar...').should == %w[foo bar ...]
+    expect { sequence.parse('no match') }.to raise_error(Walrat::ParseError)
+  end
+
+  it 'alternating two Strings with the "|" operator should yield a single string' do
+    choice = 'foo' | 'bar'
+    choice.parse('foo').should == 'foo'
+    choice.parse('foobar').should == 'foo'
+    choice.parse('bar').should == 'bar'
+    expect { choice.parse('no match') }.to raise_error(Walrat::ParseError)
+  end
+end
diff --git a/spec/walrat/and_predicate_spec.rb b/spec/walrat/and_predicate_spec.rb
new file mode 100755
index 0000000..fb5abdd
--- /dev/null
+++ b/spec/walrat/and_predicate_spec.rb
@@ -0,0 +1,39 @@
+# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+require File.expand_path('../spec_helper.rb', File.dirname(__FILE__))
+
+describe Walrat::AndPredicate do
+  subject { Walrat::AndPredicate.new('foo') }
+
+  it 'complains on trying to parse a nil string' do
+    lambda { subject.parse(nil) }.should raise_error(ArgumentError)
+  end
+
+  it 'is able to compare for equality' do
+    subject.should eql(Walrat::AndPredicate.new('foo'))     # same
+    subject.should_not eql(Walrat::AndPredicate.new('bar')) # different
+    subject.should_not eql(Walrat::Predicate.new('foo'))    # same but different class
+  end
+end
diff --git a/spec/walrat/continuation_wrapper_exception_spec.rb b/spec/walrat/continuation_wrapper_exception_spec.rb
new file mode 100755
index 0000000..a9b8ca6
--- /dev/null
+++ b/spec/walrat/continuation_wrapper_exception_spec.rb
@@ -0,0 +1,31 @@
+# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+require File.expand_path('../spec_helper', File.dirname(__FILE__))
+
+describe 'creating a continuation wrapper exception' do
+  it 'complains if initialized with nil' do
+    # a nil continuation must be rejected with a descriptive ArgumentError
+    lambda {
+      Walrat::ContinuationWrapperException.new(nil)
+    }.should raise_error(ArgumentError, /nil continuation/)
+  end
+end
diff --git a/spec/walrat/grammar_spec.rb b/spec/walrat/grammar_spec.rb
new file mode 100755
index 0000000..9caa91a
--- /dev/null
+++ b/spec/walrat/grammar_spec.rb
@@ -0,0 +1,535 @@
+# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require File.expand_path('../spec_helper', File.dirname(__FILE__)) + +describe Walrat::Grammar do + describe '::rules' do + it 'complains if either parameter is nil' do + expect do + class AxeGrammar < Walrat::Grammar + rule nil, 'expression' + end + end.to raise_error(ArgumentError, /nil symbol/) + + expect do + class BoneGrammar < Walrat::Grammar + rule :my_rule, nil + end + end.to raise_error(ArgumentError, /nil parseable/) + + expect do + class CatGrammar < Walrat::Grammar + rule nil, nil + end + end.to raise_error(ArgumentError, /nil/) + end + + it 'complains if an attempt is made to define a rule a second time' do + expect do + class DogGrammar < Walrat::Grammar + rule :my_rule, 'foo' + rule :my_rule, 'bar' + end + end.to raise_error(ArgumentError, /already defined/) + end + end + + describe 'defining productions in a grammar' do + it '"node" method should complain if new class name is nil' do + expect do + class NodeComplainingGrammar < Walrat::Grammar + node nil + end + end.to raise_error(ArgumentError, /nil new_class_name/) + end + + it 'should be able to define a simple Node subclass using the "node" function' do + class NodeGrammar1 < Walrat::Grammar + node 
:my_node_subclass + node :my_subclass_of_a_subclass, :my_node_subclass + end + + NodeGrammar1::MyNodeSubclass.superclass.should == Walrat::Node + NodeGrammar1::MySubclassOfASubclass.superclass.should == NodeGrammar1::MyNodeSubclass + end + + it 'should complain if an attempt is made to create the same production class twice' do + expect do + class HowToGetControlOfJavaAwayFromSun < Walrat::Grammar + rule :foo, 'foo' + node :foo + production :foo + production :foo + end + end.to raise_error(ArgumentError, /production already defined/) + end + + it 'should complain if an attempt is made to create a production for a rule that does not exist yet' do + expect do + class GettingControlOfJavaAwayFromSun < Walrat::Grammar + node :foo + production :foo + end + end.to raise_error(ArgumentError, /non-existent rule/) + end + end + + describe 'parsing using a grammar' do + it 'should complain if asked to parse a nil string' do + class BobGrammar < Walrat::Grammar; end + expect do + BobGrammar.new.parse(nil) + end.to raise_error(ArgumentError, /nil string/) + end + + it 'should complain if trying to parse without first defining a start symbol' do + class RoyalGrammar < Walrat::Grammar; end + expect do + RoyalGrammar.new.parse('foo') + end.to raise_error(RuntimeError, /starting symbol not defined/) + end + + it 'should parse starting with the start symbol' do + class AliceGrammar < Walrat::Grammar + rule :expr, /\w+/ + starting_symbol :expr + end + + grammar = AliceGrammar.new + grammar.parse('foo').should == 'foo' + lambda { grammar.parse('') }.should raise_error(Walrat::ParseError) + end + + it 'should complain if reference is made to an undefined symbol' do + class RoyGrammar < Walrat::Grammar + starting_symbol :expr # :expr is not defined + end + + expect do + RoyGrammar.new.parse('foo') + end.should raise_error(/no rule for key/) + end + + it 'should be able to parse using a simple grammar (one rule)' do + class SimpleGrammar < Walrat::Grammar + starting_symbol :foo + rule 
:foo, 'foo!' + end + + grammar = SimpleGrammar.new + grammar.parse('foo!').should == 'foo!' + lambda { grammar.parse('---') }.should raise_error(Walrat::ParseError) + end + + it 'should be able to parse using a simple grammar (two rules)' do + class AlmostAsSimpleGrammar < Walrat::Grammar + starting_symbol :foo + rule :foo, 'foo!' | :bar + rule :bar, /bar/ + end + + grammar = AlmostAsSimpleGrammar.new + grammar.parse('foo!').should == 'foo!' + grammar.parse('bar').should == 'bar' + lambda { grammar.parse('---') }.should raise_error(Walrat::ParseError) + end + + it 'should be able to parse using a simple grammar (three rules)' do + # a basic version written using intermediary parslets + # (really two parslets and one rule) + class MacGrammar < Walrat::Grammar + starting_symbol :comment + + # parslets + comment_marker = '##' + comment_body = /.+/ + + # rules + rule :comment, comment_marker & comment_body.optional + end + + grammar = MacGrammar.new + grammar.parse('## hello!').should == ['##', ' hello!'] + grammar.parse('##').should == '##' + lambda { grammar.parse('foobar') }.should raise_error(Walrat::ParseError) + + # the same grammar rewritten without intermediary parslets + # (three rules, no standalone parslets) + class MacAltGrammar < Walrat::Grammar + starting_symbol :comment + rule :comment, :comment_marker & :comment_body.optional + rule :comment_marker, '##' + rule :comment_body, /.+/ + end + + grammar = MacAltGrammar.new + grammar.parse('## hello!').should == ['##', ' hello!'] + grammar.parse('##').should == '##' + lambda { grammar.parse('foobar') }.should raise_error(Walrat::ParseError) + end + + it 'should be able to parse using recursive rules (nested parentheses)' do + # basic example + class NestedGrammar < Walrat::Grammar + starting_symbol :bracket_expression + rule :left_bracket, '(' + rule :right_bracket, ')' + rule :bracket_content, (/[^()]+/ | :bracket_expression).zero_or_more + rule :bracket_expression, :left_bracket & :bracket_content.optional 
& :right_bracket + end + + grammar = NestedGrammar.new + grammar.parse('()').should == ['(', ')'] + grammar.parse('(content)').should == ['(', 'content', ')'] + grammar.parse('(content (and more content))').should == ['(', ['content ', ['(', 'and more content', ')']], ')'] + lambda { grammar.parse('(') }.should raise_error(Walrat::ParseError) + + # same example but automatically skipping the delimiting braces for clearer output + class NestedSkippingGrammar < Walrat::Grammar + starting_symbol :bracket_expression + rule :bracket_expression, '('.skip & (/[^()]+/ | :bracket_expression).zero_or_more & ')'.skip + end + + grammar = NestedSkippingGrammar.new + grammar.parse('()').should == [] + grammar.parse('(content)').should == 'content' + grammar.parse('(content (and more content))').should == ['content ', 'and more content'] + grammar.parse('(content (and more content)(and more))').should == ['content ', 'and more content', 'and more'] + grammar.parse('(content (and more content)(and more)(more still))').should == ['content ', 'and more content', 'and more', 'more still'] + grammar.parse('(content (and more content)(and more(more still)))').should == ['content ', 'and more content', ['and more', 'more still']] + lambda { grammar.parse('(') }.should raise_error(Walrat::ParseError) + + # note that this confusing (possible even misleading) nesting goes away if you use a proper AST + class NestedBracketsWithAST < Walrat::Grammar + starting_symbol :bracket_expression + rule :text_expression, /[^()]+/ + rule :bracket_expression, + '('.skip & + (:text_expression | :bracket_expression).zero_or_more & + ')'.skip + node :bracket_expression + production :bracket_expression, :children + end + + # simple tests + grammar = NestedBracketsWithAST.new + grammar.parse('()').children.should == [] + grammar.parse('(content)').children.to_s.should == 'content' + + # nested test: two expressions at the first level, one of them nested + results = grammar.parse('(content (and more 
content))') + results.children[0].should == 'content ' + results.children[1].children.to_s.should == 'and more content' + + # nested test: three expressions at first level, two of them nested + results = grammar.parse('(content (and more content)(and more))')#.should == ['content ', 'and more content', 'and more'] + results.children[0].should == 'content ' + results.children[1].children.should == 'and more content' + results.children[2].children.should == 'and more' + + # nested test: four expressions at the first level, three of them nested + results = grammar.parse('(content (and more content)(and more)(more still))') + results.children[0].should == 'content ' + results.children[1].children.should == 'and more content' + results.children[2].children.should == 'and more' + results.children[3].children.should == 'more still' + + # nested test: three expressions at the first level, one nested and another not only nested but containing another level of nesting + results = grammar.parse('(content (and more content)(and more(more still)))') + results.children[0].should == 'content ' + results.children[1].children.should == 'and more content' + results.children[2].children[0].should == 'and more' + results.children[2].children[1].children.should == 'more still' + + # bad input case + lambda { grammar.parse('(') }.should raise_error(Walrat::ParseError) + end + + it 'should be able to parse using recursive rules (nested comments)' do + class NestedCommentsGrammar < Walrat::Grammar + starting_symbol :comment + rule :comment_start, '/*' + rule :comment_end, '*/' + rule :comment_content, (:comment | /\/+/ | ('*' & '/'.not!) 
| /[^*\/]+/).zero_or_more + rule :comment, '/*' & :comment_content.optional & '*/' + end + + grammar = NestedCommentsGrammar.new + grammar.parse('/**/').should == ['/*', '*/'] + grammar.parse('/*comment*/').should == ['/*', 'comment', '*/'] + grammar.parse('/* comment /* nested */*/').should == ['/*', [' comment ', ['/*', ' nested ', '*/']], '*/'] + lambda { grammar.parse('/*') }.should raise_error(Walrat::ParseError) + end + + it 'should be able to write a grammar that produces an AST for a simple language that supports addition and assignment' do + class SimpleASTLanguage < Walrat::Grammar + starting_symbol :expression + + # terminal tokens + rule :identifier, /[a-zA-Z_][a-zA-Z0-9_]*/ + node :identifier + production :identifier + rule :integer_literal, /[0-9]+/ + node :integer_literal + production :integer_literal + + # expressions + rule :expression, :assignment_expression | :addition_expression | :identifier | :integer_literal + node :expression + rule :assignment_expression, :identifier & '='.skip & :expression + node :assignment_expression, :expression + production :assignment_expression, :target, :value + rule :addition_expression, (:identifier | :integer_literal) & '+'.skip & :expression + node :addition_expression, :expression + production :addition_expression, :summee, :summor + end + + grammar = SimpleASTLanguage.new + results = grammar.parse('hello') + results.should be_kind_of(SimpleASTLanguage::Identifier) + results.lexeme.should == 'hello' + + results = grammar.parse('1234') + results.should be_kind_of(SimpleASTLanguage::IntegerLiteral) + results.lexeme.should == '1234' + + results = grammar.parse('foo=bar') + results.should be_kind_of(SimpleASTLanguage::Expression) + results.should be_kind_of(SimpleASTLanguage::AssignmentExpression) + results.target.should be_kind_of(SimpleASTLanguage::Identifier) + results.target.lexeme.should == 'foo' + results.value.should be_kind_of(SimpleASTLanguage::Identifier) + results.value.lexeme.should == 'bar' + + 
results = grammar.parse('baz+123') + results.should be_kind_of(SimpleASTLanguage::Expression) + results.should be_kind_of(SimpleASTLanguage::AdditionExpression) + results.summee.should be_kind_of(SimpleASTLanguage::Identifier) + results.summee.lexeme.should == 'baz' + results.summor.should be_kind_of(SimpleASTLanguage::IntegerLiteral) + results.summor.lexeme.should == '123' + + results = grammar.parse('foo=abc+123') + results.should be_kind_of(SimpleASTLanguage::Expression) + results.should be_kind_of(SimpleASTLanguage::AssignmentExpression) + results.target.should be_kind_of(SimpleASTLanguage::Identifier) + results.target.lexeme.should == 'foo' + results.value.should be_kind_of(SimpleASTLanguage::AdditionExpression) + results.value.summee.should be_kind_of(SimpleASTLanguage::Identifier) + results.value.summee.lexeme.should == 'abc' + results.value.summor.should be_kind_of(SimpleASTLanguage::IntegerLiteral) + results.value.summor.lexeme.should == '123' + + results = grammar.parse('a+b+2') + results.should be_kind_of(SimpleASTLanguage::Expression) + results.should be_kind_of(SimpleASTLanguage::AdditionExpression) + results.summee.should be_kind_of(SimpleASTLanguage::Identifier) + results.summee.lexeme.should == 'a' + results.summor.should be_kind_of(SimpleASTLanguage::AdditionExpression) + results.summor.summee.should be_kind_of(SimpleASTLanguage::Identifier) + results.summor.summee.lexeme.should == 'b' + results.summor.summor.should be_kind_of(SimpleASTLanguage::IntegerLiteral) + results.summor.summor.lexeme.should == '2' + end + + it 'should be able to write a grammar that complains if all the input is not consumed' do + class ComplainingGrammar < Walrat::Grammar + starting_symbol :translation_unit + rule :translation_unit, :word_list & :end_of_string.and? 
| :end_of_string + rule :end_of_string, /\z/ + rule :whitespace, /\s+/ + rule :word, /[a-z]+/ + rule :word_list, :word >> (:whitespace.skip & :word).zero_or_more + end + + grammar = ComplainingGrammar.new + grammar.parse('').should == '' + grammar.parse('foo').should == 'foo' + grammar.parse('foo bar').should == ['foo', 'bar'] + lambda { grammar.parse('...') }.should raise_error(Walrat::ParseError) + lambda { grammar.parse('foo...') }.should raise_error(Walrat::ParseError) + lambda { grammar.parse('foo bar...') }.should raise_error(Walrat::ParseError) + end + + it 'should be able to define a default parslet for intertoken skipping' do + # simple example + class SkippingGrammar < Walrat::Grammar + starting_symbol :translation_unit + skipping :whitespace_and_newlines + rule :whitespace_and_newlines, /[\s\n\r]+/ + rule :translation_unit, :word_list & :end_of_string.and? | :end_of_string + rule :end_of_string, /\z/ + rule :word_list, :word.zero_or_more + rule :word, /[a-z0-9_]+/ + end + + # not sure if I can justify the difference in behaviour here compared with the previous grammar + # if I catch these throws at the grammar level I can return nil + # but note that the previous grammar returns an empty array, which to_s is just "" + grammar = SkippingGrammar.new + lambda { grammar.parse('') }.should throw_symbol(:AndPredicateSuccess) + + grammar.parse('foo').should == 'foo' + grammar.parse('foo bar').should == ['foo', 'bar'] # intervening whitespace + grammar.parse('foo bar ').should == ['foo', 'bar'] # trailing whitespace + grammar.parse(' foo bar').should == ['foo', 'bar'] # leading whitespace + + # additional example, this time involving the ">>" pseudo-operator + class SkippingAndMergingGrammar < Walrat::Grammar + starting_symbol :translation_unit + skipping :whitespace_and_newlines + rule :whitespace_and_newlines, /[\s\n\r]+/ + rule :translation_unit, :word_list & :end_of_string.and? 
| :end_of_string + rule :end_of_string, /\z/ + rule :word_list, :word >> (','.skip & :word).zero_or_more + rule :word, /[a-z0-9_]+/ + end + + # one word + grammar = SkippingAndMergingGrammar.new + grammar.parse('foo').should == 'foo' + + # two words + grammar.parse('foo,bar').should == ['foo', 'bar'] # no whitespace + grammar.parse('foo, bar').should == ['foo', 'bar'] # whitespace after + grammar.parse('foo ,bar').should == ['foo', 'bar'] # whitespace before + grammar.parse('foo , bar').should == ['foo', 'bar'] # whitespace before and after + grammar.parse('foo , bar ').should == ['foo', 'bar'] # trailing and embedded whitespace + grammar.parse(' foo , bar').should == ['foo', 'bar'] # leading and embedded whitespace + + # three or four words + grammar.parse('foo , bar, baz').should == ['foo', 'bar', 'baz'] + grammar.parse(' foo , bar, baz ,bin').should == ['foo', 'bar', 'baz', 'bin'] + end + + it 'should complain if trying to set default skipping parslet more than once' do + expect do + class SetSkipperTwice < Walrat::Grammar + skipping :first # fine + skipping :again # should raise here + end + end.should raise_error(/default skipping parslet already set/) + end + + it 'should complain if passed nil' do + expect do + class PassNilToSkipping < Walrat::Grammar + skipping nil + end + end.should raise_error(ArgumentError, /nil rule_or_parslet/) + end + + it 'should be able to override default skipping parslet on a per-rule basis' do + # the example grammar parses word lists and number lists + class OverrideDefaultSkippingParslet < Walrat::Grammar + starting_symbol :translation_unit + skipping :whitespace_and_newlines + rule :whitespace_and_newlines, /\s+/ # any whitespace including newlines + rule :whitespace, /[ \t\v]+/ # literally only spaces, tabs, not newlines etc + rule :translation_unit, :component.one_or_more & :end_of_string.and? 
| :end_of_string + rule :end_of_string, /\z/ + rule :component, :word_list | :number_list + rule :word_list, :word.one_or_more + rule :word, /[a-z]+/ + rule :number, /[0-9]+/ + + # the interesting bit: we override the skipping rule for number lists + rule :number_list, :number.one_or_more + skipping :number_list, :whitespace # only whitespace, no newlines + end + + # words in word lists can be separated by whitespace or newlines + grammar = OverrideDefaultSkippingParslet.new + grammar.parse('hello world').should == ['hello', 'world'] + grammar.parse("hello\nworld").should == ['hello', 'world'] + grammar.parse("hello world\nworld hello").should == ['hello', 'world', 'world', 'hello'] + + # numbers in number lists may be separated only by whitespace, not newlines + grammar.parse('123 456').should == ['123', '456'] + grammar.parse("123\n456").should == ['123', '456'] # this succeeds because parser treats them as two separate number lists + grammar.parse("123 456\n456 123").should == [['123', '456'], ['456', '123']] + + # intermixing word lists and number lists + grammar.parse("bar\n123").should == ['bar', '123'] + grammar.parse("123\n456\nbar").should == ['123', '456', 'bar'] + + # these were buggy at one point: "123\n456" was getting mashed into "123456" due to misguided use of String#delete! 
to delete first newline + grammar.parse("\n123\n456").should == ['123', '456'] + grammar.parse("bar\n123\n456").should == ['bar', '123', '456'] + grammar.parse("baz bar\n123\n456").should == [['baz', 'bar'], '123', '456'] + grammar.parse("hello world\nfoo\n123 456 baz bar\n123\n456").should == [['hello', 'world', 'foo'], ['123', '456'], ['baz', 'bar'], '123', '456'] + end + + it 'should complain if trying to override the default for the same rule twice' do + expect do + class OverrideSameRuleTwice < Walrat::Grammar + rule :the_rule, 'foo' + skipping :the_rule, :the_override # fine + skipping :the_rule, :the_override # should raise + end + end.to raise_error(ArgumentError, /skipping override already set for rule/) + end + + it "should complain if trying to set an override for a rule that hasn't been defined yet" do + expect do + class OverrideUndefinedRule < Walrat::Grammar + skipping :non_existent_rule, :the_override + end + end.to raise_error(ArgumentError, /non-existent rule/) + end + + it 'use of the "skipping" directive should play nicely with predicates' do + # example 1: word + predicate + class NicePlayer < Walrat::Grammar + starting_symbol :foo + skipping :whitespace + rule :whitespace, /[ \t\v]+/ + rule :foo, 'hello' & 'world'.and? + end + + grammar = NicePlayer.new + grammar.parse('hello world').should == 'hello' + grammar.parse('hello world').should == 'hello' + grammar.parse('helloworld').should == 'hello' + lambda { grammar.parse('hello') }.should raise_error(Walrat::ParseError) + lambda { grammar.parse('hello buddy') }.should raise_error(Walrat::ParseError) + lambda { grammar.parse("hello\nbuddy") }.should raise_error(Walrat::ParseError) + + # example 2: word + predicate + other word + class NicePlayer2 < Walrat::Grammar + starting_symbol :foo + skipping :whitespace + rule :whitespace, /[ \t\v]+/ + rule :foo, /hel../ & 'world'.and? 
& /\w+/ + end + + grammar = NicePlayer2.new + grammar.parse('hello world').should == ['hello', 'world'] + grammar.parse('hello world').should == ['hello', 'world'] + grammar.parse('helloworld').should == ['hello', 'world'] + lambda { grammar.parse('hello') }.should raise_error(Walrat::ParseError) + lambda { grammar.parse('hello buddy') }.should raise_error(Walrat::ParseError) + lambda { grammar.parse("hello\nbuddy") }.should raise_error(Walrat::ParseError) + end + end +end diff --git a/spec/walrat/match_data_wrapper_spec.rb b/spec/walrat/match_data_wrapper_spec.rb new file mode 100755 index 0000000..d133a34 --- /dev/null +++ b/spec/walrat/match_data_wrapper_spec.rb @@ -0,0 +1,49 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require File.expand_path('../spec_helper', File.dirname(__FILE__)) + +describe Walrat::MatchDataWrapper do + before do + 'hello agent' =~ /(\w+)(\s+)(\w+)/ # a successful match populates $~ + @match = Walrat::MatchDataWrapper.new($~) # wrap the MatchData produced by the line above + end + + it 'raises if initialized with nil' do + expect do + Walrat::MatchDataWrapper.new nil + end.to raise_error(ArgumentError, /nil data/) + end + + specify 'stored match data persists after multiple matches are executed' do + original = @match.match_data # store original value + 'foo' =~ /foo/ # clobber $~ + @match.match_data.should == original # confirm stored value still intact + end + + specify 'comparisons with Strings work without having to call "to_s"' do + @match.should == 'hello agent' # normal order + 'hello agent'.should == @match # reverse order + @match.should_not == 'foobar' # inverse test sense (not equal) + 'foobar'.should_not == @match # inverse test sense, reverse order + end +end diff --git a/spec/walrat/memoizing_cache_spec.rb b/spec/walrat/memoizing_cache_spec.rb new file mode 100755 index 0000000..9970b4b --- /dev/null +++ b/spec/walrat/memoizing_cache_spec.rb @@ -0,0 +1,109 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2.
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require File.expand_path('../spec_helper', File.dirname(__FILE__)) + +describe Walrat::MemoizingCache::NoValueForKey do + it 'is a singleton' do + expect do + Walrat::MemoizingCache::NoValueForKey.new # direct instantiation is expected to hit a private "new" + end.to raise_error(NoMethodError, /private method/) + + Walrat::MemoizingCache::NoValueForKey.instance.object_id.
+ should == Walrat::MemoizingCache::NoValueForKey.instance.object_id + end + + it 'should be able to use NoValueForKey as the default value for a hash' do + hash = Hash.new Walrat::MemoizingCache::NoValueForKey.instance + hash.default.should == Walrat::MemoizingCache::NoValueForKey.instance + hash[:foo].should == Walrat::MemoizingCache::NoValueForKey.instance + hash[:foo] = 'bar' + hash[:foo].should == 'bar' + hash[:bar].should == Walrat::MemoizingCache::NoValueForKey.instance + end +end + +describe Walrat::MemoizingCache do + it 'parses with memoizing turned on' + it 'parses with memoizing turned off' + specify 'parsing with memoization turned on is faster' +end + +# left-recursion is enabled by code in the memoizer and elsewhere; keep the +# specs here for want of a better place +describe 'working with left-recursive rules' do + specify 'circular rules should cause a short-circuit' do + class InfiniteLoop < Walrat::Grammar + starting_symbol :a + rule :a, :a # a bone-headed rule + end + + grammar = InfiniteLoop.new + expect do + grammar.parse('anything') + end.to raise_error(Walrat::LeftRecursionException) + end + + specify 'shortcuiting is not be fatal if a valid alternative is present' do + class AlmostInfinite < Walrat::Grammar + starting_symbol :a + rule :a, :a | :b # slightly less bone-headed + rule :b, 'foo' + end + + grammar = AlmostInfinite.new + grammar.parse('foo').should == 'foo' + end + + it 'retries after short-circuiting if valid continuation point' do + class MuchMoreRealisticExample < Walrat::Grammar + starting_symbol :a + rule :a, :a & :b | :b + rule :b, 'foo' + end + + # note the left-nested (left-associative) grouping of the expected results below + grammar = MuchMoreRealisticExample.new + grammar.parse('foo').should == 'foo' + grammar.parse('foofoo').should == ['foo', 'foo'] + grammar.parse('foofoofoo').should == [['foo', 'foo'], 'foo'] + grammar.parse('foofoofoofoo').should == [[['foo', 'foo'], 'foo'], 'foo'] + grammar.parse('foofoofoofoofoo').should == [[[['foo', 'foo'], 'foo'], 'foo'],
'foo'] + end + + specify 'right associativity should work when building AST nodes' do + class RightAssociativeAdditionExample < Walrat::Grammar + starting_symbol :addition_expression + rule :term, /\d+/ + rule :addition_expression, + :addition_expression & '+'.skip & :term | :term + node :addition_expression + production :addition_expression, :left, :right + + # TODO: syntax for expressing alternate production? + end + + pending # NOTE(review): the rule above is left-recursive, so results presumably nest to the left despite the example name -- confirm intent + grammar = RightAssociativeAdditionExample.new + result = grammar.parse('1+2') + end +end diff --git a/spec/walrat/node_spec.rb b/spec/walrat/node_spec.rb new file mode 100755 index 0000000..7d4f033 --- /dev/null +++ b/spec/walrat/node_spec.rb @@ -0,0 +1,27 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require File.expand_path('../spec_helper', File.dirname(__FILE__)) + +describe Walrat::Node do + it 'has behavior' # placeholder: declared without a block, so it asserts nothing yet +end diff --git a/spec/walrat/not_predicate_spec.rb b/spec/walrat/not_predicate_spec.rb new file mode 100755 index 0000000..5dd0535 --- /dev/null +++ b/spec/walrat/not_predicate_spec.rb @@ -0,0 +1,40 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require File.expand_path('../spec_helper', File.dirname(__FILE__)) + +describe Walrat::NotPredicate do + it 'complains on trying to parse a nil string' do + expect do + Walrat::NotPredicate.new('irrelevant').parse nil + end.to raise_error(ArgumentError, /nil string/) + end + + it 'can be compared for equality' do + Walrat::NotPredicate.new('foo'). + should eql(Walrat::NotPredicate.new('foo')) # same class, same argument + Walrat::NotPredicate.new('foo'). + should_not eql(Walrat::NotPredicate.new('bar')) # same class, different argument + Walrat::NotPredicate.new('foo'). + should_not eql(Walrat::Predicate.new('foo')) # different class + end +end diff --git a/spec/walrat/parser_state_spec.rb b/spec/walrat/parser_state_spec.rb new file mode 100755 index 0000000..3dea464 --- /dev/null +++ b/spec/walrat/parser_state_spec.rb @@ -0,0 +1,173 @@ +# encoding: utf-8 +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +require File.expand_path('../spec_helper', File.dirname(__FILE__)) + +describe Walrat::ParserState do + before do + @base_string = 'this is the string to be parsed' + @state = Walrat::ParserState.new @base_string # fresh state wrapping @base_string for each example + end + + it 'raises an ArgumentError if initialized with nil' do + expect do + Walrat::ParserState.new nil + end.to raise_error(ArgumentError, /nil string/) + end + + it 'before parsing has started "remainder" should equal the entire string' do + @state.remainder.should == @base_string + end + + it 'before parsing has started "remainder" should equal the entire string (when string is an empty string)' do + Walrat::ParserState.new('').remainder.should == '' + end + + it 'before parsing has started "results" should be empty' do + @state.results.should be_empty + end + + it '"parsed" should complain if passed nil' do + lambda { @state.parsed(nil) }.should raise_error(ArgumentError) + end + + it '"skipped" should complain if passed nil' do + lambda { @state.skipped(nil) }.should raise_error(ArgumentError) + end + + it '"parsed" should return the remainder of the string' do + @state.parsed('this is the ').should == 'string to be parsed' + @state.parsed('string ').should == 'to be
parsed' + @state.parsed('to be parsed').should == '' + end + + it '"skipped" should return the remainder of the string' do + @state.skipped('this is the ').should == 'string to be parsed' + @state.skipped('string ').should == 'to be parsed' + @state.skipped('to be parsed').should == '' + end + + it '"results" should return an unwrapped parsed result (for single results)' do + @state.parsed('this') + @state.results.should == 'this' + end + + it 'skipped substrings should not appear in "results"' do + @state.skipped('this') + @state.results.should be_empty + end + + it 'should return an array of the parsed results (for multiple results)' do + @state.parsed('this ') + @state.parsed('is ') + @state.results.should == ['this ', 'is '] + end + + it 'should work when the entire string is consumed in a single operation (using "parsed")' do + @state.parsed(@base_string).should == '' + @state.results.should == @base_string + end + + it 'should work when the entire string is consumed in a single operation (using "skipped")' do + @state.skipped(@base_string).should == '' + @state.results.should be_empty + end + + it '"parsed" should complain if passed something that doesn\'t respond to the "line_end" and "column_end" messages' do + # line_end + my_mock = mock('mock_which_does_not_implement_line_end', :null_object => true) + my_mock.should_receive(:line_end).and_raise(NoMethodError) + lambda { @state.parsed(my_mock) }.should raise_error(NoMethodError) + + # column_end + my_mock = mock('mock_which_does_not_implement_column_end', :null_object => true) + my_mock.should_receive(:column_end).and_raise(NoMethodError) + lambda { @state.parsed(my_mock) }.should raise_error(NoMethodError) + end + + it '"skipped" should complain if passed something that doesn\'t respond to the "line_end" and "column_end" messages' do + # line_end + my_mock = mock('mock_which_does_not_implement_line_end', :null_object => true) + my_mock.should_receive(:line_end).and_raise(NoMethodError) + lambda {
@state.skipped(my_mock) }.should raise_error(NoMethodError) + + # column_end + my_mock = mock('mock_which_does_not_implement_column_end', :null_object => true) + my_mock.should_receive(:column_end).and_raise(NoMethodError) + lambda { @state.skipped(my_mock) }.should raise_error(NoMethodError) + end + + it 'should be able to mix use of "parsed" and "skipped" methods' do + # first example + @state.skipped('this is the ').should == 'string to be parsed' + @state.results.should be_empty + @state.parsed('string ').should == 'to be parsed' + @state.results.should == 'string ' + @state.skipped('to be parsed').should == '' + @state.results.should == 'string ' + + # second example (added to isolate a bug that surfaced in another specification) + state = Walrat::ParserState.new('foo1...') + state.skipped('foo').should == '1...' + state.remainder.should == '1...' + state.results.should be_empty + state.parsed('1').should == '...' + state.remainder.should == '...' + state.results.should == '1' + end + + it '"parsed" and "results" methods should work with multi-byte Unicode strings' do + # basic test + state = Walrat::ParserState.new('400€, foo') + state.remainder.should == '400€, foo' + state.parsed('40').should == '0€, foo' + state.results.should == '40' + state.parsed('0€, ').should == 'foo' + state.results.should == ['40', '0€, '] + state.parsed('foo').should == '' + state.results.should == ['40', '0€, ', 'foo'] + + # test with newlines before and after multi-byte chars + state = Walrat::ParserState.new("400\n or more €...\nfoo") + state.remainder.should == "400\n or more €...\nfoo" + state.parsed("400\n or more").should == " €...\nfoo" + state.results.should == "400\n or more" + state.parsed(' €..').should == ".\nfoo" + state.results.should == ["400\n or more", ' €..'] + state.parsed(".\nfoo").should == '' + state.results.should == ["400\n or more", ' €..', ".\nfoo"] + end + + it '"skipped" and "results" methods should work with multi-byte Unicode strings' do + state =
Walrat::ParserState.new('400€, foo') + state.remainder.should == '400€, foo' + state.skipped('4').should == '00€, foo' + state.results.should be_empty + state.parsed('0').should == '0€, foo' + state.results.should == '0' + state.skipped('0€, ').should == 'foo' + state.results.should == '0' + state.parsed('foo').should == '' + state.results.should == ['0', 'foo'] + end +end diff --git a/spec/walrat/parslet_choice_spec.rb b/spec/walrat/parslet_choice_spec.rb new file mode 100755 index 0000000..912743d --- /dev/null +++ b/spec/walrat/parslet_choice_spec.rb @@ -0,0 +1,55 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE.
+ +require File.expand_path('../spec_helper', File.dirname(__FILE__)) + +describe Walrat::ParsletChoice do + before do + @p1 = 'foo'.to_parseable + @p2 = 'bar'.to_parseable + end + + it 'hashes should be the same if initialized with the same parseables' do + Walrat::ParsletChoice.new(@p1, @p2).hash.should == Walrat::ParsletChoice.new(@p1, @p2).hash + Walrat::ParsletChoice.new(@p1, @p2).should eql(Walrat::ParsletChoice.new(@p1, @p2)) + end + + it 'hashes should (ideally) be different if initialized with different parseables' do + Walrat::ParsletChoice.new(@p1, @p2).hash.should_not == Walrat::ParsletChoice.new('baz'.to_parseable, 'abc'.to_parseable).hash + Walrat::ParsletChoice.new(@p1, @p2).should_not eql(Walrat::ParsletChoice.new('baz'.to_parseable, 'abc'.to_parseable)) + end + + it 'hashes should be different compared to other similar classes even if initialized with the same parseables' do + Walrat::ParsletChoice.new(@p1, @p2).hash.should_not == Walrat::ParsletSequence.new(@p1, @p2).hash + Walrat::ParsletChoice.new(@p1, @p2).should_not eql(Walrat::ParsletSequence.new(@p1, @p2)) + end + + it 'should be able to use Parslet Choice instances as keys in a hash' do + hash = {} + key1 = Walrat::ParsletChoice.new(@p1, @p2) + key2 = Walrat::ParsletChoice.new('baz'.to_parseable, 'abc'.to_parseable) + hash[:key1] = 'foo' + hash[:key2] = 'bar' + hash[:key1].should == 'foo' + hash[:key2].should == 'bar' + end +end diff --git a/spec/walrat/parslet_combining_spec.rb b/spec/walrat/parslet_combining_spec.rb new file mode 100755 index 0000000..71199d4 --- /dev/null +++ b/spec/walrat/parslet_combining_spec.rb @@ -0,0 +1,265 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. 
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+ +require File.expand_path('../spec_helper', File.dirname(__FILE__)) + +describe 'using shorthand operators to combine String, Symbol and Regexp parsers' do + it 'should be able to chain a String and a Regexp together' do + # try in one order + sequence = 'foo' & /\d+/ + sequence.parse('foo1000').should == ['foo', '1000'] + lambda { sequence.parse('foo') }.should raise_error(Walrat::ParseError) # first part alone is not enough + lambda { sequence.parse('1000') }.should raise_error(Walrat::ParseError) # neither is second part alone + lambda { sequence.parse('1000foo') }.should raise_error(Walrat::ParseError) # order matters + + # same test but in reverse order + sequence = /\d+/ & 'foo' + sequence.parse('1000foo').should == ['1000', 'foo'] + lambda { sequence.parse('foo') }.should raise_error(Walrat::ParseError) # first part alone is not enough + lambda { sequence.parse('1000') }.should raise_error(Walrat::ParseError) # neither is second part alone + lambda { sequence.parse('foo1000') }.should raise_error(Walrat::ParseError) # order matters + end + + it 'should be able to choose between a String and a Regexp' do + # try in one order + sequence = 'foo' | /\d+/ + sequence.parse('foo').should == 'foo' + sequence.parse('100').should == '100' + lambda { sequence.parse('bar') }.should raise_error(Walrat::ParseError) + + # same test but in reverse order + sequence = /\d+/ | 'foo' + sequence.parse('foo').should == 'foo' + sequence.parse('100').should == '100' + lambda { sequence.parse('bar') }.should raise_error(Walrat::ParseError) + end + + it 'should be able to freely intermix String and Regexp objects when chaining and choosing' do + sequence = 'foo' & /\d+/ | 'bar' & /[XYZ]{3}/ + sequence.parse('foo123').should == ['foo', '123'] + sequence.parse('barZYX').should == ['bar', 'ZYX'] + lambda { sequence.parse('foo') }.should raise_error(Walrat::ParseError) + lambda { sequence.parse('123') }.should raise_error(Walrat::ParseError) + lambda { sequence.parse('bar') }.should 
raise_error(Walrat::ParseError) + lambda { sequence.parse('XYZ') }.should raise_error(Walrat::ParseError) + lambda { sequence.parse('barXY') }.should raise_error(Walrat::ParseError) + end + + it 'should be able to specify minimum and maximum repetition using shorthand methods' do + # optional (same as "?" in regular expressions) + sequence = 'foo'.optional + sequence.parse('foo').should == 'foo' + lambda { sequence.parse('bar') }.should throw_symbol(:ZeroWidthParseSuccess) + + # zero_or_one (same as optional; "?" in regular expressions) + sequence = 'foo'.zero_or_one + sequence.parse('foo').should == 'foo' + lambda { sequence.parse('bar') }.should throw_symbol(:ZeroWidthParseSuccess) + + # zero_or_more (same as "*" in regular expressions) + sequence = 'foo'.zero_or_more + sequence.parse('foo').should == 'foo' + sequence.parse('foofoofoobar').should == ['foo', 'foo', 'foo'] + lambda { sequence.parse('bar') }.should throw_symbol(:ZeroWidthParseSuccess) + + # one_or_more (same as "+" in regular expressions) + sequence = 'foo'.one_or_more + sequence.parse('foo').should == 'foo' + sequence.parse('foofoofoobar').should == ['foo', 'foo', 'foo'] + lambda { sequence.parse('bar') }.should raise_error(Walrat::ParseError) + + # repeat (arbitary limits for min, max; same as {min, max} in regular expressions) + sequence = 'foo'.repeat(3, 5) + sequence.parse('foofoofoobar').should == ['foo', 'foo', 'foo'] + sequence.parse('foofoofoofoobar').should == ['foo', 'foo', 'foo', 'foo'] + sequence.parse('foofoofoofoofoobar').should == ['foo', 'foo', 'foo', 'foo', 'foo'] + sequence.parse('foofoofoofoofoofoobar').should == ['foo', 'foo', 'foo', 'foo', 'foo'] + lambda { sequence.parse('bar') }.should raise_error(Walrat::ParseError) + lambda { sequence.parse('foo') }.should raise_error(Walrat::ParseError) + lambda { sequence.parse('foofoo') }.should raise_error(Walrat::ParseError) + end + + it 'should be able to apply repetitions to other combinations wrapped in parentheses' do + sequence = 
('foo' & 'bar').one_or_more + sequence.parse('foobar').should == ['foo', 'bar'] + sequence.parse('foobarfoobar').should == [['foo', 'bar'], ['foo', 'bar']] # fails: just returns ['foo', 'bar'] + end + + it 'should be able to combine use of repetition shorthand methods with other shorthand methods' do + # first we test with chaining + sequence = 'foo'.optional & 'bar' & 'abc'.one_or_more + sequence.parse('foobarabc').should == ['foo', 'bar', 'abc'] + sequence.parse('foobarabcabc').should == ['foo', 'bar', ['abc', 'abc']] + sequence.parse('barabc').should == ['bar', 'abc'] + lambda { sequence.parse('abc') }.should raise_error(Walrat::ParseError) + + # similar test but with alternation + sequence = 'foo' | 'bar' | 'abc'.one_or_more + sequence.parse('foobarabc').should == 'foo' + sequence.parse('barabc').should == 'bar' + sequence.parse('abc').should == 'abc' + sequence.parse('abcabc').should == ['abc', 'abc'] + lambda { sequence.parse('nothing') }.should raise_error(Walrat::ParseError) + + # test with defective sequence (makes no sense to use "optional" with alternation, will always succeed) + sequence = 'foo'.optional | 'bar' | 'abc'.one_or_more + sequence.parse('foobarabc').should == 'foo' + lambda { sequence.parse('nothing') }.should throw_symbol(:ZeroWidthParseSuccess) + end + + it 'should be able to chain a "not predicate"' do + sequence = 'foo' & 'bar'.not! + sequence.parse('foo').should == 'foo' # fails with ['foo'] because that's the way ParserState works... + sequence.parse('foo...').should == 'foo' # same + lambda { sequence.parse('foobar') }.should raise_error(Walrat::ParseError) + end + + it 'an isolated "not predicate" should return a zero-width match' do + sequence = 'foo'.not! 
+ lambda { sequence.parse('foo') }.should raise_error(Walrat::ParseError) + lambda { sequence.parse('bar') }.should throw_symbol(:NotPredicateSuccess) + end + + it 'two "not predicates" chained together should act like a union' do + # this means "not followed by 'foo' and not followed by 'bar'" + sequence = 'foo'.not! & 'bar'.not! + lambda { sequence.parse('foo') }.should raise_error(Walrat::ParseError) + lambda { sequence.parse('bar') }.should raise_error(Walrat::ParseError) + lambda { sequence.parse('abc') }.should throw_symbol(:NotPredicateSuccess) + end + + it 'should be able to chain an "and predicate"' do + sequence = 'foo' & 'bar'.and? + sequence.parse('foobar').should == 'foo' # same problem, returns ['foo'] + lambda { sequence.parse('foo...') }.should raise_error(Walrat::ParseError) + lambda { sequence.parse('foo') }.should raise_error(Walrat::ParseError) + end + + it 'an isolated "and predicate" should return a zero-width match' do + sequence = 'foo'.and? + lambda { sequence.parse('bar') }.should raise_error(Walrat::ParseError) + lambda { sequence.parse('foo') }.should throw_symbol(:AndPredicateSuccess) + end + + it 'should be able to follow an "and predicate" with other parslets or combinations' do + # this is equivalent to "foo" if followed by "bar", or any three characters + sequence = 'foo' & 'bar'.and? | /.../ + sequence.parse('foobar').should == 'foo' # returns ['foo'] + sequence.parse('abc').should == 'abc' + lambda { sequence.parse('') }.should raise_error(Walrat::ParseError) + + # it makes little sense for the predicate to follows a choice operator so we don't test that + end + + it 'should be able to follow a "not predicate" with other parslets or combinations' do + # this is equivalent to "foo" followed by any three characters other than "bar" + sequence = 'foo' & 'bar'.not! 
& /.../ + sequence.parse('fooabc').should == ['foo', 'abc'] + lambda { sequence.parse('foobar') }.should raise_error(Walrat::ParseError) + lambda { sequence.parse('foo') }.should raise_error(Walrat::ParseError) + lambda { sequence.parse('') }.should raise_error(Walrat::ParseError) + end + + it 'should be able to include a "not predicate" when using a repetition operator' do + # basic example + sequence = ('foo' & 'bar'.not!).one_or_more + sequence.parse('foo').should == 'foo' + sequence.parse('foofoobar').should == 'foo' + sequence.parse('foofoo').should == ['foo', 'foo'] + lambda { sequence.parse('bar') }.should raise_error(Walrat::ParseError) + lambda { sequence.parse('foobar') }.should raise_error(Walrat::ParseError) + + # variation: note that greedy matching alters the behaviour + sequence = ('foo' & 'bar').one_or_more & 'abc'.not! + sequence.parse('foobar').should == ['foo', 'bar'] + sequence.parse('foobarfoobar').should == [['foo', 'bar'], ['foo', 'bar']] + lambda { sequence.parse('foobarabc') }.should raise_error(Walrat::ParseError) + end + + it 'should be able to use regular expression shortcuts in conjunction with predicates' do + # match "foo" as long as it's not followed by a digit + sequence = 'foo' & /\d/.not! + sequence.parse('foo').should == 'foo' + sequence.parse('foobar').should == 'foo' + lambda { sequence.parse('foo1') }.should raise_error(Walrat::ParseError) + + # match "word" characters as long as they're not followed by whitespace + sequence = /\w+/ & /\s/.not! 
+    sequence.parse('foo').should == 'foo'
+    lambda { sequence.parse('foo ') }.should raise_error(Walrat::ParseError)
+  end
+end
+
+describe 'omitting tokens from the output using the "skip" method' do
+  it 'should be able to skip quotation marks delimiting a string' do
+    sequence = '"'.skip & /[^"]+/ & '"'.skip
+    sequence.parse('"hello world"').should == 'hello world' # note this is returning a ParserState object
+  end
+
+  it 'should be able to skip within a repetition expression' do
+    sequence = ('foo'.skip & /\d+/).one_or_more
+    sequence.parse('foo1...').should == '1'
+    sequence.parse('foo1foo2...').should == ['1', '2'] # only returns 1
+    sequence.parse('foo1foo2foo3...').should == ['1', '2', '3'] # only returns 1
+  end
+
+  it 'should be able to skip commas separating a list' do
+    # closer to real-world use: a comma-separated list
+    sequence = /\w+/ & (/\s*,\s*/.skip & /\w+/).zero_or_more
+    sequence.parse('a').should == 'a'
+    sequence.parse('a, b').should == ['a', 'b']
+    sequence.parse('a, b, c').should == ['a', ['b', 'c']] # with "&" the repeated matches are expected as a nested array
+    sequence.parse('a, b, c, d').should == ['a', ['b', 'c', 'd']]
+
+    # again, using the ">>" operator
+    sequence = /\w+/ >> (/\s*,\s*/.skip & /\w+/).zero_or_more
+    sequence.parse('a').should == 'a'
+    sequence.parse('a, b').should == ['a', 'b']
+    sequence.parse('a, b, c').should == ['a', 'b', 'c'] # with ">>" the repeated matches are expected flattened into the outer array
+    sequence.parse('a, b, c, d').should == ['a', 'b', 'c', 'd']
+  end
+end
+
+describe 'using the shorthand ">>" pseudo-operator' do
+  it 'should be able to chain the operator multiple times' do
+    # comma-separated words followed by comma-separated digits
+    sequence = /[a-zA-Z]+/ >> (/\s*,\s*/.skip & /[a-zA-Z]+/).zero_or_more >> (/\s*,\s*/.skip & /\d+/).one_or_more
+    sequence.parse('a, 1').should == ['a', '1']
+    sequence.parse('a, b, 1').should == ['a', 'b', '1']
+    sequence.parse('a, 1, 2').should == ['a', '1', '2']
+    sequence.parse('a, b, 1, 2').should == ['a', 'b', '1', '2'] # every expectation in this example is a single flat array
+
+    # same, but enclosed in quotes
+    sequence = '"'.skip & /[a-zA-Z]+/ >> (/\s*,\s*/.skip & /[a-zA-Z]+/).zero_or_more >> (/\s*,\s*/.skip & /\d+/).one_or_more & '"'.skip
+    sequence.parse('"a, 1"').should == ['a', '1']
+    sequence.parse('"a, b, 1"').should == ['a', 'b', '1']
+    sequence.parse('"a, 1, 2"').should == ['a', '1', '2']
+    sequence.parse('"a, b, 1, 2"').should == ['a', 'b', '1', '2']
+
+    # alternative construction of same
+    sequence = /[a-zA-Z]+/ >> (/\s*,\s*/.skip & /[a-zA-Z]+/).zero_or_more & /\s*,\s*/.skip & /\d+/ >> (/\s*,\s*/.skip & /\d+/).zero_or_more
+    sequence.parse('a, 1').should == ['a', '1']
+    sequence.parse('a, b, 1').should == ['a', 'b', '1']
+    sequence.parse('a, 1, 2').should == ['a', '1', '2']
+    sequence.parse('a, b, 1, 2').should == ['a', 'b', '1', '2']
+  end
+end
diff --git a/spec/walrat/parslet_merge_spec.rb b/spec/walrat/parslet_merge_spec.rb
new file mode 100755
index 0000000..ab6b3e9
--- /dev/null
+++ b/spec/walrat/parslet_merge_spec.rb
@@ -0,0 +1,39 @@
+# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+require File.expand_path('../spec_helper', File.dirname(__FILE__))
+
+describe Walrat::ParsletMerge do # covers eql? and hash behaviour only; parsing is not exercised here
+  it 'should be able to compare for equality' do
+    Walrat::ParsletMerge.new('foo', 'bar').should eql(Walrat::ParsletMerge.new('foo', 'bar'))
+    Walrat::ParsletMerge.new('foo', 'bar').should_not eql(Walrat::ParsletOmission.new('foo')) # wrong class
+  end
+
+  it 'ParsletMerge and ParsletSequence hashs should not match even if created using the same parseable instances' do
+    parseable1 = 'foo'.to_parseable
+    parseable2 = 'bar'.to_parseable
+    p1 = Walrat::ParsletMerge.new(parseable1, parseable2)
+    p2 = Walrat::ParsletSequence.new(parseable1, parseable2) # same two parseable objects, different combining class
+    p1.hash.should_not == p2.hash
+    p1.should_not eql(p2)
+  end
+end
diff --git a/spec/walrat/parslet_omission_spec.rb b/spec/walrat/parslet_omission_spec.rb
new file mode 100755
index 0000000..b9c525d
--- /dev/null
+++ b/spec/walrat/parslet_omission_spec.rb
@@ -0,0 +1,74 @@
+# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2.
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+
+require File.expand_path('../spec_helper', File.dirname(__FILE__))
+
+describe Walrat::ParsletOmission do # success is signalled by raising SkippedSubstringException, failure by ParseError
+  it 'raises if "parseable" argument is nil' do
+    expect do
+      Walrat::ParsletOmission.new nil
+    end.to raise_error(ArgumentError, /nil parseable/)
+  end
+
+  it 'complains if passed nil string for parsing' do
+    expect do
+      Walrat::ParsletOmission.new('foo'.to_parseable).parse nil
+    end.to raise_error(ArgumentError, /nil string/)
+  end
+
+  it 're-raises parse errors from lower levels' do
+    expect do
+      Walrat::ParsletOmission.new('foo'.to_parseable).parse 'bar'
+    end.to raise_error(Walrat::ParseError)
+  end
+
+  it 'indicates parse successes with a SkippedSubstringException' do
+    expect do
+      Walrat::ParsletOmission.new('foo'.to_parseable).parse 'foo' # input matches, so this is the success path
+    end.to raise_error(Walrat::SkippedSubstringException)
+  end
+
+  specify 'the raised SkippedSubstringException includes the parsed substring' do
+    begin
+      Walrat::ParsletOmission.new('foo'.to_parseable).parse 'foobar'
+    rescue Walrat::SkippedSubstringException => e
+      substring = e.to_s
+    end
+    substring.should == 'foo' # substring is still nil (so this fails) if nothing was raised
+  end
+
+  specify 'the parsed substring is an empty string in the case of a zero-width parse success at a lower level' do
+    begin
+      Walrat::ParsletOmission.new('foo'.optional).parse 'bar' # a contrived example
+    rescue Walrat::SkippedSubstringException => e
+      substring = e.to_s
+    end
+    substring.should == '' # substring is still nil (so this fails) if nothing was raised
+  end
+
+  it 'can be compared for equality' do
+    Walrat::ParsletOmission.new('foo').
+      should eql(Walrat::ParsletOmission.new('foo'))
+    Walrat::ParsletOmission.new('foo').
+      should_not eql(Walrat::ParsletOmission.new('bar'))
+  end
+end
diff --git a/spec/walrat/parslet_repetition_spec.rb b/spec/walrat/parslet_repetition_spec.rb
new file mode 100755
index 0000000..262f940
--- /dev/null
+++ b/spec/walrat/parslet_repetition_spec.rb
@@ -0,0 +1,103 @@
+# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
+# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+
+require File.expand_path('../spec_helper', File.dirname(__FILE__))
+
+describe Walrat::ParsletRepetition do # constructor is called as new(parseable, min) or new(parseable, min, third_arg) below
+  it 'raises if "parseable" argument is nil' do
+    expect do
+      Walrat::ParsletRepetition.new nil, 0
+    end.to raise_error(ArgumentError, /nil parseable/)
+  end
+
+  it 'raises if "min" argument is nil' do
+    expect do
+      Walrat::ParsletRepetition.new 'foo'.to_parseable, nil
+    end.to raise_error(ArgumentError, /nil min/)
+  end
+
+  it 'raises if passed nil string for parsing' do
+    expect do
+      Walrat::ParsletRepetition.new('foo'.to_parseable, 0).parse nil
+    end.to raise_error(ArgumentError, /nil string/)
+  end
+
+  it 'should be able to match "zero or more" times (like "*" in regular expressions)' do
+    parslet = Walrat::ParsletRepetition.new 'foo'.to_parseable, 0 # only a minimum is supplied
+    expect do
+      parslet.parse 'bar'
+    end.to throw_symbol(:ZeroWidthParseSuccess) # zero times
+    parslet.parse('foo').should == 'foo' # one time
+    parslet.parse('foofoo').should == ['foo', 'foo'] # two times
+    parslet.parse('foofoofoobar').should == ['foo', 'foo', 'foo'] # three times
+  end
+
+  it 'should be able to match "zero or one" times (like "?" in regular expressions)' do
+    parslet = Walrat::ParsletRepetition.new 'foo'.to_parseable, 0, 1 # third argument presumably the maximum (matching stops after one below)
+    expect do
+      parslet.parse 'bar'
+    end.to throw_symbol(:ZeroWidthParseSuccess) # zero times
+    parslet.parse('foo').should == 'foo' # one time
+    parslet.parse('foofoo').should == 'foo' # stop at one time
+  end
+
+  it 'should be able to match "one or more" times (like "+" in regular expressions)' do
+    parslet = Walrat::ParsletRepetition.new 'foo'.to_parseable, 1
+    expect do
+      parslet.parse 'bar'
+    end.to raise_error(Walrat::ParseError) # zero times (error)
+    parslet.parse('foo').should == 'foo' # one time
+    parslet.parse('foofoo').should == ['foo', 'foo'] # two times
+    parslet.parse('foofoofoobar').should == ['foo', 'foo', 'foo'] # three times
+  end
+
+  it 'should be able to match "between X and Y" times (like {X, Y} in regular expressions)' do
+    parslet = Walrat::ParsletRepetition.new 'foo'.to_parseable, 2, 3 # here X = 2 and Y = 3
+    expect do
+      parslet.parse 'bar'
+    end.to raise_error(Walrat::ParseError) # zero times (error)
+    expect do
+      parslet.parse 'foo'
+    end.to raise_error(Walrat::ParseError) # one time (error)
+    parslet.parse('foofoo').should == ['foo', 'foo'] # two times
+    parslet.parse('foofoofoo').should == ['foo', 'foo', 'foo'] # three times
+    parslet.parse('foofoofoofoo').should == ['foo', 'foo', 'foo'] # stop at three times
+  end
+
+  it 'matches should be greedy' do
+    # here the ParsletRepetition should consume all the "foos", leaving nothing
+    # for the final parslet
+    parslet = Walrat::ParsletRepetition.new('foo'.to_parseable, 1) & 'foo'
+    expect do
+      parslet.parse 'foofoofoofoo'
+    end.to raise_error(Walrat::ParseError)
+  end
+
+  it 'should be able to compare for equality' do
+    Walrat::ParsletRepetition.new('foo'.to_parseable, 1).
+      should eql(Walrat::ParsletRepetition.new('foo'.to_parseable, 1))
+    Walrat::ParsletRepetition.new('foo'.to_parseable, 1).
+      should_not eql(Walrat::ParsletRepetition.new('bar'.to_parseable, 1))
+    Walrat::ParsletRepetition.new('foo'.to_parseable, 1).
+      should_not eql(Walrat::ParsletRepetition.new('foo'.to_parseable, 2))
+  end
+end
diff --git a/spec/walrat/parslet_sequence_spec.rb b/spec/walrat/parslet_sequence_spec.rb
new file mode 100755
index 0000000..e3f3320
--- /dev/null
+++ b/spec/walrat/parslet_sequence_spec.rb
@@ -0,0 +1,61 @@
+# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+require File.expand_path('../spec_helper', File.dirname(__FILE__))
+
+describe Walrat::ParsletSequence do # covers hash/eql? behaviour only; parsing is not exercised here
+  before do
+    @p1 = 'foo'.to_parseable
+    @p2 = 'bar'.to_parseable
+  end
+
+  it 'hashes should be the same if initialized with the same parseables' do
+    Walrat::ParsletSequence.new(@p1, @p2).hash.
+      should == Walrat::ParsletSequence.new(@p1, @p2).hash
+    Walrat::ParsletSequence.new(@p1, @p2).
+      should eql(Walrat::ParsletSequence.new(@p1, @p2))
+  end
+
+  it 'hashes should (ideally) be different if initialized with different parseables' do
+    Walrat::ParsletSequence.new(@p1, @p2).hash.
+      should_not == Walrat::ParsletSequence.new('baz'.to_parseable, 'abc'.to_parseable).hash
+    Walrat::ParsletSequence.new(@p1, @p2).
+      should_not eql(Walrat::ParsletSequence.new('baz'.to_parseable, 'abc'.to_parseable))
+  end
+
+  it 'hashes should be different compared to other similar classes even if initialized with the same parseables' do
+    Walrat::ParsletSequence.new(@p1, @p2).hash.
+      should_not == Walrat::ParsletChoice.new(@p1, @p2).hash
+    Walrat::ParsletSequence.new(@p1, @p2).
+      should_not eql(Walrat::ParsletChoice.new(@p1, @p2))
+  end
+
+  it 'should be able to use ParsletSequence instances as keys in a hash' do
+    hash = {}
+    key1 = Walrat::ParsletSequence.new(@p1, @p2)
+    key2 = Walrat::ParsletSequence.new('baz'.to_parseable, 'abc'.to_parseable)
+    hash[key1] = 'foo' # index by the parslet itself; the symbol :key1 would bypass the parslet's hash/eql?
+    hash[key2] = 'bar'
+    hash[key1].should == 'foo'
+    hash[key2].should == 'bar'
+  end
+end
diff --git a/spec/walrat/parslet_spec.rb b/spec/walrat/parslet_spec.rb
new file mode 100755
index 0000000..f5299f2
--- /dev/null
+++ b/spec/walrat/parslet_spec.rb
@@ -0,0 +1,33 @@
+# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+require File.expand_path('../spec_helper', File.dirname(__FILE__))
+
+describe Walrat::Parslet do # only the "parse must be overridden" contract is checked here
+  it 'complains if sent "parse" message' do
+    # Parslet is an abstract superclass; implementing "parse" is the
+    # responsibility of the subclasses
+    expect do
+      Walrat::Parslet.new.parse('bar')
+    end.to raise_error(NotImplementedError)
+  end
+end
diff --git a/spec/walrat/predicate_spec.rb b/spec/walrat/predicate_spec.rb
new file mode 100755
index 0000000..20ce903
--- /dev/null
+++ b/spec/walrat/predicate_spec.rb
@@ -0,0 +1,59 @@
+# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+require File.expand_path('../spec_helper', File.dirname(__FILE__))
+
+describe Walrat::Predicate do # construction, the abstract "parse", and eql?/hash across predicate classes
+  it 'raises an ArgumentError if initialized with nil' do
+    expect do
+      Walrat::Predicate.new nil
+    end.to raise_error(ArgumentError, /nil parseable/)
+  end
+
+  it 'complains if sent "parse" message' do
+    # Predicate is an abstract superclass; implementing "parse" is the
+    # responsibility of the subclasses
+    expect do
+      Walrat::Predicate.new('foo').parse 'bar'
+    end.to raise_error(NotImplementedError)
+  end
+
+  it 'should be able to compare predicates for equality' do
+    Walrat::Predicate.new('foo').should eql(Walrat::Predicate.new('foo'))
+    Walrat::Predicate.new('foo').should_not eql(Walrat::Predicate.new('bar'))
+  end
+
+  it '"and" and "not" predicates should yield different hashes even if initialized with the same "parseable"' do
+    parseable = 'foo'.to_parseable # one shared instance, so only the predicate class differs below
+    p1 = Walrat::Predicate.new(parseable)
+    p2 = Walrat::AndPredicate.new(parseable)
+    p3 = Walrat::NotPredicate.new(parseable)
+
+    p1.hash.should_not == p2.hash
+    p2.hash.should_not == p3.hash
+    p3.hash.should_not == p1.hash
+
+    p1.should_not eql(p2)
+    p2.should_not eql(p3)
+    p3.should_not eql(p1)
+  end
+end
diff --git
a/spec/walrat/proc_parslet_spec.rb b/spec/walrat/proc_parslet_spec.rb new file mode 100755 index 0000000..851a9a5 --- /dev/null +++ b/spec/walrat/proc_parslet_spec.rb @@ -0,0 +1,65 @@ +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+
+require File.expand_path('../spec_helper', File.dirname(__FILE__))
+
+describe Walrat::ProcParslet do # exercises a parslet built from a lambda via to_parseable
+  before do
+    @parslet = lambda do |string, options| # "options" is accepted but not used by this proc
+      if string == 'foobar'
+        string
+      else
+        raise Walrat::ParseError.new("expected foobar but got '#{string}'")
+      end
+    end.to_parseable
+  end
+
+  it 'raises an ArgumentError if initialized with nil' do
+    expect do
+      Walrat::ProcParslet.new nil
+    end.to raise_error(ArgumentError, /nil proc/)
+  end
+
+  it 'complains if asked to parse nil' do
+    expect do
+      @parslet.parse nil
+    end.to raise_error(ArgumentError, /nil string/)
+  end
+
+  it 'raises Walrat::ParseError if unable to parse' do
+    expect do
+      @parslet.parse 'bar'
+    end.to raise_error(Walrat::ParseError)
+  end
+
+  it 'returns a parsed value if able to parse' do
+    @parslet.parse('foobar').should == 'foobar'
+  end
+
+  it 'can be compared for equality' do
+    # in practice only parslets created with the exact same Proc instance will
+    # be eql because Proc returns different hashes for each
+    @parslet.should eql(@parslet.clone)
+    @parslet.should eql(@parslet.dup)
+    @parslet.should_not eql(lambda { nil }.to_parseable) # a distinct lambda, so not expected to be eql
+  end
+end
diff --git a/spec/walrat/regexp_parslet_spec.rb b/spec/walrat/regexp_parslet_spec.rb
new file mode 100755
index 0000000..fde6d1a
--- /dev/null
+++ b/spec/walrat/regexp_parslet_spec.rb
@@ -0,0 +1,369 @@
+# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+require File.expand_path('../spec_helper', File.dirname(__FILE__))
+
+describe Walrat::RegexpParslet do
+  before do
+    @parslet = Walrat::RegexpParslet.new(/[a-zA-Z_][a-zA-Z0-9_]*/) # an identifier-style pattern used by most examples
+  end
+
+  it 'raises an ArgumentError if initialized with nil' do
+    expect do
+      Walrat::RegexpParslet.new nil
+    end.to raise_error(ArgumentError, /nil regexp/)
+  end
+
+  it 'parse should succeed if the input string matches' do
+    lambda { @parslet.parse('an_identifier') }.should_not raise_error
+    lambda { @parslet.parse('An_Identifier') }.should_not raise_error
+    lambda { @parslet.parse('AN_IDENTIFIER') }.should_not raise_error
+    lambda { @parslet.parse('an_identifier1') }.should_not raise_error
+    lambda { @parslet.parse('An_Identifier1') }.should_not raise_error
+    lambda { @parslet.parse('AN_IDENTIFIER1') }.should_not raise_error
+    lambda { @parslet.parse('a') }.should_not raise_error
+    lambda { @parslet.parse('A') }.should_not raise_error
+    lambda { @parslet.parse('a9') }.should_not raise_error
+    lambda { @parslet.parse('A9') }.should_not raise_error
+    lambda { @parslet.parse('_identifier') }.should_not raise_error
+    lambda { @parslet.parse('_Identifier') }.should_not raise_error
+    lambda { @parslet.parse('_IDENTIFIER') }.should_not raise_error
+    lambda { @parslet.parse('_9Identifier') }.should_not raise_error
+    lambda { @parslet.parse('_') }.should_not raise_error
+  end
+
+  it 'parse should succeed if the input string matches, even if it continues after the match' do
+    lambda { @parslet.parse('an_identifier, more') }.should_not raise_error
+    lambda { @parslet.parse('An_Identifier, more') }.should_not raise_error
+    lambda { @parslet.parse('AN_IDENTIFIER, more') }.should_not raise_error
+    lambda { @parslet.parse('an_identifier1, more') }.should_not raise_error
+    lambda { @parslet.parse('An_Identifier1, more') }.should_not raise_error
+    lambda { @parslet.parse('AN_IDENTIFIER1, more') }.should_not raise_error
+    lambda { @parslet.parse('a, more') }.should_not raise_error
+    lambda { @parslet.parse('A, more') }.should_not raise_error
+    lambda { @parslet.parse('a9, more') }.should_not raise_error
+    lambda { @parslet.parse('A9, more') }.should_not raise_error
+    lambda { @parslet.parse('_identifier, more') }.should_not raise_error
+    lambda { @parslet.parse('_Identifier, more') }.should_not raise_error
+    lambda { @parslet.parse('_IDENTIFIER, more') }.should_not raise_error
+    lambda { @parslet.parse('_9Identifier, more') }.should_not raise_error
+    lambda { @parslet.parse('_, more') }.should_not raise_error
+  end
+
+  it 'parse should return a MatchDataWrapper object' do
+    @parslet.parse('an_identifier').should == 'an_identifier' # compared with == against a String; the wrapper class itself is not asserted
+    @parslet.parse('an_identifier, more').should == 'an_identifier'
+  end
+
+  it 'parse should raise an ArgumentError if passed nil' do
+    expect do
+      @parslet.parse nil
+    end.to raise_error(ArgumentError, /nil string/)
+  end
+
+  it 'parse should raise a ParseError if the input string does not match' do
+    lambda { @parslet.parse('9') }.should raise_error(Walrat::ParseError) # a number is not a valid identifier
+    lambda { @parslet.parse('9fff') }.should raise_error(Walrat::ParseError) # identifiers must not start with numbers
+    lambda { @parslet.parse(' identifier') }.should raise_error(Walrat::ParseError) # note the leading whitespace
+    lambda { @parslet.parse('') }.should raise_error(Walrat::ParseError) # empty strings can't match
+  end
+
+  it 'should be able to compare parslets for equality' do
+    /foo/.to_parseable.should eql(/foo/.to_parseable) # equal
+    /foo/.to_parseable.should_not eql(/bar/.to_parseable) # different
+    /foo/.to_parseable.should_not eql(/Foo/.to_parseable) # differing only in case
+    /foo/.to_parseable.should_not eql('foo') # totally different classes
+  end
+
+  it 'should accurately pack line and column ends into whatever gets returned from "parse"' do
+    # single word
+    parslet = /.+/m.to_parseable
+    result = parslet.parse('hello')
+    result.line_end.should == 0
+    result.column_end.should == 5
+
+    # single word with newline at end (UNIX style)
+    result = parslet.parse("hello\n")
+    result.line_end.should == 1
+    result.column_end.should == 0
+
+    # single word with newline at end (Classic Mac style)
+    result = parslet.parse("hello\r")
+    result.line_end.should == 1
+    result.column_end.should == 0
+
+    # single word with newline at end (Windows style)
+    result = parslet.parse("hello\r\n")
+    result.line_end.should == 1
+    result.column_end.should == 0
+
+    # two lines (UNIX style)
+    result = parslet.parse("hello\nworld")
+    result.line_end.should == 1
+    result.column_end.should == 5
+
+    # two lines (Classic Mac style)
+    result = parslet.parse("hello\rworld")
+    result.line_end.should == 1
+    result.column_end.should == 5
+
+    # two lines (Windows style)
+    result = parslet.parse("hello\r\nworld")
+    result.line_end.should == 1
+    result.column_end.should == 5
+  end
+
+  # in the case of RegexpParslets, the "last successfully scanned position" is
+  # always 0, 0
+  it 'line and column end should reflect last succesfully scanned position prior to failure' do
+    # fail right at start
+    parslet = /hello\r\nworld/.to_parseable
+    begin
+      parslet.parse('foobar')
+    rescue Walrat::ParseError => e
+      exception = e
+    end
+    exception.line_end.should == 0
+    exception.column_end.should == 0
+
+    exception = nil # fail after 1 character (reset first so a stale exception cannot satisfy the checks)
+    begin
+      parslet.parse('hfoobar')
+    rescue Walrat::ParseError => e
+      exception = e
+    end
+    exception.line_end.should == 0
+    exception.column_end.should == 0
+
+    exception = nil # fail after end-of-line (reset first so a stale exception cannot satisfy the checks)
+    begin
+      parslet.parse("hello\r\nfoobar")
+    rescue Walrat::ParseError => e
+      exception = e
+    end
+    exception.line_end.should == 0
+    exception.column_end.should == 0
+  end
+end
+
+describe 'chaining two regexp parslets together' do
+  it 'parslets should work in specified order' do
+    parslet = Walrat::RegexpParslet.new(/foo.\d/) &
+      Walrat::RegexpParslet.new(/bar.\d/)
+    parslet.parse('foo_1bar_2').should == ['foo_1', 'bar_2']
+  end
+
+  # Parsing Expression Grammars match greedily
+  it 'parslets should match greedily' do
+    # the first parslet should gobble up the entire string, preventing the
+    # second parslet from succeeding
+    parslet = Walrat::RegexpParslet.new(/foo.+\d/) &
+      Walrat::RegexpParslet.new(/bar.+\d/)
+    lambda { parslet.parse('foo_1bar_2') }.should raise_error(Walrat::ParseError)
+  end
+end
+
+describe 'alternating two regexp parslets' do
+  it 'either parslet should apply to generate a match' do
+    parslet = Walrat::RegexpParslet.new(/\d+/) |
+      Walrat::RegexpParslet.new(/[A-Z]+/)
+    parslet.parse('ABC').should == 'ABC'
+    parslet.parse('123').should == '123'
+  end
+
+  it 'should fail if no parslet generates a match' do
+    parslet = Walrat::RegexpParslet.new(/\d+/) |
+      Walrat::RegexpParslet.new(/[A-Z]+/)
+    lambda { parslet.parse('abc') }.should raise_error(Walrat::ParseError)
+  end
+
+  it 'parslets should be tried in left-to-right order' do
+    # in this case the first parslet should win even though the second one is also a valid match
+    parslet = Walrat::RegexpParslet.new(/(.)(..)/) |
+      Walrat::RegexpParslet.new(/(..)(.)/)
+    match_data = parslet.parse('abc').match_data
+    match_data[1].should == 'a'
+    match_data[2].should == 'bc'
+
+    # here we swap the order;
again the first parslet should win + parslet = Walrat::RegexpParslet.new(/(..)(.)/) | + Walrat::RegexpParslet.new(/(.)(..)/) + match_data = parslet.parse('abc').match_data + match_data[1].should == 'ab' + match_data[2].should == 'c' + end +end + +describe 'chaining three regexp parslets' do + it 'parslets should work in specified order' do + parslet = Walrat::RegexpParslet.new(/foo.\d/) & + Walrat::RegexpParslet.new(/bar.\d/) & + Walrat::RegexpParslet.new(/.../) + parslet.parse('foo_1bar_2ABC').should == ['foo_1', 'bar_2', 'ABC'] + end +end + +describe 'alternating three regexp parslets' do + it 'any parslet should apply to generate a match' do + parslet = Walrat::RegexpParslet.new(/\d+/) | + Walrat::RegexpParslet.new(/[A-Z]+/) | + Walrat::RegexpParslet.new(/[a-z]+/) + parslet.parse('ABC').should == 'ABC' + parslet.parse('123').should == '123' + parslet.parse('abc').should == 'abc' + end + + it 'should fail if no parslet generates a match' do + parslet = Walrat::RegexpParslet.new(/\d+/) | + Walrat::RegexpParslet.new(/[A-Z]+/) | + Walrat::RegexpParslet.new(/[a-z]+/) + lambda { parslet.parse(':::') }.should raise_error(Walrat::ParseError) + end + + it 'parslets should be tried in left-to-right order' do + # in this case the first parslet should win even though the others also produce valid matches + parslet = Walrat::RegexpParslet.new(/(.)(..)/) | + Walrat::RegexpParslet.new(/(..)(.)/) | + Walrat::RegexpParslet.new(/(...)/) + match_data = parslet.parse('abc').match_data + match_data[1].should == 'a' + match_data[2].should == 'bc' + + # here we swap the order; again the first parslet should win + parslet = Walrat::RegexpParslet.new(/(..)(.)/) | + Walrat::RegexpParslet.new(/(.)(..)/) | + Walrat::RegexpParslet.new(/(...)/) + match_data = parslet.parse('abc').match_data + match_data[1].should == 'ab' + match_data[2].should == 'c' + + # similar test but this time the first parslet can't win (doesn't match) + parslet = Walrat::RegexpParslet.new(/foo/) | + 
Walrat::RegexpParslet.new(/(...)/) | + Walrat::RegexpParslet.new(/(.)(..)/) + match_data = parslet.parse('abc').match_data + match_data[1].should == 'abc' + end +end + +describe 'combining chaining and alternation' do + it 'chaining should having higher precedence than alternation' do + # equivalent to /foo/ | ( /bar/ & /abc/ ) + parslet = Walrat::RegexpParslet.new(/foo/) | + Walrat::RegexpParslet.new(/bar/) & + Walrat::RegexpParslet.new(/abc/) + parslet.parse('foo').should == 'foo' # succeed on first choice + parslet.parse('barabc').should == ['bar', 'abc'] # succeed on alternate path + lambda { parslet.parse('bar...') }.should raise_error(Walrat::ParseError) # fail half-way down alternate path + lambda { parslet.parse('lemon') }.should raise_error(Walrat::ParseError) # fail immediately + + # swap the order, now equivalent to: ( /bar/ & /abc/ ) | /foo/ + parslet = Walrat::RegexpParslet.new(/bar/) & + Walrat::RegexpParslet.new(/abc/) | + Walrat::RegexpParslet.new(/foo/) + parslet.parse('barabc').should == ['bar', 'abc'] # succeed on first choice + parslet.parse('foo').should == 'foo' # succeed on alternate path + lambda { parslet.parse('bar...') }.should raise_error(Walrat::ParseError) # fail half-way down first path + lambda { parslet.parse('lemon') }.should raise_error(Walrat::ParseError) # fail immediately + end + + it 'should be able to override precedence using parentheses' do + # take first example above and make it ( /foo/ | /bar/ ) & /abc/ + parslet = (Walrat::RegexpParslet.new(/foo/) | + Walrat::RegexpParslet.new(/bar/)) & + Walrat::RegexpParslet.new(/abc/) + parslet.parse('fooabc').should == ['foo', 'abc'] # first choice + parslet.parse('barabc').should == ['bar', 'abc'] # second choice + lambda { parslet.parse('foo...') }.should raise_error(Walrat::ParseError) # fail in second half + lambda { parslet.parse('bar...') }.should raise_error(Walrat::ParseError) # another way of failing in second half + lambda { parslet.parse('foo') }.should 
raise_error(Walrat::ParseError) # another way of failing in second half + lambda { parslet.parse('bar') }.should raise_error(Walrat::ParseError) # another way of failing in second half + lambda { parslet.parse('lemon') }.should raise_error(Walrat::ParseError) # fail immediately + lambda { parslet.parse('abcfoo') }.should raise_error(Walrat::ParseError) # order matters + + # take second example above and make it /bar/ & ( /abc/ | /foo/ ) + parslet = Walrat::RegexpParslet.new(/bar/) & + (Walrat::RegexpParslet.new(/abc/) | Walrat::RegexpParslet.new(/foo/)) + parslet.parse('barabc').should == ['bar', 'abc'] # succeed on first choice + parslet.parse('barfoo').should == ['bar', 'foo'] # second choice + lambda { parslet.parse('bar...') }.should raise_error(Walrat::ParseError) # fail in second part + lambda { parslet.parse('bar') }.should raise_error(Walrat::ParseError) # another way to fail in second part + lambda { parslet.parse('lemon') }.should raise_error(Walrat::ParseError) # fail immediately + lambda { parslet.parse('abcbar') }.should raise_error(Walrat::ParseError) # order matters + end + + it 'should be able to include long runs of sequences' do + # A & B & C & D | E + parslet = Walrat::RegexpParslet.new(/a/) & + Walrat::RegexpParslet.new(/b/) & + Walrat::RegexpParslet.new(/c/) & + Walrat::RegexpParslet.new(/d/) | + Walrat::RegexpParslet.new(/e/) + parslet.parse('abcd').should == ['a', 'b', 'c', 'd'] + parslet.parse('e').should == 'e' + lambda { parslet.parse('f') }.should raise_error(Walrat::ParseError) + end + + it 'should be able to include long runs of options' do + # A | B | C | D & E + parslet = Walrat::RegexpParslet.new(/a/) | + Walrat::RegexpParslet.new(/b/) | + Walrat::RegexpParslet.new(/c/) | + Walrat::RegexpParslet.new(/d/) & + Walrat::RegexpParslet.new(/e/) + parslet.parse('a').should == 'a' + parslet.parse('b').should == 'b' + parslet.parse('c').should == 'c' + parslet.parse('de').should == ['d', 'e'] + lambda { parslet.parse('f') }.should 
raise_error(Walrat::ParseError) + end + + it 'should be able to alternate repeatedly between sequences and choices' do + # A & B | C & D | E + parslet = Walrat::RegexpParslet.new(/a/) & + Walrat::RegexpParslet.new(/b/) | + Walrat::RegexpParslet.new(/c/) & + Walrat::RegexpParslet.new(/d/) | + Walrat::RegexpParslet.new(/e/) + parslet.parse('ab').should == ['a', 'b'] + parslet.parse('cd').should == ['c', 'd'] + parslet.parse('e').should == 'e' + lambda { parslet.parse('f') }.should raise_error(Walrat::ParseError) + end + + it 'should be able to combine long runs with alternation' do + # A & B & C | D | E | F & G & H + parslet = Walrat::RegexpParslet.new(/a/) & + Walrat::RegexpParslet.new(/b/) & + Walrat::RegexpParslet.new(/c/) | + Walrat::RegexpParslet.new(/d/) | + Walrat::RegexpParslet.new(/e/) | + Walrat::RegexpParslet.new(/f/) & + Walrat::RegexpParslet.new(/g/) & + Walrat::RegexpParslet.new(/h/) + parslet.parse('abc').should == ['a', 'b', 'c'] + parslet.parse('d').should == 'd' + parslet.parse('e').should == 'e' + parslet.parse('fgh').should == ['f', 'g', 'h'] + lambda { parslet.parse('i') }.should raise_error(Walrat::ParseError) + end +end diff --git a/spec/walrat/string_enumerator_spec.rb b/spec/walrat/string_enumerator_spec.rb new file mode 100755 index 0000000..703a63d --- /dev/null +++ b/spec/walrat/string_enumerator_spec.rb @@ -0,0 +1,88 @@ +# encoding: utf-8 +# Copyright 2007-2010 Wincent Colaiuta. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+require File.expand_path('../spec_helper', File.dirname(__FILE__))
+
+describe Walrat::StringEnumerator do
+  it 'raises an ArgumentError if initialized with nil' do
+    # a nil input must be rejected with a message mentioning the nil string
+    construct_with_nil = lambda { Walrat::StringEnumerator.new(nil) }
+    construct_with_nil.should raise_error(ArgumentError, /nil string/)
+  end
+
+  it 'returns characters one by one until end of string, then return nil' do
+    # every call to "next" hands back exactly one character of the input;
+    # once the input is used up "next" answers nil
+    scanner = Walrat::StringEnumerator.new('hello')
+    %w[h e l l o].each do |expected|
+      scanner.next.should == expected
+    end
+    scanner.next.should be_nil
+  end
+
+  it 'is Unicode-aware (UTF-8)' do
+    # multibyte characters (the euro sign, n-tilde) must each be returned
+    # whole by a single call to "next"
+    scanner = Walrat::StringEnumerator.new('€ cañon')
+    ['€', ' ', 'c', 'a', 'ñ', 'o', 'n'].each do |expected|
+      scanner.next.should == expected
+    end
+
+    # input exhausted
+    scanner.next.should be_nil
+  end
+
+  # this was a bug
+  it 'continues past newlines' do
+    # regression check: the newline itself must be returned like any other
+    # character and enumeration must carry on with the following line
+    text = "hello\nworld"
+    scanner = Walrat::StringEnumerator.new(text)
+    expected_characters = [
+      'h', 'e', 'l', 'l', 'o',
+      "\n", # was returning nil here
+      'w', 'o', 'r', 'l', 'd'
+    ]
+    expected_characters.each do |expected|
+      scanner.next.should == expected
+    end
+  end
+
+  it 'can recall the last character using the "last" method' do
+    scanner = Walrat::StringEnumerator.new('h€llo')
+    scanner.last.should == nil # nothing scanned yet
+
+    # each step advances with "next" and then checks that "last" reports the
+    # character returned by the previous advance (nil after the first one)
+    steps = [
+      ['h', nil], ['€', 'h'], ['l', '€'], ['l', 'l'], ['o', 'l'],
+      [nil, 'o'] # nothing left to scan
+    ]
+    steps.each do |current, previous|
+      scanner.next.should == current
+      scanner.last.should == previous
+    end
+
+    scanner.last.should == 'o' # didn't advance, so should return the same
+  end
+end
diff --git a/spec/walrat/string_parslet_spec.rb b/spec/walrat/string_parslet_spec.rb
new file mode 100755
index 0000000..f512d1f
--- /dev/null
+++ b/spec/walrat/string_parslet_spec.rb
@@ -0,0 +1,145 @@
+# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+require File.expand_path('../spec_helper', File.dirname(__FILE__))
+
+describe Walrat::StringParslet do
+  before do # literal-string parslet shared by the examples that use @parslet
+    @parslet = Walrat::StringParslet.new('HELLO')
+  end
+
+  it 'should raise an ArgumentError if initialized with nil' do
+    lambda { Walrat::StringParslet.new(nil) }.should raise_error(ArgumentError)
+  end
+
+  it 'parse should succeed if the input string matches' do
+    lambda { @parslet.parse('HELLO') }.should_not raise_error
+  end
+
+  it 'parse should succeed if the input string matches, even if it continues after the match' do
+    lambda { @parslet.parse('HELLO...') }.should_not raise_error
+  end
+
+  it 'parse should return parsed string' do
+    @parslet.parse('HELLO').should == 'HELLO'
+    @parslet.parse('HELLO...').should == 'HELLO' # trailing input is not part of the result
+  end
+
+  it 'parse should raise an ArgumentError if passed nil' do
+    lambda { @parslet.parse(nil) }.should raise_error(ArgumentError)
+  end
+
+  it 'parse should raise a ParseError if the input string does not match' do
+    lambda { @parslet.parse('GOODBYE') }.should raise_error(Walrat::ParseError) # total mismatch
+    lambda { @parslet.parse('GOODBYE, HELLO') }.should raise_error(Walrat::ParseError) # eventually would match, but too late
+    lambda { @parslet.parse('HELL...') }.should raise_error(Walrat::ParseError) # starts well, but fails
+    lambda { @parslet.parse(' HELLO') }.should raise_error(Walrat::ParseError) # note the leading whitespace
+    lambda { @parslet.parse('') }.should raise_error(Walrat::ParseError) # empty strings can't match
+  end
+
+  it 'parse exceptions should include a detailed error message' do
+    # TODO: catch the raised exception and compare the message
+    lambda { @parslet.parse('HELL...') }.should raise_error(Walrat::ParseError)
+    lambda { @parslet.parse('HELL') }.should raise_error(Walrat::ParseError)
+  end
+
+  it 'should be able to compare string parslets for equality' do
+    'foo'.to_parseable.should eql('foo'.to_parseable) # equal
+    'foo'.to_parseable.should_not eql('bar'.to_parseable) # different
+    'foo'.to_parseable.should_not eql('Foo'.to_parseable) # differing only in case
+    'foo'.to_parseable.should_not eql(/foo/) # totally different classes
+  end
+
+  it 'should accurately pack line and column ends into whatever is returned by "parse"' do
+    # single word
+    parslet = 'hello'.to_parseable
+    result = parslet.parse('hello')
+    result.line_end.should == 0
+    result.column_end.should == 5
+
+    # single word with newline at end (UNIX style)
+    parslet = "hello\n".to_parseable
+    result = parslet.parse("hello\n")
+    result.line_end.should == 1
+    result.column_end.should == 0
+
+    # single word with newline at end (Classic Mac style)
+    parslet = "hello\r".to_parseable
+    result = parslet.parse("hello\r")
+    result.line_end.should == 1
+    result.column_end.should == 0
+
+    # single word with newline at end (Windows style)
+    parslet = "hello\r\n".to_parseable
+    result = parslet.parse("hello\r\n")
+    result.line_end.should == 1
+    result.column_end.should == 0
+
+    # two lines (UNIX style)
+    parslet = "hello\nworld".to_parseable
+    result = parslet.parse("hello\nworld")
+    result.line_end.should == 1
+    result.column_end.should == 5
+
+    # two lines (Classic Mac style)
+    parslet = "hello\rworld".to_parseable
+    result = parslet.parse("hello\rworld")
+    result.line_end.should == 1
+    result.column_end.should == 5
+
+    # two lines (Windows style)
+    parslet = "hello\r\nworld".to_parseable
+    result = parslet.parse("hello\r\nworld")
+    result.line_end.should == 1
+    result.column_end.should == 5
+  end
+
+  it 'line and column end should reflect last successfully scanned position prior to failure' do
+    # fail right at start
+    parslet = "hello\r\nworld".to_parseable
+    exception = begin
+      parslet.parse('foobar'); nil # nil if (unexpectedly) nothing is raised
+    rescue Walrat::ParseError => e
+      e
+    end
+    exception.line_end.should == 0
+    exception.column_end.should == 0
+
+    # fail after 1 character
+    exception = begin
+      parslet.parse('hfoobar'); nil # reassigned so a stale error from above can't satisfy the checks
+    rescue Walrat::ParseError => e
+      e
+    end
+    exception.line_end.should == 0
+    exception.column_end.should == 1
+
+    # fail after end-of-line
+    exception = begin
+      parslet.parse("hello\r\nfoobar"); nil # reassigned so a stale error from above can't satisfy the checks
+    rescue Walrat::ParseError => e
+      e
+    end
+    exception.line_end.should == 1
+    exception.column_end.should == 0
+  end
+end
diff --git a/spec/walrat/symbol_parslet_spec.rb b/spec/walrat/symbol_parslet_spec.rb
new file mode 100755
index 0000000..c3e4904
--- /dev/null
+++ b/spec/walrat/symbol_parslet_spec.rb
@@ -0,0 +1,38 @@
+# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+require File.expand_path('../spec_helper', File.dirname(__FILE__))
+
+describe Walrat::SymbolParslet do
+  it 'should raise an ArgumentError if initialized with nil' do
+    expect do
+      Walrat::SymbolParslet.new nil
+    end.to raise_error(ArgumentError, /nil symbol/) # message must mention the nil symbol
+  end
+
+  it 'should be able to compare symbol parslets for equality' do
+    :foo.to_parseable.should eql(:foo.to_parseable) # equal
+    :foo.to_parseable.should_not eql(:bar.to_parseable) # different
+    :foo.to_parseable.should_not eql(:Foo.to_parseable) # differing only in case
+    :foo.to_parseable.should_not eql(/foo/) # totally different classes
+  end
+end
diff --git a/walrat.gemspec b/walrat.gemspec
new file mode 100644
index 0000000..684910c
--- /dev/null
+++ b/walrat.gemspec
@@ -0,0 +1,50 @@
+# Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+require File.expand_path('lib/walrus/version.rb', File.dirname(__FILE__)) # NOTE(review): loads lib/walrus/..., but this file is walrat.gemspec - confirm the path exists
+
+Gem::Specification.new do |s|
+  s.name = 'walrus' # NOTE(review): gem is named walrus although the file is walrat.gemspec - confirm
+  s.version = Walrus::VERSION # constant expected to come from the version file required above
+  s.author = 'Wincent Colaiuta'
+  s.email = 'win@wincent.com'
+  s.homepage = 'https://wincent.com/products/walrus'
+  s.rubyforge_project = 'walrus'
+  s.platform = Gem::Platform::RUBY
+  s.summary = 'Object-oriented templating system'
+  s.description = <<-ENDDESC
+    Walrus is an object-oriented templating system inspired by and similar
+    to the Cheetah Python-powered template engine. It includes a Parser
+    Expression Grammar (PEG) parser generator capable of generating an
+    integrated lexer, "packrat" parser, and Abstract Syntax Tree (AST)
+    builder.
+  ENDDESC
+  s.require_paths = ['lib']
+  s.has_rdoc = true
+
+  # TODO: add 'docs' subdirectory, 'README.txt' when they're done
+  s.files = Dir['bin/walrus', 'lib/**/*.rb'] # packages bin/walrus (if present) plus every .rb file under lib/
+  s.executables = ['walrus'] # NOTE(review): presumably requires bin/walrus to ship with the gem - confirm it exists
+  s.add_runtime_dependency('wopen3', '>= 0.1')
+  s.add_development_dependency('mkdtemp', '>= 1.0')
+  s.add_development_dependency('rspec', '1.3.0') # pinned to exactly 1.3.0
+end
-- 
2.37.1