1 # Copyright 2007-2010 Wincent Colaiuta. All rights reserved.
2 # Redistribution and use in source and binary forms, with or without
3 # modification, are permitted provided that the following conditions are met:
5 # 1. Redistributions of source code must retain the above copyright notice,
6 # this list of conditions and the following disclaimer.
7 # 2. Redistributions in binary form must reproduce the above copyright notice,
8 # this list of conditions and the following disclaimer in the documentation
9 # and/or other materials provided with the distribution.
11 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
12 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
13 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
14 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
15 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
16 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
17 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
18 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
19 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
20 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
21 # POSSIBILITY OF SUCH DAMAGE.
25 # Additions to String class for Unicode support.
26 # Parslet combining methods.
27 # Convenience methods (to_parseable).
28 # Conversion utility methods.
36 # NOTE: this is a totally Walrat-specific implementation that is
37 # unlikely to be of use anywhere else. It is used in only 1 place
38 # in the codebase, and works around the fact that the MatchData
39 # made available by the index method gets clobbered by the
40 # "chars.to_a" call. The same thing happens for alternative
41 # methods of counting the chars, such as using jlength or a manual scan.
44 # One workaround is for the caller to re-perform the index call just
45 # to get the MatchData again, but that is inefficient. So here we
46 # just do the addition before returning the result to the caller.
47 def jindex_plus_length arg
49 $~[0].length + unpack('C*')[0...i].pack('C*').chars.to_a.length
53 # Unlike the normal rindex method, the MatchData in $~ set by the inner
54 # rindex call gets clobbered (by the "chars.to_a" call) and is not visible to
55 # the caller of this method.
56 def jrindex arg, offset = Walrat::NoParameterMarker.instance
57 if offset == Walrat::NoParameterMarker.instance
60 i = rindex arg, offset
62 i ? unpack('C*')[0...i].pack('C*').chars.to_a.length : nil
65 # multi-byte friendly [] implementation
66 def [](range, other = Walrat::NoParameterMarker.instance)
67 if other == Walrat::NoParameterMarker.instance
68 if range.kind_of? Range
69 chars.to_a[range].join
74 old_range range, other
78 # Returns a character-level enumerator for the receiver.
80 Walrat::StringEnumerator.new self
83 # Rationale: it's ok to add "&" and "|" methods to string because they don't
84 # exist yet (they're not overrides).
85 include Walrat::ParsletCombining
87 # Returns a StringParslet based on the receiver.
89 Walrat::StringParslet.new self
92 # Converts the receiver of the form "foo_bar" to "FooBar". Specifically, the
93 # receiver is split into pieces delimited by underscores, each component is
94 # then converted to capital case (the first letter is capitalized and the
95 # remaining letters are lowercased) and finally the components are joined.
97 self.split('_').collect { |component| component.capitalize}.join