# The Wikitext module provides a namespace for the entire extension.
# In practice, all your interaction will be with the Wikitext::Parser
# class.
#
# == +line_ending+ (String)
#
# The line ending to be used in the generated HTML (defaults to "\n").
#
# == +internal_link_prefix+ (String)
#
# The prefix to be prepended to internal links (defaults to "/wiki/").
# For example, given an +internal_link_prefix+ of "/wiki/", the internal
# link:
#
#   [[Apple]]
#
# would be transformed into:
#
#   <a href="/wiki/Apple">Apple</a>
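#
# As a rough usage sketch (the "/articles/" prefix below is illustrative
# only, and the output is elided to just the link):
#
#   parser = Wikitext::Parser.new :internal_link_prefix => '/articles/'
#   parser.parse '[[Apple]]'   # link href becomes "/articles/Apple"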
#
# == +external_link_class+ (String)
#
# The CSS class to be applied to external links (defaults to "external").
# For example, given an +external_link_class+ of "external", the external
# link:
#
#   [http://www.google.com/ the best search engine]
#
# would be transformed into:
#
#   <a class="external" href="http://www.google.com/">the best search engine</a>
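#
# A minimal usage sketch (the "offsite" class name is illustrative only):
#
#   parser = Wikitext::Parser.new
#   parser.external_link_class = 'offsite'
#   parser.parse '[http://www.google.com/ the best search engine]'
#   # the emitted anchor carries class="offsite"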
#
# == +external_link_rel+ (String)
#
# The +rel+ attribute to be applied to external links (defaults to +nil+,
# meaning that no +rel+ attribute is applied). Setting a +rel+ attribute of
# "nofollow" may be useful for search-engine optimization (see
# http://en.wikipedia.org/wiki/Nofollow for more details).
#
# This attribute can be set during initialization:
#
#   parser = Wikitext::Parser.new :external_link_rel => 'nofollow'
#
# Or by setting an attribute on the parser:
#
#   parser = Wikitext::Parser.new
#   parser.external_link_rel = 'nofollow'
#
# Or at parse time:
#
#   parser = Wikitext::Parser.new
#   parser.parse input, :external_link_rel => 'nofollow'
#
# Setting +external_link_rel+ to +nil+ suppresses the emission of any
# previously configured +rel+ attribute:
#
#   parser.parse input, :external_link_rel => nil
#
# == +mailto_class+ (String)
#
# The CSS class to be applied to external "mailto" links (defaults to
# "mailto"). For example:
#
#   [mailto:user@example.com user@example.com]
#
# or, if autolinking of email addresses is turned on, just:
#
#   user@example.com
#
# would be transformed into:
#
#   <a class="mailto" href="mailto:user@example.com">user@example.com</a>
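#
# A brief usage sketch (the "email" class name is illustrative only):
#
#   parser = Wikitext::Parser.new :mailto_class => 'email'
#   parser.parse '[mailto:user@example.com user@example.com]'
#   # the emitted anchor carries class="email"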
#
# == +img_prefix+ (String)
#
# The prefix to be prepended to image tags (defaults to "/images/").
# For example, given this image markup:
#
#   {{foo.png}}
#
# the following +img+ tag would be produced:
#
#   <img src="/images/foo.png" alt="foo.png" />
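#
# A short sketch of overriding the prefix (the "/assets/" value is
# illustrative only):
#
#   parser = Wikitext::Parser.new :img_prefix => '/assets/'
#   parser.parse '{{foo.png}}'   # src attribute becomes "/assets/foo.png"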
#
# == +autolink+ (boolean)
#
# Whether to autolink URIs found in the plain scope.
#
# When true, a URI such as:
#
#   http://apple.com/
#
# will be transformed to:
#
#   <a href="http://apple.com/">http://apple.com/</a>
#
# and if an +external_link_class+ is set (to "external", for example) then
# the transformation will be:
#
#   <a class="external" href="http://apple.com/">http://apple.com/</a>
#
# When false, no transformation will be applied and the link will be passed
# through unchanged.
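#
# A minimal sketch of disabling autolinking at initialization time:
#
#   parser = Wikitext::Parser.new :autolink => false
#   parser.parse 'see http://apple.com/'   # the URI is left as plain text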
#
# == +space_to_underscore+ (boolean)
#
# Whether spaces in link targets should be encoded normally or transformed
# into underscores.
#
# When false, an internal link like:
#
#   [[foo bar]]
#
# would be converted into:
#
#   <a href="/wiki/foo%20bar">foo bar</a>
#
# But when true (the default), it would be converted into:
#
#   <a href="/wiki/foo_bar">foo bar</a>
#
# Converting spaces to underscores makes most URLs prettier, but it comes at
# a cost: when this mode is true the articles "foo bar" and "foo_bar" can no
# longer be disambiguated, and a link to "foo_bar" will actually resolve to
# "foo bar"; it is therefore recommended that you explicitly disallow
# underscores in titles at the application level so as to avoid this kind of
# ambiguity.
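#
# A short sketch showing the two behaviours side by side:
#
#   parser = Wikitext::Parser.new
#   parser.parse '[[foo bar]]'         # href uses "foo_bar" (the default)
#   parser.space_to_underscore = false
#   parser.parse '[[foo bar]]'         # href uses "foo%20bar"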
#
# == +base_heading_level+ (integer)
#
# An integer between 0 and 6 denoting the current "heading level".
# This can be used to inform the parser of the "context" in which
# it is translating markup.
#
# For example, the parser might be translating blog post excerpts
# on a page where there is an "h1" title element for the page itself
# and an "h2" title element for each excerpt. In this context it is
# useful to set +base_heading_level+ to 2, so that any "top level"
# headings in the markup (that is, "h1" elements) can be automatically
# transformed into "h3" elements so that they appear to be
# appropriately "nested" inside the containing page elements.
#
# In this way, markup authors can be freed from thinking about
# which heading size they should use and just always start from "h1"
# for their most general content and work their way down.
#
# An additional benefit is that markup can be used in different
# contexts at different levels of nesting and the headings will be
# adjusted to suit automatically, with no intervention from the
# author.
#
# Finally, it's worth noting that in contexts where the user input
# is not necessarily trusted, this setting can be used to prevent
# users from inappropriately employing "h1" tags in deeply-nested
# contexts where they would otherwise disturb the visual harmony of
# the page.
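#
# A rough sketch of the transformation described above, assuming the
# "= Title =" style of heading markup (the markup syntax itself is not
# documented in this file):
#
#   parser = Wikitext::Parser.new
#   parser.parse '= Title =', :base_heading_level => 2
#   # the "h1"-level heading in the markup is emitted as an <h3> element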
#
# == +output_style+ (Symbol)
#
# Wikitext emits valid HTML5 fragments. By default, the output syntax is
# HTML. Optionally, the output syntax can be changed to XML by setting
# +output_style+ to +:xml+.
#
# This can be done during initialization:
#
#   parser = Wikitext::Parser.new :output_style => :xml
#
# Or by setting an attribute on the parser:
#
#   parser = Wikitext::Parser.new
#   parser.output_style = :xml
#
# Or at parse time:
#
#   parser = Wikitext::Parser.new
#   parser.parse input, :output_style => :xml
#
# In practice the only difference between the two output syntaxes is that
# the XML syntax uses self-closing +img+ tags:
#
#   <img src="foo.png" alt="Foo" />
#
# while the HTML syntax does not:
#
#   <img src="foo.png" alt="Foo">
#
# == +link_proc+ (lambda or Proc object)
#
# "Red links" can be implemented by providing a custom +link_proc+ block
# at parse time. This can be used to check for existing or non-existent
# link targets and apply custom CSS styling accordingly. For example:
#
#   link_proc = lambda { |target| target == 'bar' ? 'redlink' : nil }
#   Wikitext::Parser.new.parse '[[foo]] [[bar]]', :link_proc => link_proc
#
# This would add the "redlink" CSS class to the "bar" link but not the
# "foo" link. Please note that if your +link_proc+ involves database
# queries then you should implement an appropriate caching strategy to
# ensure that markup with many links does not overwhelm your database.
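#
# As a sketch of one such caching strategy (the Article model and its
# +exists?+ query are hypothetical; any memoization scheme would do),
# the proc below remembers each target it has already looked up:
#
#   cache = {}
#   link_proc = lambda do |target|
#     cache.fetch(target) do
#       cache[target] = Article.exists?(:title => target) ? nil : 'redlink'
#     end
#   end
#   parser.parse markup, :link_proc => link_proc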
#
# Many more examples of link procs can be found in the spec suite:
#
# * http://git.wincent.com/wikitext.git/blob/HEAD:/spec/internal_link_spec.rb

# Sanitizes an internal link target for inclusion within the HTML
# stream. Expects +string+ to be UTF-8-encoded.
#
# For example, a link target for the article titled:
#
#   foo, "bar" & baz €
#
# would be sanitized as:
#
#   foo, &quot;bar&quot; &amp; baz &#x20ac;
#
# Note that characters which have special meaning within HTML such as
# quotes and ampersands are turned into named entities, and characters
# outside of the printable ASCII range are turned into hexadecimal
# entities.
#
# See also encode_link_target.
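#
# A usage sketch based on the example above (calling the method on
# Wikitext::Parser, as the surrounding documentation suggests):
#
#   Wikitext::Parser.sanitize_link_target 'foo, "bar" & baz €'
#   # => "foo, &quot;bar&quot; &amp; baz &#x20ac;"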
def self.sanitize_link_target string
  # This is just a placeholder.
  # See parser.c for the C source code to this method.
end

# URL-encodes an internal link target for use as an href attribute in an
# anchor. Expects +string+ to be UTF-8-encoded.
#
# For example, the link target:
#
#   foo, "bar" & baz €
#
# would be encoded as:
#
#   foo%2c%20%22bar%22%20%26%20baz%e2%82%ac
#
# The encoding is based on RFCs 2396 and 2718. The "unreserved" characters
# a..z, A..Z, 0..9, "-", "_", "." and "~" are passed through unchanged and
# all others are converted into percent escapes.
#
# When combined with sanitize_link_target this method can be used to emit
# the following link for the example article:
#
#   <a href="foo%2c%20%22bar%22%20%26%20baz%e2%82%ac">foo, &quot;bar&quot; &amp; baz &#x20ac;</a>
#
# Note that when +space_to_underscore+ is +true+, spaces are treated specially,
# and are converted to "_" rather than "%20". For the majority of links this
# yields much prettier URLs at the cost of some reduction in the namespace of
# possible titles (this is because when using +space_to_underscore+ you should
# disallow underscores in article titles to avoid ambiguity between titles like
# "foo bar" and "foo_bar").
def self.encode_link_target string
  # This is just a placeholder.
  # See parser.c for the C source code to this method.
end

# Prepares a Parser instance.
#
# There are a number of attributes that you can set on the returned
# parser to customize its behaviour. See the attributes documentation
# in the Parser class. You also have the option of overriding the
# attributes at initialization time by passing in the attribute name in
# symbol form together with the overridden value.
#
# In other words, both:
#
#   parser = Wikitext::Parser.new
#   parser.autolink = false
#   parser.mailto_class = 'mail'
#
# and:
#
#   parser = Wikitext::Parser.new :autolink => false, :mailto_class => 'mail'
#
# are equivalent.
def initialize options = {}
  # This is just a placeholder.
  # See parser.c for the C source code to this method.
end

# Feeds the UTF-8-encoded +string+ into the scanner and returns an
# array of recognized tokens. Raises a Wikitext::Parser::Error
# exception if the input string is not valid UTF-8.
#
# Normally you don't need to invoke this method manually because the
# parse method automatically sets up a scanner and obtains tokens as
# it needs them. This method exists for testing and introspection
# purposes only.
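#
# A small sketch (the exact tokens returned for any given input are an
# implementation detail and may change):
#
#   tokens = Wikitext::Parser.new.tokenize 'hello world'
#   tokens.map { |token| token.token_type }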
def tokenize string
  # This is just a placeholder.
  # See parser.c for the C source code to this method.
end

# Like the tokenize method, feeds +string+ into the scanner to obtain
# the corresponding tokens, but unlike the tokenize method it does not
# return them because its sole purpose is to measure the speed of
# the scanner.
#
# Just like the tokenize method, raises a Wikitext::Parser::Error if
# passed invalid UTF-8 input.
def benchmarking_tokenize string
  # This is just a placeholder.
  # See parser.c for the C source code to this method.
end

# Parses and transforms the UTF-8 wikitext markup input string into
# HTML. Raises a Wikitext::Parser::Error if passed invalid UTF-8.
# You can customize some aspects of the transformation by setting
# attributes on the parser instance before calling this method
# (see the attributes documentation for the Parser class),
# or by passing in an (optional) options hash.
#
# Options that can be overridden at parse time include:
#
# +indent+::             A non-negative number (to add an arbitrary
#                        amount of indentation to all lines in the
#                        output) or false (to disable indentation
#                        entirely)
# +base_heading_level+:: An integer between 0 and 6 denoting the
#                        current "heading level" (documented above)
# +output_style+::       A symbol, +:xml+, to emit XML syntax (by
#                        default HTML syntax is emitted)
# +link_proc+::          A lambda that can be used to apply custom
#                        CSS to links to produce "red links"
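#
# For example, a parse call overriding several of these options at once
# (the option values shown are illustrative only):
#
#   parser = Wikitext::Parser.new
#   parser.parse input, :indent             => 2,
#                       :base_heading_level => 2,
#                       :output_style       => :xml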
def parse string, options = {}
  # This is just a placeholder.
  # See parser.c for the C source code to this method.
end

# Like the parse method, transforms the UTF-8 input +string+ from
# wikitext markup into HTML, but doesn't return a result. This
# method is specifically designed for performance profiling so
# you shouldn't need to call it in practice. It parses the supplied
# string 100,000 times so as to provide enough sample data to make
# profiling useful while minimizing the influence of extraneous
# factors such as set-up code.
def profiling_parse string
  # This is just a placeholder.
  # See parser.c for the C source code to this method.
end

# Exception raised when an error occurs during parsing.
# As the parser is designed to gracefully cope with bad syntax, the
# only reason you should see this exception is if you pass
# invalidly-encoded UTF-8 to the parse method.
class Error < Exception
end

# Token object representing a symbol found in the input stream during
# scanning. When you invoke the tokenize method you receive an array
# of Token instances.
#
# This class exists purely for testing and diagnostic purposes; it
# is actually just a wrapper for the real token structure that is
# used internally. (In actual use the Wikitext extension doesn't
# even use this class; it instead uses lightweight C structs under
# the hood for maximum speed and memory efficiency.)
#
# Each token has the following attributes:
#
# +start+::        the location in memory (a character pointer
#                  into the input stream) where the token begins
# +stop+::         the location in memory (a character pointer)
#                  where the token ends
# +line_start+::   the line number where the token starts;
#                  numbering begins at line 1 (there is no line 0)
# +line_stop+::    the line number where the token ends
# +column_start+:: the column number where the token starts;
#                  numbering begins at column 1 (there is no column 0)
# +column_stop+::  the column number where the token ends
# +code_point+::   for tokens outside the range of printable ASCII,
#                  the UTF-32 code point corresponding to the token
# +token_type+::   the type of the token, from the possible set of
#                  token types returned by the types method
# +string_value+:: the textual content of the token as a Ruby String
class Token
  # Returns a hash of all token types by (numeric) value and
  # (human-readable) name.
  def self.types
    # This is just a placeholder.
    # See token.c for the C source code to this method.
  end
end