-#!/usr/bin/env ruby
-# Copyright 2008-2013 Wincent Colaiuta. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-require File.join(File.dirname(__FILE__), '..', 'ext', 'wikitext')
-require 'benchmark'
-
-# Short ASCII wikitext sample (91 characters, 91 bytes): a single unordered
-# list item containing a piped wiki link, a backtick-delimited span and a
-# bare URL.
-short_slab_of_ASCII_text = '* Lorem [[ipsum|hello]] dolor sit amet, `consectetuer` http://example.com/ adipiscing elit.'
-
-# Expected HTML for the sample above. The sanity check near the end of this
-# script compares the parser's output against this string before any timing
-# is done.
-short_slab_of_ASCII_text_output = <<SLAB
-<ul>
- <li>Lorem <a href="/wiki/ipsum">hello</a> dolor sit amet, <tt>consectetuer</tt> <a href="http://example.com/" class="external">http://example.com/</a> adipiscing elit.</li>
-</ul>
-SLAB
-
-# Short UTF-8 wikitext sample (91 characters, 122 bytes): the same markup as
-# the short ASCII sample, with many letters replaced by multi-byte characters.
-short_slab_of_UTF8_text = '* Lór€m [[ïpsûm|h€llö]] dólór sït àm€t, `cóñs€ct€tû€r` http://example.com/ àdïpïscïñg €lït.'
-
-# Expected HTML for the short UTF-8 sample; note the percent-encoded link
-# target in the href. Used by the sanity check near the end of this script.
-short_slab_of_UTF8_text_output = <<SLAB
-<ul>
- <li>Lór€m <a href="/wiki/%c3%afps%c3%bbm">h€llö</a> dólór sït àm€t, <tt>cóñs€ct€tû€r</tt> <a href="http://example.com/" class="external">http://example.com/</a> àdïpïscïñg €lït.</li>
-</ul>
-SLAB
-
-# Longer ASCII wikitext sample (1415 characters, 1415 bytes) covering
-# paragraphs, headings, blockquotes, inline styles, nested lists, a
-# preformatted block, internal and external links, and malformed markup.
-# NOTE(review): the preformatted block contains a literal "©", which is not
-# ASCII and contradicts the equal character/byte counts stated above. It
-# presumably read "&copy;" originally (compare "&cöpy;" in the UTF-8 slab);
-# confirm against the upstream file.
-longer_slab_of_ASCII_text = <<SLAB
-paragraph
-second line
-
-new paragraph
-
-= a heading =
-
-> a blockquote
-> second line of blockquote
->
-> new paragraph within blockquote
-
-== another heading ==
-
-paragraph within ''multiple '''styles''''' and <tt>tt span</tt>
-
-similar, but with '''styles in ''different'' order'''
-
-again, a '''different ''order'''''
-
-* list item 1
-** nested list item 1
-** nested list item 2
-** nested list item 3
-* list item 2
-
- // this is a code block
- notice how it can contain ''markup''
- which would '''otherwise''' have <tt>special</tt> meaning
- although explicit entities © are passed through unchanged
-
-a normal paragraph again
-
-This is where we show a link to an article on [[GCC]].
-Related to that, [[GCC|a link]] to the same
-article but with custom link text.
-
-External links [http://example.com work too].
-As well as autolinks as seen http://example.com/
-here.
-
-Look at how we handle bad syntax. [[This is an unterminated
-link. And [http://example.com/ is another.
-
-# this is an ordered list
-# which continues
-## and has another ordered list
-## nested inside it
-# and then falls back
-#* and then nests another list
-#* this time an unordered one
-#** itself containing a nested list
-#** which continues
-#**# and finally nests yet another ordered list
-#**# which continues
-#* drops back quite a way
-# and finally all the way
-#****** and finishes with an invalid item
-
-=== heading with missing closing tag
-* list
-# new list
-SLAB
-
-# Expected HTML for longer_slab_of_ASCII_text. The sanity check near the end
-# of this script raises if the parser's output differs from this string.
-longer_slab_of_ASCII_text_output = <<SLAB
-<p>paragraph second line</p>
-<p>new paragraph</p>
-<h1>a heading</h1>
-<blockquote>
- <p>a blockquote second line of blockquote</p>
- <p>new paragraph within blockquote</p>
-</blockquote>
-<h2>another heading</h2>
-<p>paragraph within <em>multiple <strong>styles</strong></em> and <tt>tt span</tt></p>
-<p>similar, but with <strong>styles in <em>different</em> order</strong></p>
-<p>again, a <strong>different <em>order</em></strong></p>
-<ul>
- <li>list item 1
- <ul>
- <li>nested list item 1</li>
- <li>nested list item 2</li>
- <li>nested list item 3</li>
- </ul>
- </li>
- <li>list item 2</li>
-</ul>
-<pre>// this is a code block
-notice how it can contain ''markup''
-which would '''otherwise''' have <tt>special</tt> meaning
-although explicit entities © are passed through unchanged</pre>
-<p>a normal paragraph again</p>
-<p>This is where we show a link to an article on <a href="/wiki/GCC">GCC</a>. Related to that, <a href="/wiki/GCC">a link</a> to the same article but with custom link text.</p>
-<p>External links <a href="http://example.com" class="external">work too</a>. As well as autolinks as seen <a href="http://example.com/" class="external">http://example.com/</a> here.</p>
-<p>Look at how we handle bad syntax. [[This is an unterminated link. And [<a href="http://example.com/" class="external">http://example.com/</a> is another.</p>
-<ol>
- <li>this is an ordered list</li>
- <li>which continues
- <ol>
- <li>and has another ordered list</li>
- <li>nested inside it</li>
- </ol>
- </li>
- <li>and then falls back
- <ul>
- <li>and then nests another list</li>
- <li>this time an unordered one
- <ul>
- <li>itself containing a nested list</li>
- <li>which continues
- <ol>
- <li>and finally nests yet another ordered list</li>
- <li>which continues</li>
- </ol>
- </li>
- </ul>
- </li>
- <li>drops back quite a way</li>
- </ul>
- </li>
- <li>and finally all the way
- <ul>
- <li>***** and finishes with an invalid item</li>
- </ul>
- </li>
-</ol>
-<h3>heading with missing closing tag</h3>
-<ul>
- <li>list</li>
-</ul>
-<ol>
- <li>new list</li>
-</ol>
-SLAB
-
-# Longer UTF-8 wikitext sample (1415 characters, 2061 bytes): the same markup
-# constructs as the longer ASCII sample, with many letters replaced by
-# multi-byte characters.
-longer_slab_of_UTF8_text = <<SLAB
-pärägräph
-∫€cöñd lîñ€
-
-ñ€w pärägräph
-
-= ä h€ädîñg =
-
-> ä blöckquöt€
-> ∫€cöñd lîñ€ öf blöckquöt€
->
-> ñ€w pärägräph wîthîñ blöckquöt€
-
-== äñöth€r h€ädîñg ==
-
-pärägräph wîthîñ ''multîpl€ '''∫tyl€∫''''' äñd <tt>tt ∫päñ</tt>
-
-∫îmîlär, but wîth '''∫tyl€∫ îñ ''dîff€r€ñt'' örd€r'''
-
-ägäîñ, ä '''dîff€r€ñt ''örd€r'''''
-
-* lî∫t ît€m 1
-** ñ€∫t€d lî∫t ît€m 1
-** ñ€∫t€d lî∫t ît€m 2
-** ñ€∫t€d lî∫t ît€m 3
-* lî∫t ît€m 2
-
- // thî∫ î∫ ä cöd€ blöck
- ñötîc€ höw ît cäñ cöñtäîñ ''märkup''
- whîch wöuld '''öth€rwî∫€''' häv€ <tt>∫p€cîäl</tt> m€äñîñg
- älthöugh €xplîcît €ñtîtî€∫ &cöpy; är€ pä∫∫€d thröugh uñchäñg€d
-
-ä ñörmäl pärägräph ägäîñ
-
-Thî∫ î∫ wh€r€ w€ ∫höw ä lîñk tö äñ ärtîcl€ öñ [[GCC]].
-R€lät€d tö thät, [[GCC|ä lîñk]] tö th€ ∫äm€
-ärtîcl€ but wîth cu∫töm lîñk t€xt.
-
-Ext€rñäl lîñk∫ [http://example.com wörk töö].
-A∫ w€ll ä∫ äutölîñk∫ ä∫ ∫€€ñ http://example.com/
-her€.
-
-Löök ät höw w€ häñdl€ bäd ∫yñtäx. [[Thî∫ î∫ äñ uñt€rmîñät€d
-lîñk. Añd [http://example.com/ î∫ äñöth€r.
-
-# thî∫ î∫ äñ örd€r€d lî∫t
-# whîch cöñtîñu€∫
-## äñd hä∫ äñöth€r örd€r€d lî∫t
-## ñ€∫t€d îñ∫îd€ ît
-# äñd th€ñ fäll∫ bäck
-#* äñd th€ñ ñ€∫t∫ äñöth€r lî∫t
-#* thî∫ tîm€ äñ uñörd€r€d öñ€
-#** ît∫€lf cöñtäîñîñg ä ñ€∫t€d lî∫t
-#** whîch cöñtîñu€∫
-#**# äñd fîñälly ñ€∫t∫ y€t äñöth€r örd€r€d lî∫t
-#**# whîch cöñtîñu€∫
-#* dröp∫ bäck quît€ ä wäy
-# äñd fîñälly äll th€ wäy
-#****** äñd fîñî∫h€∫ wîth äñ îñvälîd ît€m
-
-=== h€ädîñg wîth mî∫∫îñg clö∫îñg täg
-* lî∫t
-# ñ€w lî∫t
-SLAB
-
-# Expected HTML for longer_slab_of_UTF8_text. The sanity check near the end
-# of this script raises if the parser's output differs from this string.
-longer_slab_of_UTF8_text_output = <<SLAB
-<p>pärägräph ∫€cöñd lîñ€</p>
-<p>ñ€w pärägräph</p>
-<h1>ä h€ädîñg</h1>
-<blockquote>
- <p>ä blöckquöt€ ∫€cöñd lîñ€ öf blöckquöt€</p>
- <p>ñ€w pärägräph wîthîñ blöckquöt€</p>
-</blockquote>
-<h2>äñöth€r h€ädîñg</h2>
-<p>pärägräph wîthîñ <em>multîpl€ <strong>∫tyl€∫</strong></em> äñd <tt>tt ∫päñ</tt></p>
-<p>∫îmîlär, but wîth <strong>∫tyl€∫ îñ <em>dîff€r€ñt</em> örd€r</strong></p>
-<p>ägäîñ, ä <strong>dîff€r€ñt <em>örd€r</em></strong></p>
-<ul>
- <li>lî∫t ît€m 1
- <ul>
- <li>ñ€∫t€d lî∫t ît€m 1</li>
- <li>ñ€∫t€d lî∫t ît€m 2</li>
- <li>ñ€∫t€d lî∫t ît€m 3</li>
- </ul>
- </li>
- <li>lî∫t ît€m 2</li>
-</ul>
-<pre>// thî∫ î∫ ä cöd€ blöck
-ñötîc€ höw ît cäñ cöñtäîñ ''märkup''
-whîch wöuld '''öth€rwî∫€''' häv€ <tt>∫p€cîäl</tt> m€äñîñg
-älthöugh €xplîcît €ñtîtî€∫ &cöpy; är€ pä∫∫€d thröugh uñchäñg€d</pre>
-<p>ä ñörmäl pärägräph ägäîñ</p>
-<p>Thî∫ î∫ wh€r€ w€ ∫höw ä lîñk tö äñ ärtîcl€ öñ <a href="/wiki/GCC">GCC</a>. R€lät€d tö thät, <a href="/wiki/GCC">ä lîñk</a> tö th€ ∫äm€ ärtîcl€ but wîth cu∫töm lîñk t€xt.</p>
-<p>Ext€rñäl lîñk∫ <a href="http://example.com" class="external">wörk töö</a>. A∫ w€ll ä∫ äutölîñk∫ ä∫ ∫€€ñ <a href="http://example.com/" class="external">http://example.com/</a> her€.</p>
-<p>Löök ät höw w€ häñdl€ bäd ∫yñtäx. [[Thî∫ î∫ äñ uñt€rmîñät€d lîñk. Añd [<a href="http://example.com/" class="external">http://example.com/</a> î∫ äñöth€r.</p>
-<ol>
- <li>thî∫ î∫ äñ örd€r€d lî∫t</li>
- <li>whîch cöñtîñu€∫
- <ol>
- <li>äñd hä∫ äñöth€r örd€r€d lî∫t</li>
- <li>ñ€∫t€d îñ∫îd€ ît</li>
- </ol>
- </li>
- <li>äñd th€ñ fäll∫ bäck
- <ul>
- <li>äñd th€ñ ñ€∫t∫ äñöth€r lî∫t</li>
- <li>thî∫ tîm€ äñ uñörd€r€d öñ€
- <ul>
- <li>ît∫€lf cöñtäîñîñg ä ñ€∫t€d lî∫t</li>
- <li>whîch cöñtîñu€∫
- <ol>
- <li>äñd fîñälly ñ€∫t∫ y€t äñöth€r örd€r€d lî∫t</li>
- <li>whîch cöñtîñu€∫</li>
- </ol>
- </li>
- </ul>
- </li>
- <li>dröp∫ bäck quît€ ä wäy</li>
- </ul>
- </li>
- <li>äñd fîñälly äll th€ wäy
- <ul>
- <li>***** äñd fîñî∫h€∫ wîth äñ îñvälîd ît€m</li>
- </ul>
- </li>
-</ol>
-<h3>h€ädîñg wîth mî∫∫îñg clö∫îñg täg</h3>
-<ul>
- <li>lî∫t</li>
-</ul>
-<ol>
- <li>ñ€w lî∫t</li>
-</ol>
-SLAB
-
-# Registers one labelled measurement with a benchmark job.
-#
-# job         - object that responds to #report (the value yielded by
-#               Benchmark.bmbm at the call sites in this script).
-# description - label passed to job.report for this measurement.
-# parser      - object that responds to #profiling_parse (a Wikitext::Parser
-#               at the call sites in this script).
-# input       - wikitext string handed to parser.profiling_parse.
-#
-# Returns whatever job.report returns.
-# NOTE(review): profiling_parse is defined outside this file; what it does
-# beyond accepting the input is not visible here.
-def parse job, description, parser, input
- job.report(description) do
- parser.profiling_parse input
- end
-end
-
-# Single parser instance shared by the sanity checks and every benchmark run.
-parser = Wikitext::Parser.new
-
-# Sanity check: raise a RuntimeError, naming the offending sample, if the
-# parser's output for any sample differs from its expected HTML. This runs
-# before the benchmark so timings are never reported for incorrect output.
-raise 'mismatch (short slab of ASCII text)' unless (parser.parse(short_slab_of_ASCII_text) == short_slab_of_ASCII_text_output)
-raise 'mismatch (short slab of UTF-8 text)' unless (parser.parse(short_slab_of_UTF8_text) == short_slab_of_UTF8_text_output)
-raise 'mismatch (longer slab of ASCII text)' unless (parser.parse(longer_slab_of_ASCII_text) == longer_slab_of_ASCII_text_output)
-raise 'mismatch (longer slab of UTF-8 text)' unless (parser.parse(longer_slab_of_UTF8_text) == longer_slab_of_UTF8_text_output)
-
-# Time the parser on each of the four samples. Benchmark.bmbm executes the
-# registered reports twice (a rehearsal pass, then the measured pass) and
-# prints the timings under the labels given here.
-Benchmark.bmbm do |job|
- parse job, 'short slab of ASCII text', parser, short_slab_of_ASCII_text
- parse job, 'short slab of UTF-8 text', parser, short_slab_of_UTF8_text
- parse job, 'longer slab of ASCII text', parser, longer_slab_of_ASCII_text
- parse job, 'longer slab of UTF-8 text', parser, longer_slab_of_UTF8_text
-end