2 # Copyright 2008-2013 Wincent Colaiuta. All rights reserved.
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are met:
7 # 1. Redistributions of source code must retain the above copyright notice,
8 # this list of conditions and the following disclaimer.
9 # 2. Redistributions in binary form must reproduce the above copyright notice,
10 # this list of conditions and the following disclaimer in the documentation
11 # and/or other materials provided with the distribution.
13 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
17 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
23 # POSSIBILITY OF SUCH DAMAGE.
25 require File.join(File.dirname(__FILE__), '..', 'ext', 'wikitext')
28 # 91 characters, 91 bytes
29 short_slab_of_ASCII_text = '* Lorem [[ipsum|hello]] dolor sit amet, `consectetuer` http://example.com/ adipiscing elit.'
31 # compare against the expected output to ensure correctness
32 short_slab_of_ASCII_text_output = <<SLAB
34 <li>Lorem <a href="/wiki/ipsum">hello</a> dolor sit amet, <tt>consectetuer</tt> <a href="http://example.com/" class="external">http://example.com/</a> adipiscing elit.</li>
38 # 91 characters, 122 bytes
39 short_slab_of_UTF8_text = '* Lór€m [[ïpsûm|h€llö]] dólór sït àm€t, `cóñs€ct€tû€r` http://example.com/ àdïpïscïñg €lït.'
41 short_slab_of_UTF8_text_output = <<SLAB
43 <li>Lór€m <a href="/wiki/%c3%afps%c3%bbm">h€llö</a> dólór sït àm€t, <tt>cóñs€ct€tû€r</tt> <a href="http://example.com/" class="external">http://example.com/</a> àdïpïscïñg €lït.</li>
47 # 1415 characters, 1415 bytes
48 longer_slab_of_ASCII_text = <<SLAB
57 > second line of blockquote
59 > new paragraph within blockquote
63 paragraph within ''multiple '''styles''''' and <tt>tt span</tt>
65 similar, but with '''styles in ''different'' order'''
67 again, a '''different ''order'''''
75 // this is a code block
76 notice how it can contain ''markup''
77 which would '''otherwise''' have <tt>special</tt> meaning
78 although explicit entities © are passed through unchanged
80 a normal paragraph again
82 This is where we show a link to an article on [[GCC]].
83 Related to that, [[GCC|a link]] to the same
84 article but with custom link text.
86 External links [http://example.com work too].
87 As well as autolinks as seen http://example.com/
90 Look at how we handle bad syntax. [[This is an unterminated
91 link. And [http://example.com/ is another.
93 # this is an ordered list
95 ## and has another ordered list
98 #* and then nests another list
99 #* this time an unordered one
100 #** itself containing a nested list
102 #**# and finally nests yet another ordered list
104 #* drops back quite a way
105 # and finally all the way
106 #****** and finishes with an invalid item
108 === heading with missing closing tag
113 longer_slab_of_ASCII_text_output = <<SLAB
114 <p>paragraph second line</p>
118 <p>a blockquote second line of blockquote</p>
119 <p>new paragraph within blockquote</p>
121 <h2>another heading</h2>
122 <p>paragraph within <em>multiple <strong>styles</strong></em> and <tt>tt span</tt></p>
123 <p>similar, but with <strong>styles in <em>different</em> order</strong></p>
124 <p>again, a <strong>different <em>order</em></strong></p>
128 <li>nested list item 1</li>
129 <li>nested list item 2</li>
130 <li>nested list item 3</li>
135 <pre>// this is a code block
136 notice how it can contain ''markup''
137 which would '''otherwise''' have <tt>special</tt> meaning
138 although explicit entities © are passed through unchanged</pre>
139 <p>a normal paragraph again</p>
140 <p>This is where we show a link to an article on <a href="/wiki/GCC">GCC</a>. Related to that, <a href="/wiki/GCC">a link</a> to the same article but with custom link text.</p>
141 <p>External links <a href="http://example.com" class="external">work too</a>. As well as autolinks as seen <a href="http://example.com/" class="external">http://example.com/</a> here.</p>
142 <p>Look at how we handle bad syntax. [[This is an unterminated link. And [<a href="http://example.com/" class="external">http://example.com/</a> is another.</p>
144 <li>this is an ordered list</li>
147 <li>and has another ordered list</li>
148 <li>nested inside it</li>
151 <li>and then falls back
153 <li>and then nests another list</li>
154 <li>this time an unordered one
156 <li>itself containing a nested list</li>
159 <li>and finally nests yet another ordered list</li>
160 <li>which continues</li>
165 <li>drops back quite a way</li>
168 <li>and finally all the way
170 <li>***** and finishes with an invalid item</li>
174 <h3>heading with missing closing tag</h3>
183 # 1415 characters, 2061 bytes
184 longer_slab_of_UTF8_text = <<SLAB
193 > ∫€cöñd lîñ€ öf blöckquöt€
195 > ñ€w pärägräph wîthîñ blöckquöt€
197 == äñöth€r h€ädîñg ==
199 pärägräph wîthîñ ''multîpl€ '''∫tyl€∫''''' äñd <tt>tt ∫päñ</tt>
201 ∫îmîlär, but wîth '''∫tyl€∫ îñ ''dîff€r€ñt'' örd€r'''
203 ägäîñ, ä '''dîff€r€ñt ''örd€r'''''
206 ** ñ€∫t€d lî∫t ît€m 1
207 ** ñ€∫t€d lî∫t ît€m 2
208 ** ñ€∫t€d lî∫t ît€m 3
211 // thî∫ î∫ ä cöd€ blöck
212 ñötîc€ höw ît cäñ cöñtäîñ ''märkup''
213 whîch wöuld '''öth€rwî∫€''' häv€ <tt>∫p€cîäl</tt> m€äñîñg
214 älthöugh €xplîcît €ñtîtî€∫ &cöpy; är€ pä∫∫€d thröugh uñchäñg€d
216 ä ñörmäl pärägräph ägäîñ
218 Thî∫ î∫ wh€r€ w€ ∫höw ä lîñk tö äñ ärtîcl€ öñ [[GCC]].
219 R€lät€d tö thät, [[GCC|ä lîñk]] tö th€ ∫äm€
220 ärtîcl€ but wîth cu∫töm lîñk t€xt.
222 Ext€rñäl lîñk∫ [http://example.com wörk töö].
223 A∫ w€ll ä∫ äutölîñk∫ ä∫ ∫€€ñ http://example.com/
226 Löök ät höw w€ häñdl€ bäd ∫yñtäx. [[Thî∫ î∫ äñ uñt€rmîñät€d
227 lîñk. Añd [http://example.com/ î∫ äñöth€r.
229 # thî∫ î∫ äñ örd€r€d lî∫t
231 ## äñd hä∫ äñöth€r örd€r€d lî∫t
233 # äñd th€ñ fäll∫ bäck
234 #* äñd th€ñ ñ€∫t∫ äñöth€r lî∫t
235 #* thî∫ tîm€ äñ uñörd€r€d öñ€
236 #** ît∫€lf cöñtäîñîñg ä ñ€∫t€d lî∫t
238 #**# äñd fîñälly ñ€∫t∫ y€t äñöth€r örd€r€d lî∫t
240 #* dröp∫ bäck quît€ ä wäy
241 # äñd fîñälly äll th€ wäy
242 #****** äñd fîñî∫h€∫ wîth äñ îñvälîd ît€m
244 === h€ädîñg wîth mî∫∫îñg clö∫îñg täg
249 longer_slab_of_UTF8_text_output = <<SLAB
250 <p>pärägräph ∫€cöñd lîñ€</p>
251 <p>ñ€w pärägräph</p>
252 <h1>ä h€ädîñg</h1>
254 <p>ä blöckquöt€ ∫€cöñd lîñ€ öf blöckquöt€</p>
255 <p>ñ€w pärägräph wîthîñ blöckquöt€</p>
257 <h2>äñöth€r h€ädîñg</h2>
258 <p>pärägräph wîthîñ <em>multîpl€ <strong>∫tyl€∫</strong></em> äñd <tt>tt ∫päñ</tt></p>
259 <p>∫îmîlär, but wîth <strong>∫tyl€∫ îñ <em>dîff€r€ñt</em> örd€r</strong></p>
260 <p>ägäîñ, ä <strong>dîff€r€ñt <em>örd€r</em></strong></p>
262 <li>lî∫t ît€m 1
264 <li>ñ€∫t€d lî∫t ît€m 1</li>
265 <li>ñ€∫t€d lî∫t ît€m 2</li>
266 <li>ñ€∫t€d lî∫t ît€m 3</li>
269 <li>lî∫t ît€m 2</li>
271 <pre>// thî∫ î∫ ä cöd€ blöck
272 ñötîc€ höw ît cäñ cöñtäîñ ''märkup''
273 whîch wöuld '''öth€rwî∫€''' häv€ <tt>∫p€cîäl</tt> m€äñîñg
274 älthöugh €xplîcît €ñtîtî€∫ &cöpy; är€ pä∫∫€d thröugh uñchäñg€d</pre>
275 <p>ä ñörmäl pärägräph ägäîñ</p>
276 <p>Thî∫ î∫ wh€r€ w€ ∫höw ä lîñk tö äñ ärtîcl€ öñ <a href="/wiki/GCC">GCC</a>. R€lät€d tö thät, <a href="/wiki/GCC">ä lîñk</a> tö th€ ∫äm€ ärtîcl€ but wîth cu∫töm lîñk t€xt.</p>
277 <p>Ext€rñäl lîñk∫ <a href="http://example.com" class="external">wörk töö</a>. A∫ w€ll ä∫ äutölîñk∫ ä∫ ∫€€ñ <a href="http://example.com/" class="external">http://example.com/</a> her€.</p>
278 <p>Löök ät höw w€ häñdl€ bäd ∫yñtäx. [[Thî∫ î∫ äñ uñt€rmîñät€d lîñk. Añd [<a href="http://example.com/" class="external">http://example.com/</a> î∫ äñöth€r.</p>
280 <li>thî∫ î∫ äñ örd€r€d lî∫t</li>
281 <li>whîch cöñtîñu€∫
283 <li>äñd hä∫ äñöth€r örd€r€d lî∫t</li>
284 <li>ñ€∫t€d îñ∫îd€ ît</li>
287 <li>äñd th€ñ fäll∫ bäck
289 <li>äñd th€ñ ñ€∫t∫ äñöth€r lî∫t</li>
290 <li>thî∫ tîm€ äñ uñörd€r€d öñ€
292 <li>ît∫€lf cöñtäîñîñg ä ñ€∫t€d lî∫t</li>
293 <li>whîch cöñtîñu€∫
295 <li>äñd fîñälly ñ€∫t∫ y€t äñöth€r örd€r€d lî∫t</li>
296 <li>whîch cöñtîñu€∫</li>
301 <li>dröp∫ bäck quît€ ä wäy</li>
304 <li>äñd fîñälly äll th€ wäy
306 <li>***** äñd fîñî∫h€∫ wîth äñ îñvälîd ît€m</li>
310 <h3>h€ädîñg wîth mî∫∫îñg clö∫îñg täg</h3>
312 <li>lî∫t</li>
315 <li>ñ€w lî∫t</li>
# Adds a single entry to a benchmark run.
#
# job         - object responding to +report(label) { ... }+; in this script
#               it is the value yielded by Benchmark.bmbm
# description - label handed straight through to +job.report+
# parser      - object responding to +profiling_parse+
# input       - markup passed to +parser.profiling_parse+
#
# The parse happens inside the block given to +job.report+, so it runs only
# when (and as many times as) the job decides to yield. Returns whatever
# +job.report+ returns.
def parse(job, description, parser, input)
  job.report(description) { parser.profiling_parse(input) }
end
parser = Wikitext::Parser.new

# One row per fixture: report label, markup to parse, and the output the
# parser is expected to produce for it. Both the sanity check and the
# benchmark below walk this table, so every benchmarked slab is also verified.
slabs = [
  ['short slab of ASCII text',  short_slab_of_ASCII_text,  short_slab_of_ASCII_text_output],
  ['short slab of UTF-8 text',  short_slab_of_UTF8_text,   short_slab_of_UTF8_text_output],
  ['longer slab of ASCII text', longer_slab_of_ASCII_text, longer_slab_of_ASCII_text_output],
  ['longer slab of UTF-8 text', longer_slab_of_UTF8_text,  longer_slab_of_UTF8_text_output]
]

# Abort before timing anything if the parser's output differs from the fixture.
slabs.each do |label, input, expected|
  raise "mismatch (#{label})" unless parser.parse(input) == expected
end

# NOTE(review): relies on the Benchmark module already being loaded; the
# corresponding require is not visible in this part of the file - confirm.
Benchmark.bmbm do |job|
  slabs.each { |label, input, _expected| parse(job, label, parser, input) }
end