git.wincent.com - wikitext.git/commitdiff
Replace more rb_str_append calls with rb_str_cat
author Wincent Colaiuta <win@wincent.com>
Tue, 5 Feb 2008 11:25:26 +0000 (12:25 +0100)
committer Wincent Colaiuta <win@wincent.com>
Tue, 5 Feb 2008 11:25:26 +0000 (12:25 +0100)
This time for token text, and it yields an even bigger speed-up. Before:

  short slab of ASCII text    2.380000   0.010000   2.390000 (  2.461861)
  short slab of UTF-8 text    4.860000   0.000000   4.860000 (  5.016289)

After:

  short slab of ASCII text    1.570000   0.010000   1.580000 (  1.705596)
  short slab of UTF-8 text    3.280000   0.020000   3.300000 (  3.353919)

So that one change alone cuts 33% off the execution time. The combined
scanner/parser is now nearly 8 times faster than it was under ANTLR
and nearly 3 times faster than it was after the move to Ragel but before
I started optimizing the parser.

Signed-off-by: Wincent Colaiuta <win@wincent.com>
benchmarks/NOTES.txt
ext/parser.c
ext/token.h

index 4030dbc125c3dcece3a4b07be9634a11709ff540..16c82fd18e5dd2c28cd2b97c305d27cee5964fe5 100644 (file)
@@ -31,3 +31,7 @@ After replacing many rb_str_append calls with rb_str_cat
   short slab of ASCII text    2.380000   0.010000   2.390000 (  2.461861)
   short slab of UTF-8 text    4.860000   0.000000   4.860000 (  5.016289)
 
+More rb_str_append calls replaced with rb_str_cat (token text):
+
+  short slab of ASCII text    1.570000   0.010000   1.580000 (  1.705596)
+  short slab of UTF-8 text    3.280000   0.020000   3.300000 (  3.353919)
index e41f0da25105b886e590e75b1b9751dab4eb97d1..74ba79e53f03e75358a8ca1cfd3106d4c3d12536 100644 (file)
@@ -747,7 +747,7 @@ VALUE Wikitext_parser_parse(VALUE self, VALUE string)
             case BLOCKQUOTE:
                 if (ary_includes(scope, NO_WIKI_START))
                     // already in <nowiki> span (no need to check for <pre>; can never appear inside it)
-                    rb_str_append(output, TOKEN_TEXT(token));
+                    rb_str_cat(output, token->start, TOKEN_LEN(token));
                 else
                 {
                     ary_push(line, BLOCKQUOTE);
@@ -992,7 +992,7 @@ VALUE Wikitext_parser_parse(VALUE self, VALUE string)
                 if (ary_includes(scope, NO_WIKI_START))
                 {
                     // already in <nowiki> span (no need to check for <pre>; can never appear inside it)
-                    rb_str_append(output, TOKEN_TEXT(token));
+                    rb_str_cat(output, token->start, TOKEN_LEN(token));
                     break;
                 }
 
@@ -1084,7 +1084,7 @@ VALUE Wikitext_parser_parse(VALUE self, VALUE string)
                     while (k++, NEXT_TOKEN(), (type = token->type))
                     {
                         if (type == OL || type == UL)
-                            rb_str_append(output, TOKEN_TEXT(token));
+                            rb_str_cat(output, token->start, TOKEN_LEN(token));
                         else if (type == SPACE && k == 1)
                         {
                             // silently throw away the optional SPACE token after final list marker
@@ -1108,7 +1108,7 @@ VALUE Wikitext_parser_parse(VALUE self, VALUE string)
                 if (ary_includes(scope, NO_WIKI_START))
                 {
                     // already in <nowiki> span (no need to check for <pre>; can never appear inside it)
-                    rb_str_append(output, TOKEN_TEXT(token));
+                    rb_str_cat(output, token->start, TOKEN_LEN(token));
                     break;
                 }
 
@@ -1302,11 +1302,10 @@ VALUE Wikitext_parser_parse(VALUE self, VALUE string)
                 break;
 
             case URI:
-                i = TOKEN_TEXT(token); // the URI
                 if (ary_includes(scope, NO_WIKI_START))
                     // user can temporarily suppress autolinking by using <nowiki></nowiki>
                     // note that unlike MediaWiki, we do allow autolinking inside PRE blocks
-                    rb_str_append(output, i);
+                    rb_str_cat(output, token->start, TOKEN_LEN(token));
                 else if (ary_includes(scope, LINK_START))
                 {
                     // not yet implemented
@@ -1317,6 +1316,7 @@ VALUE Wikitext_parser_parse(VALUE self, VALUE string)
                     if (NIL_P(link_target))
                     {
                         // this must be our link target: look ahead to make sure we see the space we're expecting to see
+                        i = TOKEN_TEXT(token);
                         NEXT_TOKEN();
                         if (token->type == SPACE)
                         {
@@ -1342,10 +1342,10 @@ VALUE Wikitext_parser_parse(VALUE self, VALUE string)
                     {
                         if (NIL_P(link_text))
                             // this must be the first part of our link text
-                            link_text = i;
+                            link_text = TOKEN_TEXT(token);
                         else
                             // add to existing link text
-                            rb_str_append(link_text, i);
+                            rb_str_cat(link_text, token->start, TOKEN_LEN(token));
                     }
                 }
                 else
@@ -1353,6 +1353,7 @@ VALUE Wikitext_parser_parse(VALUE self, VALUE string)
                     // in plain scope, will turn into autolink (with appropriate, user-configurable CSS)
                     _Wikitext_pop_excess_elements(capture, scope, line, output, line_ending);
                     _Wikitext_start_para_if_necessary(capture, scope, line, output, &pending_crlf);
+                    i = TOKEN_TEXT(token);
                     if (autolink == Qtrue)
                         i = _Wikitext_hyperlink(Qnil, i, i, link_class); // link target, link text, link class
                     rb_str_append(output, i);
@@ -1433,7 +1434,7 @@ VALUE Wikitext_parser_parse(VALUE self, VALUE string)
                                 // don't insert the entity, insert the literal ampersand
                                 rb_str_cat(link_target, ampersand, sizeof(ampersand) - 1);
                             else
-                                rb_str_append(link_target, TOKEN_TEXT(token));
+                                rb_str_cat(link_target, token->start, TOKEN_LEN(token));
                         }
                         else if (type == LINK_END)
                             break; // jump back to top of loop (will handle this in LINK_END case below)
@@ -1583,13 +1584,14 @@ VALUE Wikitext_parser_parse(VALUE self, VALUE string)
 
             case SPACE:
                 i = NIL_P(capture) ? output : capture;
-                j = TOKEN_TEXT(token); // SPACE token may actually be a run of spaces
                 if (ary_includes(scope, NO_WIKI_START) || ary_includes(scope, PRE))
                     // already in <nowiki> span or <pre> block
-                    rb_str_append(i, j);
+                    rb_str_cat(i, token->start, TOKEN_LEN(token));
                 else
                 {
                     // peek ahead to see next token
+                    char    *token_ptr  = token->start;
+                    int     token_len   = TOKEN_LEN(token);
                     NEXT_TOKEN();
                     type = token->type;
                     if (((type == H6_END) && ary_includes(scope, H6_START)) ||
@@ -1606,7 +1608,7 @@ VALUE Wikitext_parser_parse(VALUE self, VALUE string)
                         // emit the space
                         _Wikitext_pop_excess_elements(capture, scope, line, i, line_ending);
                         _Wikitext_start_para_if_necessary(capture, scope, line, i, &pending_crlf);
-                        rb_str_append(i, j);
+                        rb_str_cat(i, token_ptr, token_len);
                     }
 
                     // jump to top of the loop to process token we scanned during lookahead
@@ -1622,7 +1624,7 @@ VALUE Wikitext_parser_parse(VALUE self, VALUE string)
                 i = NIL_P(capture) ? output : capture;
                 _Wikitext_pop_excess_elements(capture, scope, line, i, line_ending);
                 _Wikitext_start_para_if_necessary(capture, scope, line, i, &pending_crlf);
-                rb_str_append(i, TOKEN_TEXT(token));
+                rb_str_cat(i, token->start, TOKEN_LEN(token));
                 break;
 
             case HEX_ENTITY:
@@ -1738,7 +1740,7 @@ VALUE Wikitext_parser_parse(VALUE self, VALUE string)
                 i = NIL_P(capture) ? output : capture;
                 _Wikitext_pop_excess_elements(capture, scope, line, i, line_ending);
                 _Wikitext_start_para_if_necessary(capture, scope, line, i, &pending_crlf);
-                rb_str_append(i, TOKEN_TEXT(token));
+                rb_str_cat(i, token->start, TOKEN_LEN(token));
                 break;
 
             case DEFAULT:
index 58fc8ee62f247c76bc4349e90255ece5280b680d..5f5678583272d259cbc4127485ff47c68bf391e4 100644 (file)
@@ -16,6 +16,7 @@
 #include <stdint.h>     /* uint32_t */
 
 #define TOKEN_TEXT(token)   rb_str_new((const char *)token->start, (token->stop - token->start))
+#define TOKEN_LEN(token)    (token->stop - token->start)
 
 typedef struct
 {