// Copyright 2007-2008 Wincent Colaiuta // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see . #include "parser.h" #include "ary.h" #include "str.h" #include "wikitext.h" #include "wikitext_ragel.h" #define IN(type) ary_includes(parser->scope, type) // poor man's object orientation in C: // instead of parsing around multiple parameters between functions in the parser // we pack everything into a struct and pass around only a pointer to that // TODO: consider changing some of the VALUE members (eg link_target) to the more efficient str_t type typedef struct { VALUE output; // for accumulating output to be returned VALUE capture; // for capturing substrings VALUE link_target; // short term "memory" for parsing links VALUE link_text; // short term "memory" for parsing links VALUE external_link_class; // CSS class applied to external links VALUE img_prefix; // path prepended when emitting img tags ary_t *scope; // stack for tracking scope ary_t *line; // stack for tracking scope as implied by current line ary_t *line_buffer; // stack for tracking raw tokens (not scope) on current line VALUE pending_crlf; // boolean (Qtrue or Qfalse) VALUE autolink; // boolean (Qtrue or Qfalse) VALUE treat_slash_as_special; // boolean (Qtrue or Qfalse) VALUE space_to_underscore; // boolean (Qtrue or Qfalse) VALUE special_link; // boolean (Qtrue or Qfalse): is the current link_target a "special" link? str_t *line_ending; int base_indent; // controlled by the :indent option to Wikitext::Parser#parse int current_indent; // fluctuates according to currently nested structures str_t *tabulation; // caching buffer for emitting indentation } parser_t; const char escaped_no_wiki_start[] = "<nowiki>"; const char escaped_no_wiki_end[] = "</nowiki>"; const char literal_strong_em[] = "'''''"; const char literal_strong[] = "'''"; const char literal_em[] = "''"; const char escaped_em_start[] = "<em>"; const char escaped_em_end[] = "</em>"; const char escaped_strong_start[] = "<strong>"; const char escaped_strong_end[] = "</strong>"; const char escaped_tt_start[] = "<tt>"; const char escaped_tt_end[] = "</tt>"; const char literal_h6[] = "======"; const char literal_h5[] = "====="; const char literal_h4[] = "===="; const char literal_h3[] = "==="; const char literal_h2[] = "=="; const char literal_h1[] = "="; const char pre_start[] = "
";
const char pre_end[]                    = "
"; const char escaped_pre_start[] = "<pre>"; const char escaped_pre_end[] = "</pre>"; const char blockquote_start[] = "
"; const char blockquote_end[] = "
"; const char escaped_blockquote_start[] = "<blockquote>"; const char escaped_blockquote_end[] = "</blockquote>"; const char strong_em_start[] = ""; const char strong_start[] = ""; const char strong_end[] = ""; const char em_start[] = ""; const char em_end[] = ""; const char tt_start[] = ""; const char tt_end[] = ""; const char ol_start[] = "
    "; const char ol_end[] = "
"; const char ul_start[] = "
    "; const char ul_end[] = "
"; const char li_start[] = "
  • "; const char li_end[] = "
  • "; const char h6_start[] = "
    "; const char h6_end[] = "
    "; const char h5_start[] = "
    "; const char h5_end[] = "
    "; const char h4_start[] = "

    "; const char h4_end[] = "

    "; const char h3_start[] = "

    "; const char h3_end[] = "

    "; const char h2_start[] = "

    "; const char h2_end[] = "

    "; const char h1_start[] = "

    "; const char h1_end[] = "

    "; const char p_start[] = "

    "; const char p_end[] = "

    "; const char space[] = " "; const char a_start[] = ""; const char a_end[] = ""; const char link_start[] = "[["; const char link_end[] = "]]"; const char separator[] = "|"; const char ext_link_start[] = "["; const char backtick[] = "`"; const char quote[] = "\""; const char ampersand[] = "&"; const char quot_entity[] = """; const char amp_entity[] = "&"; const char lt_entity[] = "<"; const char gt_entity[] = ">"; const char escaped_blockquote[] = "> "; const char ext_link_end[] = "]"; const char literal_img_start[] = "{{"; const char img_start[] = ""; const char img_alt[] = "\" alt=\""; // for testing and debugging only VALUE Wikitext_parser_tokenize(VALUE self, VALUE string) { if (NIL_P(string)) return Qnil; string = StringValue(string); VALUE tokens = rb_ary_new(); char *p = RSTRING_PTR(string); long len = RSTRING_LEN(string); char *pe = p + len; token_t token; next_token(&token, NULL, p, pe); rb_ary_push(tokens, _Wikitext_token(&token)); while (token.type != END_OF_FILE) { next_token(&token, &token, NULL, pe); rb_ary_push(tokens, _Wikitext_token(&token)); } return tokens; } // for benchmarking raw tokenization speed only VALUE Wikitext_parser_benchmarking_tokenize(VALUE self, VALUE string) { if (NIL_P(string)) return Qnil; string = StringValue(string); char *p = RSTRING_PTR(string); long len = RSTRING_LEN(string); char *pe = p + len; token_t token; next_token(&token, NULL, p, pe); while (token.type != END_OF_FILE) next_token(&token, &token, NULL, pe); return Qnil; } // we downcase "in place", overwriting the original contents of the buffer and returning the same string VALUE _Wikitext_downcase(VALUE string) { char *ptr = RSTRING_PTR(string); long len = RSTRING_LEN(string); for (long i = 0; i < len; i++) { if (ptr[i] >= 'A' && ptr[i] <= 'Z') ptr[i] += 32; } return string; } VALUE _Wikitext_hyperlink(VALUE link_prefix, VALUE link_target, VALUE link_text, VALUE link_class) { VALUE string = rb_str_new(a_start, sizeof(a_start) - 1); // rb_str_append(string, link_text); rb_str_cat(string, a_end, sizeof(a_end) - 1); return string; } void _Wikitext_append_img(parser_t *parser, char *token_ptr, int token_len) { rb_str_cat(parser->output, img_start, sizeof(img_start) - 1); // 
    rb_str_cat(parser->output, token_ptr, token_len);
    rb_str_cat(parser->output, img_end, sizeof(img_end) - 1);       // } // will emit indentation only if we are about to emit any of: //
    ,

    ,