]> git.wincent.com - wikitext.git/blob - ext/token.c
Add tokenization of literal blockquote tags
[wikitext.git] / ext / token.c
1 // Copyright 2008 Wincent Colaiuta
2 // This program is free software: you can redistribute it and/or modify
3 // it under the terms of the GNU General Public License as published by
4 // the Free Software Foundation, either version 3 of the License, or
5 // (at your option) any later version.
6 //
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 // GNU General Public License for more details.
11 //
12 // You should have received a copy of the GNU General Public License
13 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
14
15 #include "token.h"
16 #include "wikitext.h"
17
18 // return a hash of token types
19 // we make this available for unit testing purposes
20
21 VALUE Wikitext_parser_token_types(VALUE self)
22 {
23     VALUE hash = rb_hash_new();
24
25 #define SET_TOKEN_TYPE(identifier)  (void)rb_hash_aset(hash, INT2FIX(identifier), \
26     rb_funcall(rb_funcall(rb_str_new2(#identifier), rb_intern("downcase"), 0), rb_intern("to_sym"), 0))
27
28     SET_TOKEN_TYPE(NO_TOKEN);
29     SET_TOKEN_TYPE(P);
30     SET_TOKEN_TYPE(LI);
31     SET_TOKEN_TYPE(NESTED_LIST);
32     SET_TOKEN_TYPE(PRE);
33     SET_TOKEN_TYPE(PRE_START);
34     SET_TOKEN_TYPE(PRE_END);
35     SET_TOKEN_TYPE(NO_WIKI_START);
36     SET_TOKEN_TYPE(NO_WIKI_END);
37     SET_TOKEN_TYPE(BLOCKQUOTE);
38     SET_TOKEN_TYPE(BLOCKQUOTE_START);
39     SET_TOKEN_TYPE(BLOCKQUOTE_END);
40     SET_TOKEN_TYPE(STRONG_EM);
41     SET_TOKEN_TYPE(STRONG_START);
42     SET_TOKEN_TYPE(STRONG_END);
43     SET_TOKEN_TYPE(STRONG);
44     SET_TOKEN_TYPE(EM_START);
45     SET_TOKEN_TYPE(EM_END);
46     SET_TOKEN_TYPE(EM);
47     SET_TOKEN_TYPE(TT_START);
48     SET_TOKEN_TYPE(TT_END);
49     SET_TOKEN_TYPE(TT);
50     SET_TOKEN_TYPE(OL);
51     SET_TOKEN_TYPE(UL);
52     SET_TOKEN_TYPE(H6_START);
53     SET_TOKEN_TYPE(H5_START);
54     SET_TOKEN_TYPE(H4_START);
55     SET_TOKEN_TYPE(H3_START);
56     SET_TOKEN_TYPE(H2_START);
57     SET_TOKEN_TYPE(H1_START);
58     SET_TOKEN_TYPE(H6_END);
59     SET_TOKEN_TYPE(H5_END);
60     SET_TOKEN_TYPE(H4_END);
61     SET_TOKEN_TYPE(H3_END);
62     SET_TOKEN_TYPE(H2_END);
63     SET_TOKEN_TYPE(H1_END);
64     SET_TOKEN_TYPE(URI);
65     SET_TOKEN_TYPE(MAIL);
66     SET_TOKEN_TYPE(LINK_START);
67     SET_TOKEN_TYPE(LINK_END);
68     SET_TOKEN_TYPE(EXT_LINK_START);
69     SET_TOKEN_TYPE(EXT_LINK_END);
70     SET_TOKEN_TYPE(SEPARATOR);
71     SET_TOKEN_TYPE(SPACE);
72     SET_TOKEN_TYPE(QUOT_ENTITY);
73     SET_TOKEN_TYPE(AMP_ENTITY);
74     SET_TOKEN_TYPE(NAMED_ENTITY);
75     SET_TOKEN_TYPE(HEX_ENTITY);
76     SET_TOKEN_TYPE(DECIMAL_ENTITY);
77     SET_TOKEN_TYPE(QUOT);
78     SET_TOKEN_TYPE(AMP);
79     SET_TOKEN_TYPE(LESS);
80     SET_TOKEN_TYPE(GREATER);
81     SET_TOKEN_TYPE(CRLF);
82     SET_TOKEN_TYPE(PRINTABLE);
83     SET_TOKEN_TYPE(DEFAULT);
84     SET_TOKEN_TYPE(END_OF_FILE);
85
86 #undef SET_TOKEN_TYPE
87
88     return hash;
89 }
90
91 // for testing and debugging only
92 VALUE _Wikitext_token(token_t *token)
93 {
94     VALUE object = rb_class_new_instance(0, NULL, cWikitextParserToken);
95     (void)rb_iv_set(object, "@start",           LONG2NUM((long)token->start));
96     (void)rb_iv_set(object, "@stop",            LONG2NUM((long)token->stop));
97     (void)rb_iv_set(object, "@line_start",      LONG2NUM(token->line_start));
98     (void)rb_iv_set(object, "@line_stop",       LONG2NUM(token->line_stop));
99     (void)rb_iv_set(object, "@column_start",    LONG2NUM(token->column_start));
100     (void)rb_iv_set(object, "@column_stop",     LONG2NUM(token->column_stop));
101     (void)rb_iv_set(object, "@code_point",      INT2NUM(token->code_point));
102
103     // look-up the token type
104     VALUE types = Wikitext_parser_token_types(Qnil);
105     VALUE type  = rb_hash_aref(types, INT2FIX(token->type));
106     (void)rb_iv_set(object, "@token_type",      type);
107     (void)rb_iv_set(object, "@string_value",    rb_str_new(token->start, token->stop - token->start));
108     return object;
109 }