1 {-# LANGUAGE FlexibleContexts #-}
3 module Docvim.Parse ( parse
9 import Control.Applicative ( (*>)
17 import Data.Char (toUpper)
18 import Data.List (groupBy, intercalate)
20 import System.Exit (exitFailure)
21 import System.IO (hPutStrLn, stderr)
22 -- TODO: custom error messages with <?>
23 import Text.Parsec ( (<|>)
47 import Text.Parsec.String (Parser, parseFromFile)
48 import Text.Parsec.Combinator (eof)
49 import Text.ParserCombinators.Parsec.Char ( alphaNum
58 -- | Given a `description` like "fu[nction]", returns a parser that matches
59 -- "fu", "fun", "func", "funct", "functi", "functio" and "function".
61 -- Beware, may explode at runtime if passed an invalid `description`, due to the
64 -- Requires the FlexibleContexts extension, for reasons that I don't yet fully
66 command description = try (string prefix >> remainder rest)
68 where prefix = takeWhile (/= '[') description
69 rest = init (snd (splitAt (1 + length prefix) description))
70 remainder [r] = optional (char r)
71 remainder (r:rs) = optional (char r >> remainder rs)
73 function = FunctionDeclaration
74 <$> (fu *> bang <* wsc)
75 <*> (name <* optional wsc)
77 <*> (attributes <* optional wsc)
78 <*> (skippable *> many node <* (optional ws >> endfunction))
80 fu = command "fu[nction]"
81 name = choice [script, normal, autoloaded] <* optional wsc
82 script = liftA2 (++) (try $ string "s:") (many $ oneOf identifier)
83 normal = liftA2 (++) (many1 upper) (many $ oneOf identifier)
85 a <- many1 $ oneOf identifier
87 c <- sepBy1 (many1 $ oneOf identifier) (string "#")
88 return $ a ++ b ++ intercalate "#" c
89 identifier = ['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9'] ++ "_"
90 arguments = (char '(' >> optional wsc)
91 *> (ArgumentList <$> argument `sepBy` (char ',' >> optional wsc))
92 <* (optional wsc >> char ')' >> optional wsc)
93 argument = Argument <$> (string "..." <|> many1 alphaNum) <* optional wsc
94 attributes = choice [string "abort", string "range", string "dict"] `sepEndBy` wsc
96 -- Disambiguate `:endf[unction]` and `:endfo[r]`
97 endfunction = lookAhead (string "endf" >> notFollowedBy (string "o"))
98 >> command "endf[unction]"
101 lStatement = lookAhead (char 'l')
102 >> choice [ try (lookAhead (string "lw")) >> lwindow
103 , try (lookAhead (string "let")) >> letStatement
107 lwindow = LwindowStatement <$> (lw *> height <* eos)
109 lw = command "l[window]"
110 height = optionMaybe (wsc *> number)
111 number = liftA read (many1 digit)
113 lexpr = LexprStatement
114 <$> (command "lex[pr]" *> bang <* wsc)
117 -- "let" is a reserved word in Haskell, so we call this "letStatement" instead.
118 letStatement = LetStatement
119 <$> (string "let" >> wsc >> lhs)
120 <*> (optional wsc >> char '=' >> optional wsc *> rhs <* eos)
122 -- Kludge alert! Until we get a full expression parser, we use this crude
124 lhs = many1 $ noneOf "\"\n="
125 rhs = many1 $ noneOf "\n"
127 unlet = UnletStatement
128 <$> (unl *> bang <* wsc)
132 unl = command "unl[et]"
134 quote = string "\"" <?> "quote"
135 commentStart = quote <* (notFollowedBy quote >> optional ws)
136 docBlockStart = (string "\"\"" <* optional ws) <?> "\"\""
138 separator = Separator <$ (try (string "---") >> optional ws) <?> "wat"
140 fenced = fence >> newline >> Fenced <$> body
142 fence = try $ string "```" >> optional ws
144 lines <- manyTill line (try $ (commentStart <|> docBlockStart) >> optional ws >> fence)
145 let indent = foldr countLeadingSpaces infinity lines
146 return $ map (trimLeadingSpace indent) lines
148 -- Find minimum count of leading spaces.
149 countLeadingSpaces line = min (length (takeWhile (' ' ==) line))
150 trimLeadingSpace count = if count > 0
153 infinity = maxBound :: Int
154 line = (commentStart' <|> docBlockStart') >> restOfLine <* newline
155 commentStart' = quote <* notFollowedBy quote
156 docBlockStart' = string "\"\"" <?> "\"\""
158 blockquote = lookAhead (char '>')
160 <$> paragraph `sepBy1` blankLine
162 paragraph = Paragraph <$> body
165 rest <- many otherLine
166 -- Make every line end with whitespace.
167 let nodes = concatMap appendWhitespace (first:rest)
168 -- Collapse consecutive whitespace.
169 let compressed = compress nodes
170 -- Trim final whitespace.
171 return ( if last compressed == Whitespace
176 >> many1 (choice [phrasing, whitespace])
177 otherLine = try $ newline
178 >> (commentStart <|> docBlockStart)
180 blankLine = try $ newline
181 >> (commentStart <|> docBlockStart)
182 >> many1 (try $ char '>'
185 >> (commentStart <|> docBlockStart))
187 list = lookAhead (char '-' >> notFollowedBy (char '-'))
189 <$> listItem `sepBy1` separator
191 -- Yes, this is a bit hideous.
192 separator = try $ newline
193 >> (commentStart <|> docBlockStart)
195 >> lookAhead (char '-')
197 listItem = lookAhead (char '-' >> notFollowedBy (char '-'))
203 rest <- many otherLine
204 -- Make every line end with whitespace.
205 let nodes = concatMap appendWhitespace (first:rest)
206 -- Collapse consecutive whitespace.
207 let compressed = compress nodes
208 -- Trim final whitespace.
209 return ( if last compressed == Whitespace
214 >> many1 (choice [phrasing, whitespace])
215 otherLine = try $ newline
216 >> (commentStart <|> docBlockStart)
217 -- TODO ^ DRY this up?
219 >> lookAhead (noneOf "-")
220 >> many1 (choice [phrasing, whitespace])
222 -- | Newline (and slurps up following horizontal whitespace as well).
223 newline = (char '\n' >> optional ws) <|> eof
224 newlines = many1 (char '\n' >> optional ws)
225 <|> (eof >> return [()])
227 -- | Whitespace (specifically, horizontal whitespace: spaces and tabs).
228 ws = many1 (oneOf " \t")
230 -- | Continuation-aware whitespace (\).
231 wsc = many1 $ choice [whitespace, continuation]
233 whitespace = oneOf " \t"
234 continuation = try $ char '\n' >> ws >> char '\\'
236 -- TODO: string literals; some nasty lookahead might be required
239 >> notFollowedBy quote
241 >> skipMany (char '\n' >> optional ws)
243 -- | Optional bang suffix for VimL commands.
244 bang = option False (True <$ char '!')
246 -- | End-of-statement.
247 -- TODO: see `:h :bar` for a list of commands which see | as an arg instead of a
248 -- command separator.
249 eos = optional ws >> choice [bar, ws', skipMany1 comment]
251 bar = char '|' >> optional wsc
252 ws' = newlines >> notFollowedBy wsc
255 node = choice [ docBlock
258 <* optional skippable
260 docBlock = lookAhead docBlockStart
261 >> (DocBlock <$> many1 blockElement)
262 <* trailingBlankCommentLines
264 blockElement = try $ start
265 >> skipMany emptyLines
266 *> choice [ annotation
267 , try subheading -- must come before heading
274 , paragraph -- must come last
277 start = try docBlockStart <|> commentStart
278 emptyLines = try $ newline >> start
279 next = optional ws >> newline
280 trailingBlankCommentLines = skipMany $ start >> newline
282 paragraph = Paragraph <$> body
286 rest <- many otherLine
287 -- Make every line end with whitespace
288 let nodes = concatMap appendWhitespace (first:rest)
289 -- Collapse consecutive whitespace
290 let compressed = compress nodes
291 -- Trim final whitespace
292 return ( if last compressed == Whitespace
295 firstLine = many1 $ choice [phrasing, whitespace]
296 otherLine = try $ newline
297 >> (commentStart <|> docBlockStart)
299 >> notFollowedBy special
302 -- | Used in lookahead rules to make sure that we don't greedily consume special
303 -- tokens as if they were just phrasing content.
304 special :: Parser String
305 special = choice [ string "-" <* notFollowedBy (char '-')
308 , string "-" <* string "--"
310 , string "`" <* string "``"
315 phrasing = choice [ br
321 -- | Appends a Whitespace token to a list of nodes.
322 appendWhitespace :: [Node] -> [Node]
323 appendWhitespace xs = xs ++ [Whitespace]
325 -- | Compress whitespace.
326 -- Consecutive Whitespace tokens are replaced with a single token.
327 -- If a run of whitespace includes a BreakTag, the run is replaced with the
329 compress :: [Node] -> [Node]
330 compress = map prioritizeBreakTag . group
333 fn BreakTag Whitespace = True
334 fn Whitespace BreakTag = True
335 fn Whitespace Whitespace = True
337 prioritizeBreakTag xs = if hasBreakTag xs
340 hasBreakTag = elem BreakTag
341 -- similar to "word"... might end up replacing "word" later on...
342 -- something more sophisticated here with satisfy?
343 plaintext = Plaintext <$> wordChars
345 wordChars = many1 $ choice [ try $ char '<' <* notFollowedBy (string' "br")
349 -- | Case-insensitive char match.
351 -- Based on `caseChar` function in:
352 -- https://hackage.haskell.org/package/hsemail-1.3/docs/Text-ParserCombinators-Parsec-Rfc2234.html
353 char' c = satisfy $ \x -> toUpper x == toUpper c
355 -- | Case-insensitive string match.
357 -- Based on `caseString` function in:
358 -- https://hackage.haskell.org/package/hsemail-1.3/docs/Text-ParserCombinators-Parsec-Rfc2234.html
359 string' s = mapM_ char' s >> pure s <?> s
361 -- | Tokenized whitespace.
363 -- Most whitespace is insignificant and gets omitted from the AST, but
364 -- whitespace inside "phrasing content" is significant so is preserved (in
365 -- normalized form) in the AST.
366 whitespace = Whitespace <$ ws
368 br = BreakTag <$ (try htmlTag <|> try xhtmlTag) <?> "<br />"
370 htmlTag = string' "<br>"
371 xhtmlTag = string' "<br" >> optional ws >> string "/>"
373 link = Link <$> (bar *> linkText <* bar)
376 linkText = many1 $ noneOf " \t\n|"
378 code = Code <$> (backtick *> codeText <* backtick)
381 codeText = many $ noneOf "\n`"
383 -- TODO: record this in symbol table similar to
384 -- https://github.com/wincent/docvim/blob/js/src/SymbolVisitor.js
385 -- (probably want to make this a post-processing step?)
386 linkTargets = LinkTargets <$> many1 (star *> target <* (star >> optional ws))
389 target = many1 $ noneOf " \t\n*"
391 vimL = choice [ block
395 block = choice [ function ]
396 statement = choice [ lStatement
401 -- | Generic VimL node parser to represent stuff that we haven't built out full parsing
403 genericStatement = do
404 -- Make sure we never recognize `endfunction` as a generic statement. This is
405 -- necessary because we call `node` recursively inside `function` while
406 -- parsing the function body. We must stop `node` from consuming
407 -- `endfunction`, otherwise the `function` parse will fail to find it.
408 notFollowedBy endfunction
409 atoms <- sepEndBy1 word (optional wsc)
411 return $ GenericStatement $ unwords atoms
413 -- | Remainder of the line up to but not including a newline.
414 -- Does not include any trailing whitespace.
415 restOfLine :: Parser String
417 rest <- many (noneOf "\n")
420 -- | Strip trailing and leading whitespace.
422 -- Not efficient, but chosen for readablility.
424 -- TODO: switch to Data.Text (http://stackoverflow.com/a/6270382/2103996) for
426 strip = lstrip . rstrip
428 -- | Strip leading (left) whitespace.
429 lstrip = dropWhile (`elem` " \n\t")
431 -- | Strip trailing (right) whitespace.
432 rstrip = reverse . lstrip . reverse
434 heading :: Parser Node
436 >> notFollowedBy (char '#')
438 >> HeadingAnnotation <$> restOfLine
440 subheading :: Parser Node
441 subheading = string "##"
443 >> SubheadingAnnotation <$> restOfLine
445 -- | Match a "word" of non-whitespace characters.
446 word = many1 (noneOf " \n\t")
448 -- TODO: only allow these after "" and " at start of line
449 annotation :: Parser Node
450 annotation = char '@' *> annotationName
453 choice [ try $ string "commands" >> pure CommandsAnnotation -- must come before function
455 , string "dedent" >> pure DedentAnnotation
456 , try $ string "footer" >> pure FooterAnnotation -- must come before function
457 , try $ string "functions" >> pure FunctionsAnnotation -- must come before function
459 , string "indent" >> pure IndentAnnotation
460 , try $ string "mappings" >> pure MappingsAnnotation -- must come before mapping
462 , try $ string "options" >> pure OptionsAnnotation -- must come before option
467 command = string "command" >> ws >> CommandAnnotation <$> ((:) <$> char ':' <*> many1 (noneOf "\n"))
469 function = string "function" >> ws >> FunctionAnnotation <$> word <* optional ws
471 mapping = string "mapping" >> ws >> MappingAnnotation <$> mappingName
472 mappingName = word <* optional ws
474 option = string "option" >> ws >> OptionAnnotation <$> optionName <*> optionType <*> optionDefault
475 optionName = many1 (alphaNum <|> char ':') <* ws <?> "option name"
476 optionType = many1 alphaNum <* ws <?> "option type"
477 -- BUG: this is not optional, it would seem (see tests/fixtures/vim/options.vim)
478 optionDefault = optionMaybe word <?> "option default value"
480 plugin = string "plugin" >> ws >> PluginAnnotation <$> pluginName <*> plugInDescription
481 pluginName = many1 alphaNum <* ws
482 plugInDescription = restOfLine
484 -- | Parses a translation unit (file contents) into an AST.
487 <$> (skippable >> many node)
490 skippable = many $ choice [ comment
492 , skipMany1 (char '\n')
495 parse :: String -> IO Node
496 parse fileName = parseFromFile unit fileName >>= either report return
499 hPutStrLn stderr $ "Error: " ++ show err