hunt.markdown.internal.DocumentParser source code

1 module hunt.markdown.internal.DocumentParser;
2 
3 import hunt.markdown.internal.ReferenceParser;
4 import hunt.markdown.internal.util.Parsing;
5 import hunt.markdown.internal.DocumentBlockParser;
6 import hunt.markdown.internal.BlockQuoteParser;
7 import hunt.markdown.internal.BlockStartImpl;
8 import hunt.markdown.internal.HeadingParser;
9 import hunt.markdown.internal.HtmlBlockParser;
10 import hunt.markdown.internal.FencedCodeBlockParser;
11 import hunt.markdown.internal.ThematicBreakParser;
12 import hunt.markdown.internal.ListBlockParser;
13 import hunt.markdown.internal.IndentedCodeBlockParser;
14 import hunt.markdown.internal.ParagraphParser;
15 import hunt.markdown.node.Block;
16 import hunt.markdown.node.Document;
17 import hunt.markdown.node.BlockQuote;
18 import hunt.markdown.node.FencedCodeBlock;
19 import hunt.markdown.node.Heading;
20 import hunt.markdown.node.HtmlBlock;
21 import hunt.markdown.node.ThematicBreak;
22 import hunt.markdown.node.ListBlock;
23 import hunt.markdown.node.IndentedCodeBlock;
24 import hunt.markdown.node.Paragraph;
25 import hunt.markdown.parser.InlineParser;
26 import hunt.markdown.parser.block.BlockParser;
27 import hunt.markdown.parser.block.BlockParserFactory;
28 import hunt.markdown.parser.block.ParserState;
29 import hunt.markdown.parser.block.BlockStart;
30 import hunt.markdown.parser.block.MatchedBlockParser;
31 import hunt.markdown.parser.block.BlockContinue;
32 import hunt.markdown.internal.BlockContinueImpl;
33 import hunt.markdown.parser.block.AbstractBlockParser;
34 
35 import hunt.collection.Collections;
36 import hunt.collection.Map;
37 import hunt.collection.Set;
38 import hunt.collection.List;
39 import hunt.collection.HashSet;
40 import hunt.collection.HashMap;
41 import hunt.collection.LinkedHashMap;
42 import hunt.collection.LinkedHashSet;
43 import hunt.collection.ArrayList;
44 import hunt.Exceptions;
45 import hunt.logging;
46 import hunt.text;
47 import std.stdio;
48 
49 class DocumentParser : ParserState
50 {
51 
52     private  __gshared Set!(TypeInfo_Class) CORE_FACTORY_TYPES;
53 
54     private  __gshared Map!(TypeInfo_Class, BlockParserFactory) NODES_TO_CORE_FACTORIES;
55 
56     shared static this()
57     {
58         CORE_FACTORY_TYPES = new LinkedHashSet!(TypeInfo_Class)([typeid(BlockQuote), typeid(Heading),
59                 typeid(FencedCodeBlock), typeid(HtmlBlock), typeid(ThematicBreak),
60                 typeid(ListBlock), typeid(IndentedCodeBlock)]);
61 
62         Map!(TypeInfo_Class, BlockParserFactory) map = new HashMap!(TypeInfo_Class,
63                 BlockParserFactory)();
64         map.put(typeid(BlockQuote), new BlockQuoteParser.Factory());
65         map.put(typeid(Heading), new HeadingParser.Factory());
66         map.put(typeid(FencedCodeBlock), new FencedCodeBlockParser.Factory());
67         map.put(typeid(HtmlBlock), new HtmlBlockParser.Factory());
68         map.put(typeid(ThematicBreak), new ThematicBreakParser.Factory());
69         map.put(typeid(ListBlock), new ListBlockParser.Factory());
70         map.put(typeid(IndentedCodeBlock), new IndentedCodeBlockParser.Factory());
71 
72         NODES_TO_CORE_FACTORIES = map; // Collections.unmodifiableMap(map);
73     }
74 
75     private string line;
76 
77     /**
78      * current index (offset) in input line (0-based)
79      */
80     private int index = 0;
81 
82     /**
83      * current column of input line (tab causes column to go to next 4-space tab stop) (0-based)
84      */
85     private int column = 0;
86 
87     /**
88      * if the current column is within a tab character (partially consumed tab)
89      */
90     private bool columnIsInTab;
91 
92     private int nextNonSpace = 0;
93     private int nextNonSpaceColumn = 0;
94     private int indent = 0;
95     private bool blank;
96 
97     private List!(BlockParserFactory) blockParserFactories;
98     private InlineParser inlineParser;
99     private DocumentBlockParser documentBlockParser;
100 
101     private List!(BlockParser) activeBlockParsers;
102     private Set!(BlockParser) allBlockParsers;
103 
104     public this(List!(BlockParserFactory) blockParserFactories, InlineParser inlineParser)
105     {
106 
107         activeBlockParsers = new ArrayList!(BlockParser)();
108         allBlockParsers = new HashSet!(BlockParser)();
109         version(HUNT_DEBUG)logDebug("blockParserFactories size :",blockParserFactories.size);
110         this.blockParserFactories = blockParserFactories;
111         this.inlineParser = inlineParser;
112 
113         this.documentBlockParser = new DocumentBlockParser();
114         activateBlockParser(this.documentBlockParser);
115     }
116 
117     public static Set!(TypeInfo_Class) getDefaultBlockParserTypes()
118     {
119         return CORE_FACTORY_TYPES;
120     }
121 
122     public static List!(BlockParserFactory) calculateBlockParserFactories(List!(
123             BlockParserFactory) customBlockParserFactories, Set!(TypeInfo_Class) enabledBlockTypes)
124     {
125         List!(BlockParserFactory) list = new ArrayList!(BlockParserFactory)();
126         // By having the custom factories come first, extensions are able to change behavior of core syntax.
127         list.addAll(customBlockParserFactories);
128         foreach (blockType; enabledBlockTypes)
129         {
130             list.add(NODES_TO_CORE_FACTORIES.get(blockType));
131         }
132         return list;
133     }
134 
135     /**
136      * The main parsing function. Returns a parsed document AST.
137      */
138     public Document parse(string input)
139     {
140         int lineStart = 0;
141         int lineBreak;
142         while ((lineBreak = Parsing.findLineBreak(input, lineStart)) != -1)
143         {
144             string line = input.substring(lineStart, lineBreak);
145             incorporateLine(line);
146             if (lineBreak + 1 < input.length && input[lineBreak] == '\r'
147                     && input.charAt(lineBreak + 1) == '\n')
148             {
149                 lineStart = lineBreak + 2;
150             }
151             else
152             {
153                 lineStart = lineBreak + 1;
154             }
155         }
156         if (input.length > 0 && (lineStart == 0 || lineStart < input.length))
157         {
158             string line = input.substring(lineStart);
159             incorporateLine(line);
160         }
161 
162         return finalizeAndProcess();
163     }
164 
165     // public Document parse(Reader input) throws IOException {
166     //     BufferedReader bufferedReader;
167     //     if (cast(BufferedReader)input !is null) {
168     //         bufferedReader = (BufferedReader) input;
169     //     } else {
170     //         bufferedReader = new BufferedReader(input);
171     //     }
172 
173     //     string line;
174     //     while ((line = bufferedReader.readLine()) !is null) {
175     //         incorporateLine(line);
176     //     }
177 
178     //     return finalizeAndProcess();
179     // }
180 
181     override public string getLine()
182     {
183         return line;
184     }
185 
186     override public int getIndex()
187     {
188         return index;
189     }
190 
191     override public int getNextNonSpaceIndex()
192     {
193         return nextNonSpace;
194     }
195 
196     override public int getColumn()
197     {
198         return column;
199     }
200 
201     override public int getIndent()
202     {
203         return indent;
204     }
205 
206     override public bool isBlank()
207     {
208         return blank;
209     }
210 
211     override public BlockParser getActiveBlockParser()
212     {
213         auto bp = activeBlockParsers.get(activeBlockParsers.size() - 1);
214         assert(bp !is null);
215         return bp;
216     }
217 
218     /**
219      * Analyze a line of text and update the document appropriately. We parse markdown text by calling this on each
220      * line of input, then finalizing the document.
221      */
222     private void incorporateLine(string ln)
223     {
224         line = Parsing.prepareLine(ln);
225         version(HUNT_DEBUG)logDebug("prepareLine line : ", line);
226         index = 0;
227         column = 0;
228         columnIsInTab = false;
229 
230         // For each containing block, try to parse the associated line start.
231         // Bail out on failure: container will point to the last matching block.
232         // Set all_matched to false if not all containers match.
233         // The document will always match, can be skipped
234         int matches = 1;
235         List!BlockParser tempList = new ArrayList!BlockParser();
236         for (int i = 1; i < activeBlockParsers.size(); i++)
237         {
238             tempList.add(activeBlockParsers.get(i));
239         }
240         foreach (ref BlockParser blockParser; tempList)
241         {
242             findNextNonSpace();
243             BlockContinue result = blockParser.tryContinue(this);
244             if (cast(BlockContinueImpl) result !is null)
245             {
246                 BlockContinueImpl blockContinue = cast(BlockContinueImpl) result;
247                 if (blockContinue.isFinalize())
248                 {
249                     finalize(blockParser);
250                     return;
251                 }
252                 else
253                 {
254                     if (blockContinue.getNewIndex() != -1)
255                     {
256                         setNewIndex(blockContinue.getNewIndex());
257                     }
258                     else if (blockContinue.getNewColumn() != -1)
259                     {
260                         setNewColumn(blockContinue.getNewColumn());
261                     }
262                     matches++;
263                 }
264             }
265             else
266             {
267                 break;
268             }
269         }
270         List!BlockParser tempList2 = new ArrayList!BlockParser();
271         for (int i = matches; i < activeBlockParsers.size(); i++)
272         {
273             tempList2.add(activeBlockParsers.get(i));
274         }
275         List!(BlockParser) unmatchedBlockParsers = new ArrayList!(BlockParser)(tempList2);
276         BlockParser lastMatchedBlockParser = activeBlockParsers.get(matches - 1);
277         BlockParser blockParser = lastMatchedBlockParser;
278         bool allClosed = unmatchedBlockParsers.isEmpty();
279 
280         // Unless last matched container is a code block, try new container starts,
281         // adding children to the last matched container:
282         bool tryBlockStarts = cast(Paragraph)(blockParser.getBlock()) !is null
283             || blockParser.isContainer();
284         while (tryBlockStarts)
285         {
286             findNextNonSpace();
287 
288             // this is a little performance optimization:
289             if (isBlank() || (indent < Parsing.CODE_BLOCK_INDENT
290                     && Parsing.isLetter(line, nextNonSpace)))
291             {
292                 setNewIndex(nextNonSpace);
293                 break;
294             }
295 
296             BlockStartImpl blockStart = findBlockStart(blockParser);
297             if (blockStart is null)
298             {
299                 setNewIndex(nextNonSpace);
300                 break;
301             }
302 
303             if (!allClosed)
304             {
305                 finalizeBlocks(unmatchedBlockParsers);
306                 allClosed = true;
307             }
308 
309             if (blockStart.getNewIndex() != -1)
310             {
311                 setNewIndex(blockStart.getNewIndex());
312             }
313             else if (blockStart.getNewColumn() != -1)
314             {
315                 setNewColumn(blockStart.getNewColumn());
316             }
317 
318             if (blockStart.isReplaceActiveBlockParser())
319             {
320                 removeActiveBlockParser();
321             }
322             int i = 0;
323             auto bps = blockStart.getBlockParsers();
324             foreach (BlockParser newBlockParser; bps)
325             {
326                 blockParser = addChild(newBlockParser);
327                 tryBlockStarts = newBlockParser.isContainer();
328             }
329         }
330 
331         // What remains at the offset is a text line. Add the text to the
332         // appropriate block.
333 
334         // First check for a lazy paragraph continuation:
335         if (!allClosed && !isBlank() && cast(ParagraphParser) getActiveBlockParser() !is null)
336         {
337             // lazy paragraph continuation
338             addLine();
339 
340         }
341         else
342         {
343 
344             // finalize any blocks not matched
345             if (!allClosed)
346             {
347                 finalizeBlocks(unmatchedBlockParsers);
348             }
349 
350             if (!blockParser.isContainer())
351             {
352                 addLine();
353             }
354             else if (!isBlank())
355             {
356                 // create paragraph container for line
357                 addChild(new ParagraphParser());
358                 addLine();
359             }
360         }
361     }
362 
363     private void findNextNonSpace()
364     {
365         int i = index;
366         int cols = column;
367 
368         blank = true;
369         int length = cast(int) line.length;
370         while (i < length)
371         {
372             char c = line[i];
373             switch (c)
374             {
375             case ' ':
376                 i++;
377                 cols++;
378                 continue;
379             case '\t':
380                 i++;
381                 cols += (4 - (cols % 4));
382                 continue;
383             default:
384                 break;
385             }
386             blank = false;
387             break;
388         }
389 
390         nextNonSpace = i;
391         nextNonSpaceColumn = cols;
392         indent = nextNonSpaceColumn - column;
393     }
394 
395     private void setNewIndex(int newIndex)
396     {
397         if (newIndex >= nextNonSpace)
398         {
399             // We can start from here, no need to calculate tab stops again
400             index = nextNonSpace;
401             column = nextNonSpaceColumn;
402         }
403         int length = cast(int) line.length;
404         while (index < newIndex && index != length)
405         {
406             advance();
407         }
408         // If we're going to an index as opposed to a column, we're never within a tab
409         columnIsInTab = false;
410     }
411 
412     private void setNewColumn(int newColumn)
413     {
414         if (newColumn >= nextNonSpaceColumn)
415         {
416             // We can start from here, no need to calculate tab stops again
417             index = nextNonSpace;
418             column = nextNonSpaceColumn;
419         }
420         int length = cast(int) line.length;
421         while (column < newColumn && index != length)
422         {
423             advance();
424         }
425         if (column > newColumn)
426         {
427             // Last character was a tab and we overshot our target
428             index--;
429             column = newColumn;
430             columnIsInTab = true;
431         }
432         else
433         {
434             columnIsInTab = false;
435         }
436     }
437 
438     private void advance()
439     {
440         char c = line[index];
441         if (c == '\t')
442         {
443             index++;
444             column += Parsing.columnsToNextTabStop(column);
445         }
446         else
447         {
448             index++;
449             column++;
450         }
451     }
452 
453     /**
454      * Add line content to the active block parser. We assume it can accept lines -- that check should be done before
455      * calling this.
456      */
457     private void addLine()
458     {
459         string content;
460         if (columnIsInTab)
461         {
462             // Our column is in a partially consumed tab. Expand the remaining columns (to the next tab stop) to spaces.
463             int afterTab = index + 1;
464             string rest = line.substring(afterTab, cast(int) line.length);
465             int spaces = Parsing.columnsToNextTabStop(column);
466             StringBuilder sb = new StringBuilder(spaces + rest.length);
467             for (int i = 0; i < spaces; i++)
468             {
469                 sb.append(' ');
470             }
471             sb.append(rest);
472             content = sb.toString();
473         }
474         else
475         {
476             content = line.substring(index, cast(int) line.length);
477         }
478         getActiveBlockParser().addLine(content);
479     }
480 
481     private BlockStartImpl findBlockStart(BlockParser blockParser)
482     {
483         MatchedBlockParser matchedBlockParser = new MatchedBlockParserImpl(blockParser);
484         foreach (BlockParserFactory blockParserFactory; blockParserFactories)
485         {
486             BlockStart result = blockParserFactory.tryStart(this, matchedBlockParser);
487             if (cast(BlockStartImpl) result !is null)
488             {
489                 return cast(BlockStartImpl) result;
490             }
491         }
492         return null;
493     }
494 
495     /**
496      * Finalize a block. Close it and do any necessary postprocessing, e.g. creating string_content from strings,
497      * setting the 'tight' or 'loose' status of a list, and parsing the beginnings of paragraphs for reference
498      * definitions.
499      */
500     private void finalize(BlockParser blockParser)
501     {
502         // logDebug("!!!!");
503         if (getActiveBlockParser() is blockParser)
504         {
505             deactivateBlockParser();
506         }
507 
508         blockParser.closeBlock();
509 
510         if (cast(ParagraphParser)blockParser !is null && cast(ReferenceParser)inlineParser !is null)
511         {
512             ParagraphParser paragraphParser = cast(ParagraphParser) blockParser;
513             paragraphParser.closeBlock(cast(ReferenceParser) inlineParser);
514         }
515     }
516 
517     /**
518      * Walk through a block & children recursively, parsing string content into inline content where appropriate.
519      */
520     private void processInlines()
521     {
522         foreach (BlockParser blockParser; allBlockParsers)
523         {
524             blockParser.parseInlines(inlineParser);
525         }
526     }
527 
528     /**
529      * Add block of type tag as a child of the tip. If the tip can't  accept children, close and finalize it and try
530      * its parent, and so on til we find a block that can accept children.
531      */
532     private BlockParser addChild(BlockParser blockParser)
533     {
534         try
535         {
536             if (blockParser is null)
537                 return null;
538             
539             while (blockParser !is null && getActiveBlockParser() !is null
540                     && !(getActiveBlockParser().canContain(blockParser.getBlock())))
541             {
542                 finalize(getActiveBlockParser());
543             }
544 
545             getActiveBlockParser().getBlock().appendChild(blockParser.getBlock());
546             activateBlockParser(blockParser);
547 
548         }
549         catch (Throwable e)
550         {
551             logError("msg : ", e.msg);
552         }
553         return blockParser;
554 
555     }
556 
557     private void activateBlockParser(BlockParser blockParser)
558     {
559         activeBlockParsers.add(blockParser);
560         allBlockParsers.add(blockParser);
561     }
562 
563     private void deactivateBlockParser()
564     {
565         activeBlockParsers.removeAt(activeBlockParsers.size() - 1);
566     }
567 
568     private void removeActiveBlockParser()
569     {
570         BlockParser old = getActiveBlockParser();
571         deactivateBlockParser();
572         auto f = allBlockParsers.remove(old);
573         assert(f);
574 
575         old.getBlock().unlink();
576     }
577 
578     /**
579      * Finalize blocks of previous line. Returns true.
580      */
581     private void finalizeBlocks(List!(BlockParser) blockParsers)
582     {
583         for (int i = blockParsers.size() - 1; i >= 0; i--)
584         {
585             BlockParser blockParser = blockParsers.get(i);
586             finalize(blockParser);
587         }
588     }
589 
590     private Document finalizeAndProcess()
591     {
592         finalizeBlocks(this.activeBlockParsers);
593         this.processInlines();
594         return this.documentBlockParser.getBlock();
595     }
596 
597     private static class MatchedBlockParserImpl : MatchedBlockParser
598     {
599 
600         private BlockParser matchedBlockParser;
601 
602         public this(BlockParser matchedBlockParser)
603         {
604             this.matchedBlockParser = matchedBlockParser;
605         }
606 
607         override public BlockParser getMatchedBlockParser()
608         {
609             return matchedBlockParser;
610         }
611 
612         override public string getParagraphContent()
613         {
614             if (cast(ParagraphParser) matchedBlockParser !is null)
615             {
616                 ParagraphParser paragraphParser = cast(ParagraphParser) matchedBlockParser;
617                 return paragraphParser.getContentString();
618             }
619             return null;
620         }
621     }
622 }