1 module hunt.markdown.internal.HtmlBlockParser;
2 
3 import hunt.markdown.internal.util.Parsing;
4 import hunt.markdown.internal.BlockContent;
5 import hunt.markdown.node.Block;
6 import hunt.markdown.node.HtmlBlock;
7 import hunt.markdown.node.Paragraph;
8 import hunt.markdown.parser.block.AbstractBlockParser;
9 import hunt.markdown.parser.block.BlockContinue;
10 import hunt.markdown.parser.block.ParserState;
11 import hunt.markdown.parser.block.BlockStart;
12 import hunt.markdown.parser.block.AbstractBlockParserFactory;
13 import hunt.markdown.parser.block.MatchedBlockParser;
14 
15 import hunt.text.Common;
16 
17 import std.regex;
18 import hunt.logging;
19 
/**
 * Block parser for raw HTML blocks.
 *
 * An HTML block is opened by one of seven start conditions (block types 1-7,
 * see BLOCK_PATTERNS). Types 1-5 are closed by the first line that contains
 * the matching closing pattern; types 6 and 7 have no closing pattern and are
 * closed by a blank line instead.
 */
class HtmlBlockParser : AbstractBlockParser {

    /**
     * Regex sources indexed by block type: [opening pattern, closing pattern].
     * Index 0 is unused (there is no block type 0). An empty closing pattern
     * means "terminated by a blank line".
     */
    private static string[][] BLOCK_PATTERNS = [
            // not used (no type 0)
            ["", ""],
            // type 1: <script>, <pre>, <style>
            ["^<(?:script|pre|style)(?:\\s|>|$)", "</(?:script|pre|style)>"],
            // type 2: comment
            ["^<!--", "-->"],
            // type 3: processing instruction
            ["^<[?]", "\\?>"],
            // type 4: declaration
            ["^<![A-Z]", ">"],
            // type 5: CDATA section
            ["^<!\\[CDATA\\[", "\\]\\]>"],
            // type 6: known block-level tag names, terminated by a blank line
            ["^</?(?:" ~
                            "address|article|aside|" ~
                            "base|basefont|blockquote|body|" ~
                            "caption|center|col|colgroup|" ~
                            "dd|details|dialog|dir|div|dl|dt|" ~
                            "fieldset|figcaption|figure|footer|form|frame|frameset|" ~
                            "h1|h2|h3|h4|h5|h6|head|header|hr|html|" ~
                            "iframe|" ~
                            "legend|li|link|" ~
                            "main|menu|menuitem|meta|" ~
                            "nav|noframes|" ~
                            "ol|optgroup|option|" ~
                            "p|param|" ~
                            "section|source|summary|" ~
                            "table|tbody|td|tfoot|th|thead|title|tr|track|" ~
                            "ul" ~
                            ")(?:\\s|[/]?[>]|$)", ""],
            // type 7: any other complete open/close tag alone on its line,
            // terminated by a blank line
            ["^(?:" ~ Parsing.OPENTAG ~ '|' ~ Parsing.CLOSETAG ~ ")\\s*$", ""]
        ];

    // Compiled forms of BLOCK_PATTERNS, built lazily on first use.
    // Static variables in D are thread-local by default, so each thread
    // builds and owns its own copy.
    private static Regex!char[] openers;
    private static Regex!char[] closers;

    private HtmlBlock block;
    // Left as Regex!char.init (closingPattern.empty() is true) for blocks
    // that are terminated by a blank line.
    private Regex!char closingPattern;

    private bool finished = false;
    private BlockContent content;

    /**
     * Params:
     *   closingPattern = pattern that ends the block when found in a line, or
     *                    an empty (default-initialized) Regex if the block is
     *                    ended by a blank line.
     */
    private this(Regex!char closingPattern) {
        block = new HtmlBlock();
        content = new BlockContent();
        this.closingPattern = closingPattern;
    }

    /**
     * Compiles every entry of BLOCK_PATTERNS once and caches the result.
     *
     * An empty closing source is deliberately NOT passed to regex(): an empty
     * pattern is a valid regex that matches every line, and the resulting
     * Regex object does not report empty(). The closer slot is left as
     * Regex!char.init so that empty() identifies blank-line-terminated blocks.
     */
    private static void ensureCompiled() {
        if (openers.length != 0) {
            return;
        }

        Regex!char[] compiledOpeners;
        Regex!char[] compiledClosers;
        compiledOpeners.length = BLOCK_PATTERNS.length;
        compiledClosers.length = BLOCK_PATTERNS.length;

        foreach (type; 1 .. BLOCK_PATTERNS.length) {
            // Tag names of type 1 and type 6 are matched case-insensitively.
            string flags = (type == 1 || type == 6) ? "i" : "";

            compiledOpeners[type] = regex(BLOCK_PATTERNS[type][0], flags);

            string closeSource = BLOCK_PATTERNS[type][1];
            if (closeSource.length != 0) {
                compiledClosers[type] = regex(closeSource, flags);
            }
        }

        // Publish only after everything compiled successfully.
        closers = compiledClosers;
        openers = compiledOpeners;
    }

    /// Returns the HtmlBlock node being built by this parser.
    override public Block getBlock() {
        return block;
    }

    /**
     * Decides whether the block continues on the current line.
     *
     * Returns BlockContinue.none() once a closing pattern has been seen on a
     * previous line, or when the line is blank and the block has no closing
     * pattern; otherwise continues at the current index.
     */
    public BlockContinue tryContinue(ParserState state) {
        if (finished) {
            return BlockContinue.none();
        }

        // Blank line ends type 6 and type 7 blocks
        if (state.isBlank() && closingPattern.empty()) {
            return BlockContinue.none();
        }
        return BlockContinue.atIndex(state.getIndex());
    }

    /**
     * Appends a line to the block content and marks the block as finished if
     * the line contains the closing pattern (the closing line itself is kept).
     */
    override public void addLine(string line) {
        content.add(line);

        if (!closingPattern.empty() && !matchFirst(line, closingPattern).empty()) {
            finished = true;
        }
    }

    /// Stores the accumulated text on the node and releases the buffer.
    override public void closeBlock() {
        block.setLiteral(content.getString());
        content = null;
    }

    /// Factory that recognizes the start of an HTML block.
    public static class Factory : AbstractBlockParserFactory {

        /**
         * Tries each block type 1-7 in order against the current line and
         * starts an HtmlBlockParser for the first one whose opening pattern
         * matches. Returns BlockStart.none() if the line is indented 4 or
         * more columns, does not begin with '<', or matches no type.
         */
        public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockParser) {
            int nextNonSpace = state.getNextNonSpaceIndex();
            string line = state.getLine();

            if (state.getIndent() >= 4) {
                return BlockStart.none();
            }
            // Guard the index so a line with no non-space character cannot
            // trigger a range violation.
            if (nextNonSpace < 0 || nextNonSpace >= cast(int) line.length || line[nextNonSpace] != '<') {
                return BlockStart.none();
            }

            ensureCompiled();
            string rest = line[nextNonSpace .. $];

            for (int blockType = 1; blockType <= 7; blockType++) {
                // Type 7 can not interrupt a paragraph
                if (blockType == 7 && cast(Paragraph) matchedBlockParser.getMatchedBlockParser().getBlock() !is null) {
                    continue;
                }
                if (!matchFirst(rest, openers[blockType]).empty()) {
                    return BlockStart.of(new HtmlBlockParser(closers[blockType])).atIndex(state.getIndex());
                }
            }
            return BlockStart.none();
        }
    }
}