hunt.markdown.internal.InlineParserImpl source code

1 module hunt.markdown.internal.InlineParserImpl;
2 
3 import hunt.markdown.internal.ReferenceParser;
4 import hunt.markdown.internal.inline.AsteriskDelimiterProcessor;
5 import hunt.markdown.internal.inline.UnderscoreDelimiterProcessor;
6 import hunt.markdown.internal.util.Escaping;
7 import hunt.markdown.internal.util.Html5Entities;
8 import hunt.markdown.internal.util.Parsing;
9 import hunt.markdown.node.Node;
10 import hunt.markdown.node.Text;
11 import hunt.markdown.node.Link;
12 import hunt.markdown.parser.InlineParser;
13 import hunt.markdown.parser.delimiter.DelimiterProcessor;
14 import hunt.markdown.internal.Delimiter;
15 import hunt.markdown.internal.Bracket;
16 import hunt.markdown.internal.StaggeredDelimiterProcessor;
17 import hunt.markdown.node.HardLineBreak;
18 import hunt.markdown.node.SoftLineBreak;
19 import hunt.markdown.node.Code;
20 import hunt.markdown.node.HtmlInline;
21 import hunt.markdown.node.Image;
22 
23 import hunt.collection.BitSet;
24 import hunt.collection.Map;
25 import hunt.collection.Set;
26 import hunt.collection.List;
27 import hunt.collection.ArrayList;
28 import hunt.collection.HashMap;
29 import hunt.util.Common;
30 import hunt.Char;
31 import hunt.text.Common;
32 import hunt.util.StringBuilder;
33 import hunt.Exceptions;
34 
35 import std.regex;
36 import std.string;
37 
38 alias Character = Char;
39 
40 class InlineParserImpl : InlineParser, ReferenceParser {
41 
42     private enum string ESCAPED_CHAR = "\\\\" ~ Escaping.ESCAPABLE;
43     private enum string HTMLCOMMENT = "<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->";
44     private enum string PROCESSINGINSTRUCTION = "[<][?].*?[?][>]";
45     private enum string DECLARATION = "<![A-Z]+\\s+[^>]*>";
46     private enum string CDATA = "<!\\[CDATA\\[[\\s\\S]*?\\]\\]>";
47     private enum string HTMLTAG = "(?:" ~ Parsing.OPENTAG ~ "|" ~ Parsing.CLOSETAG ~ "|" ~ HTMLCOMMENT
48             ~ "|" ~ PROCESSINGINSTRUCTION ~ "|" ~ DECLARATION ~ "|" ~ CDATA ~ ")";
49     private enum string ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});";
50 
51     private enum string ASCII_PUNCTUATION = "!\"#\\$%&'\\(\\)\\*\\+,\\-\\./:;<=>\\?@\\[\\\\\\]\\^_`\\{\\|\\}~";
52     private enum string PUNCTUATION = ("^[" ~ ASCII_PUNCTUATION ~ "\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}\\p{Ps}]");
53 
54     private enum string HTML_TAG = '^' ~ HTMLTAG; //i
55 
56     private enum string LINK_TITLE = (
57             "^(?:\"(" ~ ESCAPED_CHAR ~ "|[^\"\\x00])*\"" ~
58                     '|' ~
59                     "'(" ~ ESCAPED_CHAR ~ "|[^'\\x00])*'" ~
60                     '|' ~
61                     "\\((" ~ ESCAPED_CHAR ~ "|[^)\\x00])*\\))");
62 
63     private enum string LINK_DESTINATION_BRACES = ("^(?:[<](?:[^<> \\t\\n\\\\]|\\\\.)*[>])");
64 
65     private enum string LINK_LABEL = ("^\\[(?:[^\\\\\\[\\]]|\\\\.)*\\]");
66 
67     private enum string ESCAPABLE = ('^' ~ Escaping.ESCAPABLE);
68 
69     private enum string ENTITY_HERE = '^' ~ ENTITY; //i
70 
71     private enum string TICKS = ("`+");
72 
73     private enum string TICKS_HERE = ("^`+");
74 
75     private enum string EMAIL_AUTOLINK = ("^<([a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>");
76 
77     private enum string AUTOLINK = ("^<[a-zA-Z][a-zA-Z0-9.+-]{1,31}:[^<>\u0000-\u0020]*>");
78 
79     private enum string SPNL = ("^ *(?:\n *)?");
80 
81     private enum string UNICODE_WHITESPACE_CHAR = ("^[\\p{Zs}\t\r\n\f]");
82 
83     private enum string WHITESPACE = ("\\s+");
84 
85     private enum string FINAL_SPACE = (" *$");
86 
87     private enum string LINE_END = ("^ *(?:\n|$)");
88 
89     private BitSet specialCharacters;
90     private BitSet delimiterCharacters;
91     private Map!(Character, DelimiterProcessor) delimiterProcessors;
92 
93     /**
94      * Link references by ID, needs to be built up using parseReference before calling parse.
95      */
96     private Map!(string, Link) referenceMap;
97 
98     private Node block;
99 
100     private string input;
101     private int index;
102 
103     /**
104      * Top delimiter (emphasis, strong emphasis or custom emphasis). (Brackets are on a separate stack, different
105      * from the algorithm described in the spec.)
106      */
107     private Delimiter lastDelimiter;
108 
109     /**
110      * Top opening bracket (<code>[</code> or <code>![</code>).
111      */
112     private Bracket lastBracket;
113 
114     public this(List!(DelimiterProcessor) delimiterProcessors) {
115         referenceMap = new HashMap!(string, Link)();
116         this.delimiterProcessors = calculateDelimiterProcessors(delimiterProcessors);
117         Character[] chars;
118         foreach(k,v;this.delimiterProcessors)
119         {
120             chars ~= k;
121         }
122         this.delimiterCharacters = calculateDelimiterCharacters(chars);
123         this.specialCharacters = calculateSpecialCharacters(delimiterCharacters);
124     }
125 
126     public static BitSet calculateDelimiterCharacters(Set!(Character) characters) {
127         BitSet bitSet = new BitSet();
128         foreach (Character character ; characters) {
129             bitSet.set(cast(int)(character.charValue));
130         }
131         return bitSet;
132     }
133 
134     public static BitSet calculateDelimiterCharacters(Character[] characters) {
135         BitSet bitSet = new BitSet();
136         foreach (Character character ; characters) {
137             bitSet.set(cast(int)(character.charValue));
138         }
139         return bitSet;
140     }
141 
142     public static BitSet calculateSpecialCharacters(BitSet delimiterCharacters) {
143         BitSet bitSet = new BitSet();
144         bitSet.or(delimiterCharacters);
145         bitSet.set('\n');
146         bitSet.set('`');
147         bitSet.set('[');
148         bitSet.set(']');
149         bitSet.set('\\');
150         bitSet.set('!');
151         bitSet.set('<');
152         bitSet.set('&');
153         return bitSet;
154     }
155 
156     public static Map!(Character, DelimiterProcessor) calculateDelimiterProcessors(List!(DelimiterProcessor) delimiterProcessors) {
157         Map!(Character, DelimiterProcessor) map = new HashMap!(Character, DelimiterProcessor)();
158         auto list = new ArrayList!DelimiterProcessor();
159         list.add(new AsteriskDelimiterProcessor());
160         list.add(new UnderscoreDelimiterProcessor());
161         addDelimiterProcessors(list, map);
162         addDelimiterProcessors(delimiterProcessors, map);
163         return map;
164     }
165 
166     private static void addDelimiterProcessors(Iterable!(DelimiterProcessor) delimiterProcessors, Map!(Character, DelimiterProcessor) map) {
167         foreach (DelimiterProcessor delimiterProcessor ; delimiterProcessors) {
168             char opening = delimiterProcessor.getOpeningCharacter();
169             char closing = delimiterProcessor.getClosingCharacter();
170             if (opening == closing) {
171                 DelimiterProcessor old = map.get(new Char(opening));
172                 if (old !is null && old.getOpeningCharacter() == old.getClosingCharacter()) {
173                     StaggeredDelimiterProcessor s;
174                     if (cast(StaggeredDelimiterProcessor)old !is null) {
175                         s = cast(StaggeredDelimiterProcessor) old;
176                     } else {
177                         s = new StaggeredDelimiterProcessor(opening);
178                         s.add(old);
179                     }
180                     s.add(delimiterProcessor);
181                     map.put(new Char(opening), s);
182                 } else {
183                     addDelimiterProcessorForChar(opening, delimiterProcessor, map);
184                 }
185             } else {
186                 addDelimiterProcessorForChar(opening, delimiterProcessor, map);
187                 addDelimiterProcessorForChar(closing, delimiterProcessor, map);
188             }
189         }
190     }
191 
192     private static void addDelimiterProcessorForChar(char delimiterChar, DelimiterProcessor toAdd, Map!(Character, DelimiterProcessor) delimiterProcessors) {
193         DelimiterProcessor existing = delimiterProcessors.put(new Char(delimiterChar), toAdd);
194         if (existing !is null) {
195             throw new IllegalArgumentException("Delimiter processor conflict with delimiter char '" ~ delimiterChar ~ "'");
196         }
197     }
198 
199     /**
200      * Parse content in block into inline children, using reference map to resolve references.
201      */
202     override public void parse(string content, Node block) {
203         this.block = block;
204         this.input = content.strip();
205         this.index = 0;
206         this.lastDelimiter = null;
207         this.lastBracket = null;
208 
209         bool moreToParse;
210         do {
211             moreToParse = parseInline();
212         } while (moreToParse);
213 
214         processDelimiters(null);
215         mergeChildTextNodes(block);
216     }
217 
218     /**
219      * Attempt to parse a link reference, modifying the internal reference map.
220      */
221     override public int parseReference(string s) {
222         this.input = s;
223         this.index = 0;
224         string dest;
225         string title;
226         int matchChars;
227         int startIndex = index;
228 
229         // label:
230         matchChars = parseLinkLabel();
231         if (matchChars == 0) {
232             return 0;
233         }
234 
235         string rawLabel = input.substring(0, matchChars);
236 
237         // colon:
238         if (peek() != ':') {
239             return 0;
240         }
241         index++;
242 
243         // link url
244         spnl();
245 
246         dest = parseLinkDestination();
247         if (dest is null || dest.length == 0) {
248             return 0;
249         }
250 
251         int beforeTitle = index;
252         spnl();
253         title = parseLinkTitle();
254         if (title is null) {
255             // rewind before spaces
256             index = beforeTitle;
257         }
258 
259         bool atLineEnd = true;
260         if (index != input.length && match(regex(LINE_END)) is null) {
261             if (title is null) {
262                 atLineEnd = false;
263             } else {
264                 // the potential title we found is not at the line end,
265                 // but it could still be a legal link reference if we
266                 // discard the title
267                 title = null;
268                 // rewind before spaces
269                 index = beforeTitle;
270                 // and instead check if the link URL is at the line end
271                 atLineEnd = match(regex(LINE_END)) !is null;
272             }
273         }
274 
275         if (!atLineEnd) {
276             return 0;
277         }
278 
279         string normalizedLabel = Escaping.normalizeReference(rawLabel);
280         if (normalizedLabel.isEmpty()) {
281             return 0;
282         }
283 
284         if (!referenceMap.containsKey(normalizedLabel)) {
285             Link link = new Link(dest, title);
286             referenceMap.put(normalizedLabel, link);
287         }
288         return index - startIndex;
289     }
290 
291     private Text appendText(string text, int beginIndex, int endIndex) {
292         return appendText(text.substring(beginIndex, endIndex));
293     }
294 
295     private Text appendText(string text) {
296         Text node = new Text(text);
297         appendNode(node);
298         return node;
299     }
300 
301     private void appendNode(Node node) {
302         block.appendChild(node);
303     }
304 
305     /**
306      * Parse the next inline element in subject, advancing input index.
307      * On success, add the result to block's children and return true.
308      * On failure, return false.
309      */
310     private bool parseInline() {
311         bool res;
312         char c = peek();
313         if (c == '\0') {
314             return false;
315         }
316         switch (c) {
317             case '\n':
318                 res = parseNewline();
319                 break;
320             case '\\':
321                 res = parseBackslash();
322                 break;
323             case '`':
324                 res = parseBackticks();
325                 break;
326             case '[':
327                 res = parseOpenBracket();
328                 break;
329             case '!':
330                 res = parseBang();
331                 break;
332             case ']':
333                 res = parseCloseBracket();
334                 break;
335             case '<':
336                 res = parseAutolink() || parseHtmlInline();
337                 break;
338             case '&':
339                 res = parseEntity();
340                 break;
341             default:
342                 bool isDelimiter = delimiterCharacters.get(c);
343                 if (isDelimiter) {
344                     DelimiterProcessor delimiterProcessor = delimiterProcessors.get(new Char(c));
345                     res = parseDelimiters(delimiterProcessor, c);
346                 } else {
347                     res = parseString();
348                 }
349                 break;
350         }
351         if (!res) {
352             index++;
353             // When we get here, it's only for a single special character that turned out to not have a special meaning.
354             // So we shouldn't have a single surrogate here, hence it should be ok to turn it into a String.
355             string literal = "" ~ c;
356             appendText(literal);
357         }
358 
359         return true;
360     }
361 
362     /**
363      * If RE matches at current index in the input, advance index and return the match; otherwise return null.
364      */
365     private string match(Regex!char re) {
366         if (index >= input.length) {
367             return null;
368         }
369         auto matcher = matchAll(input[index .. $],re);
370         // matcher.region(index, cast(int)input.length);
371         // bool m = matcher.find();
372         if (!matcher.empty()) {
373             auto cp = matcher.front.captures[0];
374             index = index + cast(int)(input[index..$].indexOf(cp)) + cast(int)(cp.length);
375             return cp;
376         } else {
377             return null;
378         }
379     }
380 
381     /**
382      * Returns the char at the current input index, or {@code '\0'} in case there are no more characters.
383      */
384     private char peek() {
385         if (index < input.length) {
386             return input[index];
387         } else {
388             return '\0';
389         }
390     }
391 
392     /**
393      * Parse zero or more space characters, including at most one newline.
394      */
395     private bool spnl() {
396         match(regex(SPNL));
397         return true;
398     }
399 
400     /**
401      * Parse a newline. If it was preceded by two spaces, return a hard line break; otherwise a soft line break.
402      */
403     private bool parseNewline() {
404         index++; // assume we're at a \n
405 
406         Node lastChild = block.getLastChild();
407         // Check previous text for trailing spaces.
408         // The "endsWith" is an optimization to avoid an RE match in the common case.
409         if (lastChild !is null && cast(Text)lastChild !is null && (cast(Text) lastChild).getLiteral().endsWith(" ")) {
410             Text text = cast(Text) lastChild;
411             string literal = text.getLiteral();
412             auto matcher = matchAll(literal,regex(FINAL_SPACE));
413             int spaces = !matcher.empty() ? cast(int)(matcher.front.captures[0].length) : 0;
414             if (spaces > 0) {
415                 text.setLiteral(literal.substring(0, cast(int)literal.length - spaces));
416             }
417             appendNode(spaces >= 2 ? new HardLineBreak() : new SoftLineBreak());
418         } else {
419             appendNode(new SoftLineBreak());
420         }
421 
422         // gobble leading spaces in next line
423         while (peek() == ' ') {
424             index++;
425         }
426         return true;
427     }
428 
429     /**
430      * Parse a backslash-escaped special character, adding either the escaped  character, a hard line break
431      * (if the backslash is followed by a newline), or a literal backslash to the block's children.
432      */
433     private bool parseBackslash() {
434         index++;
435         if (peek() == '\n') {
436             appendNode(new HardLineBreak());
437             index++;
438         } else if (index < input.length && !matchAll(input.substring(index, index + 1),regex(ESCAPABLE)).empty()) {
439             appendText(input, index, index + 1);
440             index++;
441         } else {
442             appendText("\\");
443         }
444         return true;
445     }
446 
447     /**
448      * Attempt to parse backticks, adding either a backtick code span or a literal sequence of backticks.
449      */
450     private bool parseBackticks() {
451         string ticks = match(regex(TICKS_HERE));
452         if (ticks is null) {
453             return false;
454         }
455         int afterOpenTicks = index;
456         string matched;
457         while ((matched = match(regex(TICKS))) !is null) {
458             if (matched == ticks) {
459                 Code node = new Code();
460                 string content = input.substring(afterOpenTicks, index - ticks.length);
461                 string literal = replaceAll(content.strip(), regex(WHITESPACE)," ");
462                 node.setLiteral(literal);
463                 appendNode(node);
464                 return true;
465             }
466         }
467         // If we got here, we didn't match a closing backtick sequence.
468         index = afterOpenTicks;
469         appendText(ticks);
470         return true;
471     }
472 
473     /**
474      * Attempt to parse delimiters like emphasis, strong emphasis or custom delimiters.
475      */
476     private bool parseDelimiters(DelimiterProcessor delimiterProcessor, char delimiterChar) {
477         DelimiterData res = scanDelimiters(delimiterProcessor, delimiterChar);
478         if (res is null) {
479             return false;
480         }
481         int length = res.count;
482         int startIndex = index;
483 
484         index += length;
485         Text node = appendText(input, startIndex, index);
486 
487         // Add entry to stack for this opener
488         lastDelimiter = new Delimiter(node, delimiterChar, res.canOpen, res.canClose, lastDelimiter);
489         lastDelimiter.setLength(length);
490         lastDelimiter.setOriginalLength(length);
491         if (lastDelimiter.previous !is null) {
492             lastDelimiter.previous.next = lastDelimiter;
493         }
494 
495         return true;
496     }
497 
498     /**
499      * Add open bracket to delimiter stack and add a text node to block's children.
500      */
501     private bool parseOpenBracket() {
502         int startIndex = index;
503         index++;
504 
505         Text node = appendText("[");
506 
507         // Add entry to stack for this opener
508         addBracket(Bracket.link(node, startIndex, lastBracket, lastDelimiter));
509 
510         return true;
511     }
512 
513     /**
514      * If next character is [, and ! delimiter to delimiter stack and add a text node to block's children.
515      * Otherwise just add a text node.
516      */
517     private bool parseBang() {
518         int startIndex = index;
519         index++;
520         if (peek() == '[') {
521             index++;
522 
523             Text node = appendText("![");
524 
525             // Add entry to stack for this opener
526             addBracket(Bracket.image(node, startIndex + 1, lastBracket, lastDelimiter));
527         } else {
528             appendText("!");
529         }
530         return true;
531     }
532 
533     /**
534      * Try to match close bracket against an opening in the delimiter stack. Add either a link or image, or a
535      * plain [ character, to block's children. If there is a matching delimiter, remove it from the delimiter stack.
536      */
537     private bool parseCloseBracket() {
538         index++;
539         int startIndex = index;
540 
541         // Get previous `[` or `![`
542         Bracket opener = lastBracket;
543         if (opener is null) {
544             // No matching opener, just return a literal.
545             appendText("]");
546             return true;
547         }
548 
549         if (!opener.allowed) {
550             // Matching opener but it's not allowed, just return a literal.
551             appendText("]");
552             removeLastBracket();
553             return true;
554         }
555 
556         // Check to see if we have a link/image
557 
558         string dest = null;
559         string title = null;
560         bool isLinkOrImage = false;
561 
562         // Maybe a inline link like `[foo](/uri "title")`
563         if (peek() == '(') {
564             index++;
565             spnl();
566             if ((dest = parseLinkDestination()) !is null) {
567                 spnl();
568                 // title needs a whitespace before
569                 if (!matchAll(input.substring(index - 1, index),regex(WHITESPACE)).empty()) {
570                     title = parseLinkTitle();
571                     spnl();
572                 }
573                 if (peek() == ')') {
574                     index++;
575                     isLinkOrImage = true;
576                 } else {
577                     index = startIndex;
578                 }
579             }
580         }
581 
582         // Maybe a reference link like `[foo][bar]`, `[foo][]` or `[foo]`
583         if (!isLinkOrImage) {
584 
585             // See if there's a link label like `[bar]` or `[]`
586             int beforeLabel = index;
587             int labelLength = parseLinkLabel();
588             string r = null;
589             if (labelLength > 2) {
590                 r = input.substring(beforeLabel, beforeLabel + labelLength);
591             } else if (!opener.bracketAfter) {
592                 // If the second label is empty `[foo][]` or missing `[foo]`, then the first label is the reference.
593                 // But it can only be a reference when there's no (unescaped) bracket in it.
594                 // If there is, we don't even need to try to look up the reference. This is an optimization.
595                 r = input.substring(opener.index, startIndex);
596             }
597 
598             if (r !is null) {
599                 Link link = referenceMap.get(Escaping.normalizeReference(r));
600                 if (link !is null) {
601                     dest = link.getDestination();
602                     title = link.getTitle();
603                     isLinkOrImage = true;
604                 }
605             }
606         }
607 
608         if (isLinkOrImage) {
609             // If we got here, open is a potential opener
610             Node linkOrImage = opener._image ? new Image(dest, title) : new Link(dest, title);
611 
612             Node node = opener.node.getNext();
613             while (node !is null) {
614                 Node next = node.getNext();
615                 linkOrImage.appendChild(node);
616                 node = next;
617             }
618             appendNode(linkOrImage);
619 
620             // Process delimiters such as emphasis inside link/image
621             processDelimiters(opener.previousDelimiter);
622             mergeChildTextNodes(linkOrImage);
623             // We don't need the corresponding text node anymore, we turned it into a link/image node
624             opener.node.unlink();
625             removeLastBracket();
626 
627             // Links within links are not allowed. We found this link, so there can be no other link around it.
628             if (!opener._image) {
629                 Bracket bracket = lastBracket;
630                 while (bracket !is null) {
631                     if (!bracket._image) {
632                         // Disallow link opener. It will still get matched, but will not result in a link.
633                         bracket.allowed = false;
634                     }
635                     bracket = bracket.previous;
636                 }
637             }
638 
639             return true;
640 
641         } else { // no link or image
642 
643             appendText("]");
644             removeLastBracket();
645 
646             index = startIndex;
647             return true;
648         }
649     }
650 
651     private void addBracket(Bracket bracket) {
652         if (lastBracket !is null) {
653             lastBracket.bracketAfter = true;
654         }
655         lastBracket = bracket;
656     }
657 
658     private void removeLastBracket() {
659         lastBracket = lastBracket.previous;
660     }
661 
662     /**
663      * Attempt to parse link destination, returning the string or null if no match.
664      */
665     private string parseLinkDestination() {
666         string res = match(regex(LINK_DESTINATION_BRACES));
667         if (res !is null) { // chop off surrounding <..>:
668             if (res.length == 2) {
669                 return "";
670             } else {
671                 return Escaping.unescapeString(res.substring(1, cast(int)res.length - 1));
672             }
673         } else {
674             int startIndex = index;
675             parseLinkDestinationWithBalancedParens();
676             return Escaping.unescapeString(input.substring(startIndex, index));
677         }
678     }
679 
680     private void parseLinkDestinationWithBalancedParens() {
681         int parens = 0;
682         while (true) {
683             char c = peek();
684             switch (c) {
685                 case '\0':
686                     return;
687                 case '\\':
688                     // check if we have an escapable character
689                     if (index + 1 < input.length && !matchAll(input.substring(index + 1, index + 2),regex(ESCAPABLE)).empty()) {
690                         // skip over the escaped character (after switch)
691                         index++;
692                         break;
693                     }
694                     // otherwise, we treat this as a literal backslash
695                     break;
696                 case '(':
697                     parens++;
698                     break;
699                 case ')':
700                     if (parens == 0) {
701                         return;
702                     } else {
703                         parens--;
704                     }
705                     break;
706                 case ' ':
707                     // ASCII space
708                     return;
709                 default:
710                     // or control character
711                     if (Char.isISOControl(c)) {
712                         return;
713                     }
714             }
715             index++;
716         }
717     }
718 
719     /**
720      * Attempt to parse link title (sans quotes), returning the string or null if no match.
721      */
722     private string parseLinkTitle() {
723         string title = match(regex(LINK_TITLE));
724         if (title !is null) {
725             // chop off quotes from title and unescape:
726             return Escaping.unescapeString(title.substring(1, cast(int)title.length - 1));
727         } else {
728             return null;
729         }
730     }
731 
732     /**
733      * Attempt to parse a link label, returning number of characters parsed.
734      */
735     private int parseLinkLabel() {
736         string m = match(regex(LINK_LABEL));
737         // Spec says "A link label can have at most 999 characters inside the square brackets"
738         if (m is null || m.length > 1001) {
739             return 0;
740         } else {
741             return cast(int)(m.length);
742         }
743     }
744 
745     /**
746      * Attempt to parse an autolink (URL or email in pointy brackets).
747      */
748     private bool parseAutolink() {
749         string m;
750         if ((m = match(regex(EMAIL_AUTOLINK))) !is null) {
751             string dest = m.substring(1, cast(int)m.length - 1);
752             Link node = new Link("mailto:" ~ dest, null);
753             node.appendChild(new Text(dest));
754             appendNode(node);
755             return true;
756         } else if ((m = match(regex(AUTOLINK))) !is null) {
757             string dest = m.substring(1, cast(int)m.length - 1);
758             Link node = new Link(dest, null);
759             node.appendChild(new Text(dest));
760             appendNode(node);
761             return true;
762         } else {
763             return false;
764         }
765     }
766 
767     /**
768      * Attempt to parse inline HTML.
769      */
770     private bool parseHtmlInline() {
771         string m = this.match(regex(HTML_TAG,"i"));
772         if (m !is null) {
773             HtmlInline node = new HtmlInline();
774             node.setLiteral(m);
775             appendNode(node);
776             return true;
777         } else {
778             return false;
779         }
780     }
781 
782     /**
783      * Attempt to parse an entity, return Entity object if successful.
784      */
785     private bool parseEntity() {
786         string m;
787         if ((m = match(regex(ENTITY_HERE,"i"))) !is null) {
788             appendText(Html5Entities.entityToString(m));
789             return true;
790         } else {
791             return false;
792         }
793     }
794 
795     /**
796      * Parse a run of ordinary characters, or a single character with a special meaning in markdown, as a plain string.
797      */
798     private bool parseString() {
799         int begin = index;
800         int length = cast(int)(input.length);
801         while (index != length) {
802             if (specialCharacters.get(input[index])) {
803                 break;
804             }
805             index++;
806         }
807         if (begin != index) {
808             appendText(input, begin, index);
809             return true;
810         } else {
811             return false;
812         }
813     }
814 
815     /**
816      * Scan a sequence of characters with code delimiterChar, and return information about the number of delimiters
817      * and whether they are positioned such that they can open and/or close emphasis or strong emphasis.
818      *
819      * @return information about delimiter run, or {@code null}
820      */
821     private DelimiterData scanDelimiters(DelimiterProcessor delimiterProcessor, char delimiterChar) {
822         int startIndex = index;
823 
824         int delimiterCount = 0;
825         while (peek() == delimiterChar) {
826             delimiterCount++;
827             index++;
828         }
829 
830         if (delimiterCount < delimiterProcessor.getMinLength()) {
831             index = startIndex;
832             return null;
833         }
834 
835         string before = startIndex == 0 ? "\n" :
836                 input.substring(startIndex - 1, startIndex);
837 
838         char charAfter = peek();
839         string after = charAfter == '\0' ? "\n" :
840                  "" ~ (charAfter);
841 
842         // We could be more lazy here, in most cases we don't need to do every match case.
843         bool beforeIsPunctuation = !matchAll(before,PUNCTUATION).empty();
844         bool beforeIsWhitespace = !matchAll(before,regex(UNICODE_WHITESPACE_CHAR)).empty();
845         bool afterIsPunctuation = !matchAll(after,PUNCTUATION).empty();
846         bool afterIsWhitespace = !matchAll(after,regex(UNICODE_WHITESPACE_CHAR)).empty();
847 
848         bool leftFlanking = !afterIsWhitespace &&
849                 (!afterIsPunctuation || beforeIsWhitespace || beforeIsPunctuation);
850         bool rightFlanking = !beforeIsWhitespace &&
851                 (!beforeIsPunctuation || afterIsWhitespace || afterIsPunctuation);
852         bool canOpen;
853         bool canClose;
854         if (delimiterChar == '_') {
855             canOpen = leftFlanking && (!rightFlanking || beforeIsPunctuation);
856             canClose = rightFlanking && (!leftFlanking || afterIsPunctuation);
857         } else {
858             canOpen = leftFlanking && delimiterChar == delimiterProcessor.getOpeningCharacter();
859             canClose = rightFlanking && delimiterChar == delimiterProcessor.getClosingCharacter();
860         }
861 
862         index = startIndex;
863         return new DelimiterData(delimiterCount, canOpen, canClose);
864     }
865 
866     private void processDelimiters(Delimiter stackBottom) {
867 
868         Map!(Character, Delimiter) openersBottom = new HashMap!(Character, Delimiter)();
869 
870         // find first closer above stackBottom:
871         Delimiter closer = lastDelimiter;
872         while (closer !is null && closer.previous != stackBottom) {
873             closer = closer.previous;
874         }
875         // move forward, looking for closers, and handling each
876         while (closer !is null) {
877             char delimiterChar = closer.delimiterChar;
878 
879             DelimiterProcessor delimiterProcessor = delimiterProcessors.get(new Char(delimiterChar));
880             if (!closer.canClose || delimiterProcessor is null) {
881                 closer = closer.next;
882                 continue;
883             }
884 
885             char openingDelimiterChar = delimiterProcessor.getOpeningCharacter();
886 
887             // Found delimiter closer. Now look back for first matching opener.
888             int useDelims = 0;
889             bool openerFound = false;
890             bool potentialOpenerFound = false;
891             Delimiter opener = closer.previous;
892             while (opener !is null && opener != stackBottom && opener != openersBottom.get(new Char(delimiterChar))) {
893                 if (opener.canOpen && opener.delimiterChar == openingDelimiterChar) {
894                     potentialOpenerFound = true;
895                     useDelims = delimiterProcessor.getDelimiterUse(opener, closer);
896                     if (useDelims > 0) {
897                         openerFound = true;
898                         break;
899                     }
900                 }
901                 opener = opener.previous;
902             }
903 
904             if (!openerFound) {
905                 if (!potentialOpenerFound) {
906                     // Set lower bound for future searches for openers.
907                     // Only do this when we didn't even have a potential
908                     // opener (one that matches the character and can open).
909                     // If an opener was rejected because of the number of
910                     // delimiters (e.g. because of the "multiple of 3" rule),
911                     // we want to consider it next time because the number
912                     // of delimiters can change as we continue processing.
913                     openersBottom.put(new Char(delimiterChar), closer.previous);
914                     if (!closer.canOpen) {
915                         // We can remove a closer that can't be an opener,
916                         // once we've seen there's no matching opener:
917                         removeDelimiterKeepNode(closer);
918                     }
919                 }
920                 closer = closer.next;
921                 continue;
922             }
923 
924             Text openerNode = opener.node;
925             Text closerNode = closer.node;
926 
927             // Remove number of used delimiters from stack and inline nodes.
928             opener.setLength(opener.length - useDelims);
929             closer.setLength(opener.length - useDelims);
930             openerNode.setLiteral(
931                     openerNode.getLiteral().substring(0,
932                             openerNode.getLiteral().length - useDelims));
933             closerNode.setLiteral(
934                     closerNode.getLiteral().substring(0,
935                             closerNode.getLiteral().length - useDelims));
936 
937             removeDelimitersBetween(opener, closer);
938             // The delimiter processor can re-parent the nodes between opener and closer,
939             // so make sure they're contiguous already. Exclusive because we want to keep opener/closer themselves.
940             mergeTextNodesBetweenExclusive(openerNode, closerNode);
941             delimiterProcessor.process(openerNode, closerNode, useDelims);
942 
943             // No delimiter characters left to process, so we can remove delimiter and the now empty node.
944             if (opener.length == 0) {
945                 removeDelimiterAndNode(opener);
946             }
947 
948             if (closer.length == 0) {
949                 Delimiter next = closer.next;
950                 removeDelimiterAndNode(closer);
951                 closer = next;
952             }
953         }
954 
955         // remove all delimiters
956         while (lastDelimiter !is null && lastDelimiter != stackBottom) {
957             removeDelimiterKeepNode(lastDelimiter);
958         }
959     }
960 
961     private void removeDelimitersBetween(Delimiter opener, Delimiter closer) {
962         Delimiter delimiter = closer.previous;
963         while (delimiter !is null && delimiter != opener) {
964             Delimiter previousDelimiter = delimiter.previous;
965             removeDelimiterKeepNode(delimiter);
966             delimiter = previousDelimiter;
967         }
968     }
969 
970     /**
971      * Remove the delimiter and the corresponding text node. For used delimiters, e.g. `*` in `*foo*`.
972      */
973     private void removeDelimiterAndNode(Delimiter delim) {
974         Text node = delim.node;
975         node.unlink();
976         removeDelimiter(delim);
977     }
978 
979     /**
980      * Remove the delimiter but keep the corresponding node as text. For unused delimiters such as `_` in `foo_bar`.
981      */
982     private void removeDelimiterKeepNode(Delimiter delim) {
983         removeDelimiter(delim);
984     }
985 
986     private void removeDelimiter(Delimiter delim) {
987         if (delim.previous !is null) {
988             delim.previous.next = delim.next;
989         }
990         if (delim.next is null) {
991             // top of stack
992             lastDelimiter = delim.previous;
993         } else {
994             delim.next.previous = delim.previous;
995         }
996     }
997 
998     private void mergeTextNodesBetweenExclusive(Node fromNode, Node toNode) {
999         // No nodes between them
1000         if (fromNode == toNode || fromNode.getNext() == toNode) {
1001             return;
1002         }
1003 
1004         mergeTextNodesInclusive(fromNode.getNext(), toNode.getPrevious());
1005     }
1006 
1007     private void mergeChildTextNodes(Node node) {
1008         // No children or just one child node, no need for merging
1009         if (node.getFirstChild() == node.getLastChild()) {
1010             return;
1011         }
1012 
1013         mergeTextNodesInclusive(node.getFirstChild(), node.getLastChild());
1014     }
1015 
1016     private void mergeTextNodesInclusive(Node fromNode, Node toNode) {
1017         Text first = null;
1018         Text last = null;
1019         int length = 0;
1020 
1021         Node node = fromNode;
1022         while (node !is null) {
1023             if (cast(Text)node !is null) {
1024                 Text text = cast(Text) node;
1025                 if (first is null) {
1026                     first = text;
1027                 }
1028                 length += text.getLiteral().length;
1029                 last = text;
1030             } else {
1031                 mergeIfNeeded(first, last, length);
1032                 first = null;
1033                 last = null;
1034                 length = 0;
1035             }
1036             if (node == toNode) {
1037                 break;
1038             }
1039             node = node.getNext();
1040         }
1041 
1042         mergeIfNeeded(first, last, length);
1043     }
1044 
1045     private void mergeIfNeeded(Text first, Text last, int textLength) {
1046         if (first !is null && last !is null && first != last) {
1047             StringBuilder sb = new StringBuilder(textLength);
1048             sb.append(first.getLiteral());
1049             Node node = first.getNext();
1050             Node stop = last.getNext();
1051             while (node != stop) {
1052                 sb.append((cast(Text) node).getLiteral());
1053                 Node unlink = node;
1054                 node = node.getNext();
1055                 unlink.unlink();
1056             }
1057             string literal = sb.toString();
1058             first.setLiteral(literal);
1059         }
1060     }
1061 
1062     private static class DelimiterData {
1063 
1064         int count;
1065         bool canClose;
1066         bool canOpen;
1067 
1068         this(int count, bool canOpen, bool canClose) {
1069             this.count = count;
1070             this.canOpen = canOpen;
1071             this.canClose = canClose;
1072         }
1073     }
1074 }