module hunt.markdown.internal.InlineParserImpl;

import hunt.markdown.internal.ReferenceParser;
import hunt.markdown.internal.inline.AsteriskDelimiterProcessor;
import hunt.markdown.internal.inline.UnderscoreDelimiterProcessor;
import hunt.markdown.internal.util.Escaping;
import hunt.markdown.internal.util.Html5Entities;
import hunt.markdown.internal.util.Parsing;
import hunt.markdown.node.Node;
import hunt.markdown.node.Text;
import hunt.markdown.node.Link;
import hunt.markdown.parser.InlineParser;
import hunt.markdown.parser.delimiter.DelimiterProcessor;
import hunt.markdown.internal.Delimiter;
import hunt.markdown.internal.Bracket;
import hunt.markdown.internal.StaggeredDelimiterProcessor;
import hunt.markdown.node.HardLineBreak;
import hunt.markdown.node.SoftLineBreak;
import hunt.markdown.node.Code;
import hunt.markdown.node.HtmlInline;
import hunt.markdown.node.Image;

import hunt.collection.BitSet;
import hunt.collection.Map;
import hunt.collection.Set;
import hunt.collection.List;
import hunt.collection.ArrayList;
import hunt.collection.HashMap;
import hunt.util.Common;
import hunt.Char;
import hunt.text.Common;
import hunt.util.StringBuilder;
import hunt.Exceptions;

import std.regex;
import std.string;

alias Character = Char;

/**
 * Parses the inline content of a block (emphasis, links, images, code spans, entities, ...)
 * into child nodes of that block, and collects link reference definitions.
 *
 * The parser keeps per-call cursor state ({@code input}, {@code index}) plus two stacks:
 * one for emphasis-like delimiters and one for opening brackets.
 */
class InlineParserImpl : InlineParser, ReferenceParser {

    // Regex source fragments. They are compiled on use via std.regex.regex().
    private enum string ESCAPED_CHAR = "\\\\" ~ Escaping.ESCAPABLE;
    private enum string HTMLCOMMENT = "<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->";
    private enum string PROCESSINGINSTRUCTION = "[<][?].*?[?][>]";
    private enum string DECLARATION = "<![A-Z]+\\s+[^>]*>";
    private enum string CDATA = "<!\\[CDATA\\[[\\s\\S]*?\\]\\]>";
    private enum string HTMLTAG = "(?:" ~ Parsing.OPENTAG ~ "|" ~ Parsing.CLOSETAG ~ "|" ~ HTMLCOMMENT
            ~ "|" ~ PROCESSINGINSTRUCTION ~ "|" ~ DECLARATION ~ "|" ~ CDATA ~ ")";
    private enum string ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});";

    private enum string ASCII_PUNCTUATION = "!\"#\\$%&'\\(\\)\\*\\+,\\-\\./:;<=>\\?@\\[\\\\\\]\\^_`\\{\\|\\}~";
    private enum string PUNCTUATION = ("^[" ~ ASCII_PUNCTUATION ~ "\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}\\p{Ps}]");

    private enum string HTML_TAG = '^' ~ HTMLTAG; //i

    private enum string LINK_TITLE = (
            "^(?:\"(" ~ ESCAPED_CHAR ~ "|[^\"\\x00])*\"" ~
                    '|' ~
                    "'(" ~ ESCAPED_CHAR ~ "|[^'\\x00])*'" ~
                    '|' ~
                    "\\((" ~ ESCAPED_CHAR ~ "|[^)\\x00])*\\))");

    private enum string LINK_DESTINATION_BRACES = ("^(?:[<](?:[^<> \\t\\n\\\\]|\\\\.)*[>])");

    private enum string LINK_LABEL = ("^\\[(?:[^\\\\\\[\\]]|\\\\.)*\\]");

    private enum string ESCAPABLE = ('^' ~ Escaping.ESCAPABLE);

    private enum string ENTITY_HERE = '^' ~ ENTITY; //i

    private enum string TICKS = ("`+");

    private enum string TICKS_HERE = ("^`+");

    private enum string EMAIL_AUTOLINK = ("^<([a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>");

    private enum string AUTOLINK = ("^<[a-zA-Z][a-zA-Z0-9.+-]{1,31}:[^<>\u0000-\u0020]*>");

    private enum string SPNL = ("^ *(?:\n *)?");

    private enum string UNICODE_WHITESPACE_CHAR = ("^[\\p{Zs}\t\r\n\f]");

    private enum string WHITESPACE = ("\\s+");

    private enum string FINAL_SPACE = (" *$");

    private enum string LINE_END = ("^ *(?:\n|$)");

    /** Characters that end a run of plain text in parseString(). */
    private BitSet specialCharacters;
    /** Characters that have a registered delimiter processor. */
    private BitSet delimiterCharacters;
    /** Delimiter processor per delimiter character (opening and closing characters are both keys). */
    private Map!(Character, DelimiterProcessor) delimiterProcessors;

    /**
     * Link references by ID, needs to be built up using parseReference before calling parse.
     */
    private Map!(string, Link) referenceMap;

    /** Block whose inline children are currently being built. */
    private Node block;

    /** Text being parsed and the current read position within it. */
    private string input;
    private int index;

    /**
     * Top delimiter (emphasis, strong emphasis or custom emphasis). (Brackets are on a separate stack, different
     * from the algorithm described in the spec.)
     */
    private Delimiter lastDelimiter;

    /**
     * Top opening bracket (<code>[</code> or <code>![</code>).
     */
    private Bracket lastBracket;

    /**
     * Creates a parser using the built-in asterisk/underscore processors plus the given custom ones.
     *
     * Params:
     *   delimiterProcessors = additional delimiter processors to register
     */
    public this(List!(DelimiterProcessor) delimiterProcessors) {
        referenceMap = new HashMap!(string, Link)();
        this.delimiterProcessors = calculateDelimiterProcessors(delimiterProcessors);
        Character[] chars;
        foreach (k, v; this.delimiterProcessors) {
            chars ~= k;
        }
        this.delimiterCharacters = calculateDelimiterCharacters(chars);
        this.specialCharacters = calculateSpecialCharacters(delimiterCharacters);
    }

    /**
     * Builds a bit set with one bit set per delimiter character.
     */
    public static BitSet calculateDelimiterCharacters(Set!(Character) characters) {
        BitSet bitSet = new BitSet();
        foreach (Character character; characters) {
            bitSet.set(cast(int)(character.charValue));
        }
        return bitSet;
    }

    /**
     * Array overload of calculateDelimiterCharacters; same result as the Set version.
     */
    public static BitSet calculateDelimiterCharacters(Character[] characters) {
        BitSet bitSet = new BitSet();
        foreach (Character character; characters) {
            bitSet.set(cast(int)(character.charValue));
        }
        return bitSet;
    }

    /**
     * Returns the delimiter characters plus the fixed set of characters that
     * parseInline() dispatches on (newline, backtick, brackets, backslash, '!', '<', '&').
     */
    public static BitSet calculateSpecialCharacters(BitSet delimiterCharacters) {
        BitSet bitSet = new BitSet();
        bitSet.or(delimiterCharacters);
        bitSet.set('\n');
        bitSet.set('`');
        bitSet.set('[');
        bitSet.set(']');
        bitSet.set('\\');
        bitSet.set('!');
        bitSet.set('<');
        bitSet.set('&');
        return bitSet;
    }

    /**
     * Builds the character-to-processor map: the built-in asterisk and underscore
     * processors are registered first, followed by the supplied custom processors.
     */
    public static Map!(Character, DelimiterProcessor) calculateDelimiterProcessors(List!(DelimiterProcessor) delimiterProcessors) {
        Map!(Character, DelimiterProcessor) map = new HashMap!(Character, DelimiterProcessor)();
        auto list = new ArrayList!DelimiterProcessor();
        list.add(new AsteriskDelimiterProcessor());
        list.add(new UnderscoreDelimiterProcessor());
        addDelimiterProcessors(list, map);
        addDelimiterProcessors(delimiterProcessors, map);
        return map;
    }

    /**
     * Registers each processor in the map. Processors whose opening and closing
     * characters are the same and that share that character with an already registered
     * same-character processor are combined into a StaggeredDelimiterProcessor;
     * any other clash is reported by addDelimiterProcessorForChar.
     */
    private static void addDelimiterProcessors(Iterable!(DelimiterProcessor) delimiterProcessors, Map!(Character, DelimiterProcessor) map) {
        foreach (DelimiterProcessor delimiterProcessor; delimiterProcessors) {
            char opening = delimiterProcessor.getOpeningCharacter();
            char closing = delimiterProcessor.getClosingCharacter();
            if (opening == closing) {
                DelimiterProcessor old = map.get(new Char(opening));
                if (old !is null && old.getOpeningCharacter() == old.getClosingCharacter()) {
                    StaggeredDelimiterProcessor s;
                    if (cast(StaggeredDelimiterProcessor) old !is null) {
                        s = cast(StaggeredDelimiterProcessor) old;
                    } else {
                        s = new StaggeredDelimiterProcessor(opening);
                        s.add(old);
                    }
                    s.add(delimiterProcessor);
                    map.put(new Char(opening), s);
                } else {
                    addDelimiterProcessorForChar(opening, delimiterProcessor, map);
                }
            } else {
                addDelimiterProcessorForChar(opening, delimiterProcessor, map);
                addDelimiterProcessorForChar(closing, delimiterProcessor, map);
            }
        }
    }

    /**
     * Puts the processor into the map under the given character.
     *
     * Throws: IllegalArgumentException if the character already had a processor.
     */
    private static void addDelimiterProcessorForChar(char delimiterChar, DelimiterProcessor toAdd, Map!(Character, DelimiterProcessor) delimiterProcessors) {
        DelimiterProcessor existing = delimiterProcessors.put(new Char(delimiterChar), toAdd);
        if (existing !is null) {
            throw new IllegalArgumentException("Delimiter processor conflict with delimiter char '" ~ delimiterChar ~ "'");
        }
    }

    /**
     * Parse content in block into inline children, using reference map to resolve references.
     *
     * Params:
     *   content = raw inline text; it is stripped of surrounding whitespace before parsing
     *   block   = node that receives the parsed inline children
     */
    override public void parse(string content, Node block) {
        this.block = block;
        this.input = content.strip();
        this.index = 0;
        this.lastDelimiter = null;
        this.lastBracket = null;

        bool moreToParse;
        do {
            moreToParse = parseInline();
        } while (moreToParse);

        processDelimiters(null);
        mergeChildTextNodes(block);
    }

    /**
     * Attempt to parse a link reference, modifying the internal reference map.
     *
     * Params:
     *   s = text that may start with a link reference definition
     * Returns: number of characters consumed, or 0 if {@code s} does not start with a valid definition.
     *          An already known label is not overwritten (the first definition wins).
     */
    override public int parseReference(string s) {
        this.input = s;
        this.index = 0;
        string dest;
        string title;
        int matchChars;
        int startIndex = index;

        // label:
        matchChars = parseLinkLabel();
        if (matchChars == 0) {
            return 0;
        }

        string rawLabel = input.substring(0, matchChars);

        // colon:
        if (peek() != ':') {
            return 0;
        }
        index++;

        // link url
        spnl();

        dest = parseLinkDestination();
        if (dest is null || dest.length == 0) {
            return 0;
        }

        int beforeTitle = index;
        spnl();
        title = parseLinkTitle();
        if (title is null) {
            // rewind before spaces
            index = beforeTitle;
        }

        bool atLineEnd = true;
        if (index != input.length && match(regex(LINE_END)) is null) {
            if (title is null) {
                atLineEnd = false;
            } else {
                // the potential title we found is not at the line end,
                // but it could still be a legal link reference if we
                // discard the title
                title = null;
                // rewind before spaces
                index = beforeTitle;
                // and instead check if the link URL is at the line end
                atLineEnd = match(regex(LINE_END)) !is null;
            }
        }

        if (!atLineEnd) {
            return 0;
        }

        string normalizedLabel = Escaping.normalizeReference(rawLabel);
        if (normalizedLabel.isEmpty()) {
            return 0;
        }

        if (!referenceMap.containsKey(normalizedLabel)) {
            Link link = new Link(dest, title);
            referenceMap.put(normalizedLabel, link);
        }
        return index - startIndex;
    }

    /**
     * Appends {@code text[beginIndex .. endIndex]} as a new Text node and returns that node.
     */
    private Text appendText(string text, int beginIndex, int endIndex) {
        return appendText(text.substring(beginIndex, endIndex));
    }

    /**
     * Appends the given string as a new Text node to the current block and returns that node.
     */
    private Text appendText(string text) {
        Text node = new Text(text);
        appendNode(node);
        return node;
    }

    /**
     * Appends a node as the last child of the current block.
     */
    private void appendNode(Node node) {
        block.appendChild(node);
    }

    /**
     * Parse the next inline element in subject, advancing input index.
     * On success, add the result to block's children and return true.
     * Returns false only when the end of the input has been reached.
     */
    private bool parseInline() {
        bool res;
        char c = peek();
        if (c == '\0') {
            return false;
        }
        switch (c) {
            case '\n':
                res = parseNewline();
                break;
            case '\\':
                res = parseBackslash();
                break;
            case '`':
                res = parseBackticks();
                break;
            case '[':
                res = parseOpenBracket();
                break;
            case '!':
                res = parseBang();
                break;
            case ']':
                res = parseCloseBracket();
                break;
            case '<':
                res = parseAutolink() || parseHtmlInline();
                break;
            case '&':
                res = parseEntity();
                break;
            default:
                bool isDelimiter = delimiterCharacters.get(c);
                if (isDelimiter) {
                    DelimiterProcessor delimiterProcessor = delimiterProcessors.get(new Char(c));
                    res = parseDelimiters(delimiterProcessor, c);
                } else {
                    res = parseString();
                }
                break;
        }
        if (!res) {
            index++;
            // When we get here, it's only for a single special character that turned out to not have a special meaning.
            // So we shouldn't have a single surrogate here, hence it should be ok to turn it into a String.
            string literal = "" ~ c;
            appendText(literal);
        }

        return true;
    }

    /**
     * If RE matches in the input at or after the current index, advance index past the
     * match and return the matched text; otherwise return null.
     *
     * Most patterns used here are anchored with '^' and therefore only match exactly at
     * the current index; unanchored patterns (e.g. TICKS) skip forward to the first match.
     */
    private string match(Regex!char re) {
        if (index >= input.length) {
            return null;
        }
        auto matcher = matchAll(input[index .. $], re);
        if (matcher.empty()) {
            return null;
        }
        auto found = matcher.front;
        string hit = found.hit;
        // 'pre' is the part of the searched slice before the match, so its length is the
        // match offset relative to index; no second text search is needed to locate it.
        index += cast(int)(found.pre.length + hit.length);
        return hit;
    }

    /**
     * Returns the char at the current input index, or {@code '\0'} in case there are no more characters.
     */
    private char peek() {
        if (index < input.length) {
            return input[index];
        } else {
            return '\0';
        }
    }

    /**
     * Parse zero or more space characters, including at most one newline.
     * Always returns true.
     */
    private bool spnl() {
        match(regex(SPNL));
        return true;
    }

    /**
     * Parse a newline. If it was preceded by two spaces, return a hard line break; otherwise a soft line break.
     */
    private bool parseNewline() {
        index++; // assume we're at a \n

        Node lastChild = block.getLastChild();
        // Check previous text for trailing spaces.
        // The "endsWith" is an optimization to avoid an RE match in the common case.
        if (lastChild !is null && cast(Text) lastChild !is null && (cast(Text) lastChild).getLiteral().endsWith(" ")) {
            Text text = cast(Text) lastChild;
            string literal = text.getLiteral();
            auto matcher = matchAll(literal, regex(FINAL_SPACE));
            int spaces = !matcher.empty() ? cast(int)(matcher.front.captures[0].length) : 0;
            if (spaces > 0) {
                // trailing spaces are not part of the text, they only select the break type
                text.setLiteral(literal.substring(0, cast(int) literal.length - spaces));
            }
            appendNode(spaces >= 2 ? new HardLineBreak() : new SoftLineBreak());
        } else {
            appendNode(new SoftLineBreak());
        }

        // gobble leading spaces in next line
        while (peek() == ' ') {
            index++;
        }
        return true;
    }

    /**
     * Parse a backslash-escaped special character, adding either the escaped character, a hard line break
     * (if the backslash is followed by a newline), or a literal backslash to the block's children.
     */
    private bool parseBackslash() {
        index++;
        if (peek() == '\n') {
            appendNode(new HardLineBreak());
            index++;
        } else if (index < input.length && !matchAll(input.substring(index, index + 1), regex(ESCAPABLE)).empty()) {
            appendText(input, index, index + 1);
            index++;
        } else {
            appendText("\\");
        }
        return true;
    }

    /**
     * Attempt to parse backticks, adding either a backtick code span or a literal sequence of backticks.
     */
    private bool parseBackticks() {
        string ticks = match(regex(TICKS_HERE));
        if (ticks is null) {
            return false;
        }
        int afterOpenTicks = index;
        string matched;
        // Look at every following backtick run until one has the same length as the opener.
        while ((matched = match(regex(TICKS))) !is null) {
            if (matched == ticks) {
                Code node = new Code();
                string content = input.substring(afterOpenTicks, index - ticks.length);
                string literal = replaceAll(content.strip(), regex(WHITESPACE), " ");
                node.setLiteral(literal);
                appendNode(node);
                return true;
            }
        }
        // If we got here, we didn't match a closing backtick sequence.
        index = afterOpenTicks;
        appendText(ticks);
        return true;
    }

    /**
     * Attempt to parse delimiters like emphasis, strong emphasis or custom delimiters.
     * The delimiter run is added as a text node and pushed on the delimiter stack;
     * it is resolved later by processDelimiters().
     */
    private bool parseDelimiters(DelimiterProcessor delimiterProcessor, char delimiterChar) {
        DelimiterData res = scanDelimiters(delimiterProcessor, delimiterChar);
        if (res is null) {
            return false;
        }
        int length = res.count;
        int startIndex = index;

        index += length;
        Text node = appendText(input, startIndex, index);

        // Add entry to stack for this opener
        lastDelimiter = new Delimiter(node, delimiterChar, res.canOpen, res.canClose, lastDelimiter);
        lastDelimiter.setLength(length);
        lastDelimiter.setOriginalLength(length);
        if (lastDelimiter.previous !is null) {
            lastDelimiter.previous.next = lastDelimiter;
        }

        return true;
    }

    /**
     * Add open bracket to delimiter stack and add a text node to block's children.
     */
    private bool parseOpenBracket() {
        int startIndex = index;
        index++;

        Text node = appendText("[");

        // Add entry to stack for this opener
        addBracket(Bracket.link(node, startIndex, lastBracket, lastDelimiter));

        return true;
    }

    /**
     * If the next character is [, add an image opener (![) to the bracket stack and add a text node
     * to block's children. Otherwise just add a text node for the literal !.
     */
    private bool parseBang() {
        int startIndex = index;
        index++;
        if (peek() == '[') {
            index++;

            Text node = appendText("![");

            // Add entry to stack for this opener
            addBracket(Bracket.image(node, startIndex + 1, lastBracket, lastDelimiter));
        } else {
            appendText("!");
        }
        return true;
    }

    /**
     * Try to match close bracket against an opening in the bracket stack. Add either a link or image, or a
     * plain ] character, to block's children. The matched opener is removed from the bracket stack.
     */
    private bool parseCloseBracket() {
        index++;
        int startIndex = index;

        // Get previous `[` or `![`
        Bracket opener = lastBracket;
        if (opener is null) {
            // No matching opener, just return a literal.
            appendText("]");
            return true;
        }

        if (!opener.allowed) {
            // Matching opener but it's not allowed, just return a literal.
            appendText("]");
            removeLastBracket();
            return true;
        }

        // Check to see if we have a link/image

        string dest = null;
        string title = null;
        bool isLinkOrImage = false;

        // Maybe a inline link like `[foo](/uri "title")`
        if (peek() == '(') {
            index++;
            spnl();
            if ((dest = parseLinkDestination()) !is null) {
                spnl();
                // title needs a whitespace before
                if (!matchAll(input.substring(index - 1, index), regex(WHITESPACE)).empty()) {
                    title = parseLinkTitle();
                    spnl();
                }
                if (peek() == ')') {
                    index++;
                    isLinkOrImage = true;
                } else {
                    index = startIndex;
                }
            }
        }

        // Maybe a reference link like `[foo][bar]`, `[foo][]` or `[foo]`
        if (!isLinkOrImage) {

            // See if there's a link label like `[bar]` or `[]`
            int beforeLabel = index;
            int labelLength = parseLinkLabel();
            string r = null;
            if (labelLength > 2) {
                r = input.substring(beforeLabel, beforeLabel + labelLength);
            } else if (!opener.bracketAfter) {
                // If the second label is empty `[foo][]` or missing `[foo]`, then the first label is the reference.
                // But it can only be a reference when there's no (unescaped) bracket in it.
                // If there is, we don't even need to try to look up the reference. This is an optimization.
                r = input.substring(opener.index, startIndex);
            }

            if (r !is null) {
                Link link = referenceMap.get(Escaping.normalizeReference(r));
                if (link !is null) {
                    dest = link.getDestination();
                    title = link.getTitle();
                    isLinkOrImage = true;
                }
            }
        }

        if (isLinkOrImage) {
            // If we got here, open is a potential opener
            Node linkOrImage = opener._image ? new Image(dest, title) : new Link(dest, title);

            // Move everything after the opener's text node into the new link/image node.
            Node node = opener.node.getNext();
            while (node !is null) {
                Node next = node.getNext();
                linkOrImage.appendChild(node);
                node = next;
            }
            appendNode(linkOrImage);

            // Process delimiters such as emphasis inside link/image
            processDelimiters(opener.previousDelimiter);
            mergeChildTextNodes(linkOrImage);
            // We don't need the corresponding text node anymore, we turned it into a link/image node
            opener.node.unlink();
            removeLastBracket();

            // Links within links are not allowed. We found this link, so there can be no other link around it.
            if (!opener._image) {
                Bracket bracket = lastBracket;
                while (bracket !is null) {
                    if (!bracket._image) {
                        // Disallow link opener. It will still get matched, but will not result in a link.
                        bracket.allowed = false;
                    }
                    bracket = bracket.previous;
                }
            }

            return true;

        } else { // no link or image

            appendText("]");
            removeLastBracket();

            index = startIndex;
            return true;
        }
    }

    /**
     * Pushes a bracket on the bracket stack and marks the previous top as having a bracket after it.
     */
    private void addBracket(Bracket bracket) {
        if (lastBracket !is null) {
            lastBracket.bracketAfter = true;
        }
        lastBracket = bracket;
    }

    /**
     * Pops the top of the bracket stack. Callers must ensure the stack is not empty.
     */
    private void removeLastBracket() {
        lastBracket = lastBracket.previous;
    }

    /**
     * Attempt to parse link destination, returning the unescaped destination string.
     * A destination in pointy brackets is returned without the brackets; otherwise the
     * span accepted by parseLinkDestinationWithBalancedParens() is used.
     */
    private string parseLinkDestination() {
        string res = match(regex(LINK_DESTINATION_BRACES));
        if (res !is null) { // chop off surrounding <..>:
            if (res.length == 2) {
                return "";
            } else {
                return Escaping.unescapeString(res.substring(1, cast(int) res.length - 1));
            }
        } else {
            int startIndex = index;
            parseLinkDestinationWithBalancedParens();
            return Escaping.unescapeString(input.substring(startIndex, index));
        }
    }

    /**
     * Advances index over a bare link destination. Stops at the end of input, an ASCII space,
     * a control character, or a closing parenthesis that has no matching opening one.
     */
    private void parseLinkDestinationWithBalancedParens() {
        int parens = 0;
        while (true) {
            char c = peek();
            switch (c) {
                case '\0':
                    return;
                case '\\':
                    // check if we have an escapable character
                    if (index + 1 < input.length && !matchAll(input.substring(index + 1, index + 2), regex(ESCAPABLE)).empty()) {
                        // skip over the escaped character (after switch)
                        index++;
                        break;
                    }
                    // otherwise, we treat this as a literal backslash
                    break;
                case '(':
                    parens++;
                    break;
                case ')':
                    if (parens == 0) {
                        return;
                    } else {
                        parens--;
                    }
                    break;
                case ' ':
                    // ASCII space
                    return;
                default:
                    // or control character
                    if (Char.isISOControl(c)) {
                        return;
                    }
            }
            index++;
        }
    }

    /**
     * Attempt to parse link title (sans quotes), returning the string or null if no match.
     */
    private string parseLinkTitle() {
        string title = match(regex(LINK_TITLE));
        if (title !is null) {
            // chop off quotes from title and unescape:
            return Escaping.unescapeString(title.substring(1, cast(int) title.length - 1));
        } else {
            return null;
        }
    }

    /**
     * Attempt to parse a link label, returning number of characters parsed
     * (including the square brackets), or 0 if there is no valid label.
     */
    private int parseLinkLabel() {
        string m = match(regex(LINK_LABEL));
        // Spec says "A link label can have at most 999 characters inside the square brackets"
        if (m is null || m.length > 1001) {
            return 0;
        } else {
            return cast(int)(m.length);
        }
    }

    /**
     * Attempt to parse an autolink (URL or email in pointy brackets).
     */
    private bool parseAutolink() {
        string m;
        if ((m = match(regex(EMAIL_AUTOLINK))) !is null) {
            string dest = m.substring(1, cast(int) m.length - 1);
            Link node = new Link("mailto:" ~ dest, null);
            node.appendChild(new Text(dest));
            appendNode(node);
            return true;
        } else if ((m = match(regex(AUTOLINK))) !is null) {
            string dest = m.substring(1, cast(int) m.length - 1);
            Link node = new Link(dest, null);
            node.appendChild(new Text(dest));
            appendNode(node);
            return true;
        } else {
            return false;
        }
    }

    /**
     * Attempt to parse inline HTML.
     */
    private bool parseHtmlInline() {
        string m = this.match(regex(HTML_TAG, "i"));
        if (m !is null) {
            HtmlInline node = new HtmlInline();
            node.setLiteral(m);
            appendNode(node);
            return true;
        } else {
            return false;
        }
    }

    /**
     * Attempt to parse an entity. On success the decoded entity is appended as text
     * and true is returned; otherwise false is returned.
     */
    private bool parseEntity() {
        string m;
        if ((m = match(regex(ENTITY_HERE, "i"))) !is null) {
            appendText(Html5Entities.entityToString(m));
            return true;
        } else {
            return false;
        }
    }

    /**
     * Parse a run of ordinary characters as a plain string. Returns false when the character
     * at the current index is itself a special character, so no run could be consumed.
     */
    private bool parseString() {
        int begin = index;
        int length = cast(int)(input.length);
        while (index != length) {
            if (specialCharacters.get(input[index])) {
                break;
            }
            index++;
        }
        if (begin != index) {
            appendText(input, begin, index);
            return true;
        } else {
            return false;
        }
    }

    /**
     * Scan a sequence of characters with code delimiterChar, and return information about the number of delimiters
     * and whether they are positioned such that they can open and/or close emphasis or strong emphasis.
     * The input index is left unchanged.
     *
     * @return information about delimiter run, or {@code null}
     */
    private DelimiterData scanDelimiters(DelimiterProcessor delimiterProcessor, char delimiterChar) {
        int startIndex = index;

        int delimiterCount = 0;
        while (peek() == delimiterChar) {
            delimiterCount++;
            index++;
        }

        if (delimiterCount < delimiterProcessor.getMinLength()) {
            index = startIndex;
            return null;
        }

        // Start and end of input count as a line break for the flanking rules.
        string before = startIndex == 0 ? "\n" :
                input.substring(startIndex - 1, startIndex);

        char charAfter = peek();
        string after = charAfter == '\0' ? "\n" :
                "" ~ (charAfter);

        // We could be more lazy here, in most cases we don't need to do every match case.
        bool beforeIsPunctuation = !matchAll(before, PUNCTUATION).empty();
        bool beforeIsWhitespace = !matchAll(before, regex(UNICODE_WHITESPACE_CHAR)).empty();
        bool afterIsPunctuation = !matchAll(after, PUNCTUATION).empty();
        bool afterIsWhitespace = !matchAll(after, regex(UNICODE_WHITESPACE_CHAR)).empty();

        bool leftFlanking = !afterIsWhitespace &&
                (!afterIsPunctuation || beforeIsWhitespace || beforeIsPunctuation);
        bool rightFlanking = !beforeIsWhitespace &&
                (!beforeIsPunctuation || afterIsWhitespace || afterIsPunctuation);
        bool canOpen;
        bool canClose;
        if (delimiterChar == '_') {
            canOpen = leftFlanking && (!rightFlanking || beforeIsPunctuation);
            canClose = rightFlanking && (!leftFlanking || afterIsPunctuation);
        } else {
            canOpen = leftFlanking && delimiterChar == delimiterProcessor.getOpeningCharacter();
            canClose = rightFlanking && delimiterChar == delimiterProcessor.getClosingCharacter();
        }

        index = startIndex;
        return new DelimiterData(delimiterCount, canOpen, canClose);
    }

    /**
     * Resolves the delimiter stack above stackBottom: matches closers with openers, lets the
     * delimiter processor rewrite the nodes in between, and finally removes all remaining
     * delimiters above stackBottom (their text nodes stay as literal text).
     *
     * Params:
     *   stackBottom = delimiter below which nothing is touched; null processes the whole stack
     */
    private void processDelimiters(Delimiter stackBottom) {

        Map!(Character, Delimiter) openersBottom = new HashMap!(Character, Delimiter)();

        // find first closer above stackBottom:
        Delimiter closer = lastDelimiter;
        while (closer !is null && closer.previous != stackBottom) {
            closer = closer.previous;
        }
        // move forward, looking for closers, and handling each
        while (closer !is null) {
            char delimiterChar = closer.delimiterChar;

            DelimiterProcessor delimiterProcessor = delimiterProcessors.get(new Char(delimiterChar));
            if (!closer.canClose || delimiterProcessor is null) {
                closer = closer.next;
                continue;
            }

            char openingDelimiterChar = delimiterProcessor.getOpeningCharacter();

            // Found delimiter closer. Now look back for first matching opener.
            int useDelims = 0;
            bool openerFound = false;
            bool potentialOpenerFound = false;
            Delimiter opener = closer.previous;
            while (opener !is null && opener != stackBottom && opener != openersBottom.get(new Char(delimiterChar))) {
                if (opener.canOpen && opener.delimiterChar == openingDelimiterChar) {
                    potentialOpenerFound = true;
                    useDelims = delimiterProcessor.getDelimiterUse(opener, closer);
                    if (useDelims > 0) {
                        openerFound = true;
                        break;
                    }
                }
                opener = opener.previous;
            }

            if (!openerFound) {
                if (!potentialOpenerFound) {
                    // Set lower bound for future searches for openers.
                    // Only do this when we didn't even have a potential
                    // opener (one that matches the character and can open).
                    // If an opener was rejected because of the number of
                    // delimiters (e.g. because of the "multiple of 3" rule),
                    // we want to consider it next time because the number
                    // of delimiters can change as we continue processing.
                    openersBottom.put(new Char(delimiterChar), closer.previous);
                    if (!closer.canOpen) {
                        // We can remove a closer that can't be an opener,
                        // once we've seen there's no matching opener:
                        removeDelimiterKeepNode(closer);
                    }
                }
                closer = closer.next;
                continue;
            }

            Text openerNode = opener.node;
            Text closerNode = closer.node;

            // Remove number of used delimiters from stack and inline nodes.
            // Each delimiter shrinks by useDelims relative to its OWN length; the closer's
            // remaining length must not be derived from the opener's.
            opener.setLength(opener.length - useDelims);
            closer.setLength(closer.length - useDelims);
            openerNode.setLiteral(
                    openerNode.getLiteral().substring(0,
                            openerNode.getLiteral().length - useDelims));
            closerNode.setLiteral(
                    closerNode.getLiteral().substring(0,
                            closerNode.getLiteral().length - useDelims));

            removeDelimitersBetween(opener, closer);
            // The delimiter processor can re-parent the nodes between opener and closer,
            // so make sure they're contiguous already. Exclusive because we want to keep opener/closer themselves.
            mergeTextNodesBetweenExclusive(openerNode, closerNode);
            delimiterProcessor.process(openerNode, closerNode, useDelims);

            // No delimiter characters left to process, so we can remove delimiter and the now empty node.
            if (opener.length == 0) {
                removeDelimiterAndNode(opener);
            }

            if (closer.length == 0) {
                Delimiter next = closer.next;
                removeDelimiterAndNode(closer);
                closer = next;
            }
        }

        // remove all delimiters
        while (lastDelimiter !is null && lastDelimiter != stackBottom) {
            removeDelimiterKeepNode(lastDelimiter);
        }
    }

    /**
     * Removes every delimiter strictly between opener and closer from the stack, keeping their text nodes.
     */
    private void removeDelimitersBetween(Delimiter opener, Delimiter closer) {
        Delimiter delimiter = closer.previous;
        while (delimiter !is null && delimiter != opener) {
            Delimiter previousDelimiter = delimiter.previous;
            removeDelimiterKeepNode(delimiter);
            delimiter = previousDelimiter;
        }
    }

    /**
     * Remove the delimiter and the corresponding text node. For used delimiters, e.g. `*` in `*foo*`.
     */
    private void removeDelimiterAndNode(Delimiter delim) {
        Text node = delim.node;
        node.unlink();
        removeDelimiter(delim);
    }

    /**
     * Remove the delimiter but keep the corresponding node as text. For unused delimiters such as `_` in `foo_bar`.
     */
    private void removeDelimiterKeepNode(Delimiter delim) {
        removeDelimiter(delim);
    }

    /**
     * Unlinks the delimiter from the doubly linked delimiter stack, updating lastDelimiter
     * when the removed delimiter was the top of the stack.
     */
    private void removeDelimiter(Delimiter delim) {
        if (delim.previous !is null) {
            delim.previous.next = delim.next;
        }
        if (delim.next is null) {
            // top of stack
            lastDelimiter = delim.previous;
        } else {
            delim.next.previous = delim.previous;
        }
    }

    /**
     * Merges adjacent text nodes that lie strictly between fromNode and toNode.
     */
    private void mergeTextNodesBetweenExclusive(Node fromNode, Node toNode) {
        // No nodes between them
        if (fromNode == toNode || fromNode.getNext() == toNode) {
            return;
        }

        mergeTextNodesInclusive(fromNode.getNext(), toNode.getPrevious());
    }

    /**
     * Merges adjacent text nodes among the direct children of node.
     */
    private void mergeChildTextNodes(Node node) {
        // No children or just one child node, no need for merging
        if (node.getFirstChild() == node.getLastChild()) {
            return;
        }

        mergeTextNodesInclusive(node.getFirstChild(), node.getLastChild());
    }

    /**
     * Walks the siblings from fromNode to toNode (both included) and collapses each run of
     * consecutive Text nodes into the first node of that run.
     */
    private void mergeTextNodesInclusive(Node fromNode, Node toNode) {
        Text first = null;
        Text last = null;
        int length = 0;

        Node node = fromNode;
        while (node !is null) {
            if (cast(Text) node !is null) {
                Text text = cast(Text) node;
                if (first is null) {
                    first = text;
                }
                length += text.getLiteral().length;
                last = text;
            } else {
                // a non-text node ends the current run
                mergeIfNeeded(first, last, length);
                first = null;
                last = null;
                length = 0;
            }
            if (node == toNode) {
                break;
            }
            node = node.getNext();
        }

        mergeIfNeeded(first, last, length);
    }

    /**
     * Concatenates the literals of the text nodes from first to last into first and unlinks
     * the others. Does nothing when the run is empty or consists of a single node.
     *
     * Params:
     *   textLength = combined literal length of the run, used to size the string builder
     */
    private void mergeIfNeeded(Text first, Text last, int textLength) {
        if (first !is null && last !is null && first != last) {
            StringBuilder sb = new StringBuilder(textLength);
            sb.append(first.getLiteral());
            Node node = first.getNext();
            Node stop = last.getNext();
            while (node != stop) {
                sb.append((cast(Text) node).getLiteral());
                Node unlink = node;
                node = node.getNext();
                unlink.unlink();
            }
            string literal = sb.toString();
            first.setLiteral(literal);
        }
    }

    /**
     * Result of scanning a delimiter run: its length and whether it can open and/or close.
     */
    private static class DelimiterData {

        int count;
        bool canClose;
        bool canOpen;

        this(int count, bool canOpen, bool canClose) {
            this.count = count;
            this.canOpen = canOpen;
            this.canClose = canClose;
        }
    }
}