module hunt.markdown.internal.DocumentParser;

import hunt.markdown.internal.ReferenceParser;
import hunt.markdown.internal.util.Parsing;
import hunt.markdown.internal.DocumentBlockParser;
import hunt.markdown.internal.BlockQuoteParser;
import hunt.markdown.internal.BlockStartImpl;
import hunt.markdown.internal.HeadingParser;
import hunt.markdown.internal.HtmlBlockParser;
import hunt.markdown.internal.FencedCodeBlockParser;
import hunt.markdown.internal.ThematicBreakParser;
import hunt.markdown.internal.ListBlockParser;
import hunt.markdown.internal.IndentedCodeBlockParser;
import hunt.markdown.internal.ParagraphParser;
import hunt.markdown.node.Block;
import hunt.markdown.node.Document;
import hunt.markdown.node.BlockQuote;
import hunt.markdown.node.FencedCodeBlock;
import hunt.markdown.node.Heading;
import hunt.markdown.node.HtmlBlock;
import hunt.markdown.node.ThematicBreak;
import hunt.markdown.node.ListBlock;
import hunt.markdown.node.IndentedCodeBlock;
import hunt.markdown.node.Paragraph;
import hunt.markdown.parser.InlineParser;
import hunt.markdown.parser.block.BlockParser;
import hunt.markdown.parser.block.BlockParserFactory;
import hunt.markdown.parser.block.ParserState;
import hunt.markdown.parser.block.BlockStart;
import hunt.markdown.parser.block.MatchedBlockParser;
import hunt.markdown.parser.block.BlockContinue;
import hunt.markdown.internal.BlockContinueImpl;
import hunt.markdown.parser.block.AbstractBlockParser;

import hunt.collection.Collections;
import hunt.collection.Map;
import hunt.collection.Set;
import hunt.collection.List;
import hunt.collection.HashSet;
import hunt.collection.HashMap;
import hunt.collection.LinkedHashMap;
import hunt.collection.LinkedHashSet;
import hunt.collection.ArrayList;
import hunt.Exceptions;
import hunt.logging;
import hunt.text;
import std.stdio;

/**
 * Line-oriented block parser. Input is fed one line at a time to a stack of
 * active block parsers (the document parser is always at the bottom); after
 * the last line all blocks are finalized and inline content is parsed.
 *
 * The class also implements ParserState, exposing the current line and the
 * cursor position (index, column, indent, blank flag) to the block parsers
 * and factories it calls.
 */
class DocumentParser : ParserState
{

    /// Node types of the built-in block syntaxes, in the order they are registered.
    private __gshared Set!(TypeInfo_Class) CORE_FACTORY_TYPES;

    /// Built-in factory for each node type listed in CORE_FACTORY_TYPES.
    private __gshared Map!(TypeInfo_Class, BlockParserFactory) NODES_TO_CORE_FACTORIES;

    shared static this()
    {
        CORE_FACTORY_TYPES = new LinkedHashSet!(TypeInfo_Class)([typeid(BlockQuote), typeid(Heading),
                typeid(FencedCodeBlock), typeid(HtmlBlock), typeid(ThematicBreak),
                typeid(ListBlock), typeid(IndentedCodeBlock)]);

        Map!(TypeInfo_Class, BlockParserFactory) map = new HashMap!(TypeInfo_Class,
                BlockParserFactory)();
        map.put(typeid(BlockQuote), new BlockQuoteParser.Factory());
        map.put(typeid(Heading), new HeadingParser.Factory());
        map.put(typeid(FencedCodeBlock), new FencedCodeBlockParser.Factory());
        map.put(typeid(HtmlBlock), new HtmlBlockParser.Factory());
        map.put(typeid(ThematicBreak), new ThematicBreakParser.Factory());
        map.put(typeid(ListBlock), new ListBlockParser.Factory());
        map.put(typeid(IndentedCodeBlock), new IndentedCodeBlockParser.Factory());

        NODES_TO_CORE_FACTORIES = map; // Collections.unmodifiableMap(map);
    }

    /// The line currently being incorporated, as returned by Parsing.prepareLine.
    private string line;

    /**
     * current index (offset) in input line (0-based)
     */
    private int index = 0;

    /**
     * current column of input line (tab causes column to go to next 4-space tab stop) (0-based)
     */
    private int column = 0;

    /**
     * if the current column is within a tab character (partially consumed tab)
     */
    private bool columnIsInTab;

    // The four fields below are recomputed by findNextNonSpace().
    private int nextNonSpace = 0;
    private int nextNonSpaceColumn = 0;
    private int indent = 0;
    private bool blank;

    private List!(BlockParserFactory) blockParserFactories;
    private InlineParser inlineParser;
    private DocumentBlockParser documentBlockParser;

    /// Stack of open block parsers; the last element is the innermost one.
    private List!(BlockParser) activeBlockParsers;
    /// Every block parser that was ever activated (and not replaced); used for inline processing.
    private Set!(BlockParser) allBlockParsers;

    /**
     * Creates a parser and activates the root document block parser.
     *
     * Params:
     *   blockParserFactories = factories tried, in list order, when looking for a new block start
     *   inlineParser = parser handed to every block parser once block parsing is finished
     */
    public this(List!(BlockParserFactory) blockParserFactories, InlineParser inlineParser)
    {

        activeBlockParsers = new ArrayList!(BlockParser)();
        allBlockParsers = new HashSet!(BlockParser)();
        version (HUNT_DEBUG) logDebug("blockParserFactories size :", blockParserFactories.size);
        this.blockParserFactories = blockParserFactories;
        this.inlineParser = inlineParser;

        this.documentBlockParser = new DocumentBlockParser();
        activateBlockParser(this.documentBlockParser);
    }

    /**
     * Returns the set of node types for which a built-in block parser factory exists.
     * The shared set itself is returned, not a copy.
     */
    public static Set!(TypeInfo_Class) getDefaultBlockParserTypes()
    {
        return CORE_FACTORY_TYPES;
    }

    /**
     * Builds the factory list for a parser: all custom factories first, followed by the
     * built-in factory of each enabled block type.
     *
     * An enabled type that has no built-in factory is reported through logError and
     * skipped, so the returned list never contains a null entry.
     *
     * Params:
     *   customBlockParserFactories = extension factories, placed ahead of the core ones
     *   enabledBlockTypes = node types whose built-in factories should be included
     * Returns: a new list owned by the caller
     */
    public static List!(BlockParserFactory) calculateBlockParserFactories(List!(
            BlockParserFactory) customBlockParserFactories, Set!(TypeInfo_Class) enabledBlockTypes)
    {
        List!(BlockParserFactory) list = new ArrayList!(BlockParserFactory)();
        // By having the custom factories come first, extensions are able to change behavior of core syntax.
        list.addAll(customBlockParserFactories);
        foreach (blockType; enabledBlockTypes)
        {
            BlockParserFactory factory = NODES_TO_CORE_FACTORIES.get(blockType);
            if (factory is null)
            {
                // A null factory would only fail later, inside findBlockStart.
                logError("no core block parser factory for type : ",
                        blockType is null ? "null" : blockType.name);
                continue;
            }
            list.add(factory);
        }
        return list;
    }

    /**
     * The main parsing function. Returns a parsed document AST.
     *
     * The input is split at the positions reported by Parsing.findLineBreak; a "\r\n"
     * pair is consumed as a single line break. Text after the last line break (or the
     * whole input when it contains none) is incorporated as a final line.
     */
    public Document parse(string input)
    {
        int lineStart = 0;
        int lineBreak;
        while ((lineBreak = Parsing.findLineBreak(input, lineStart)) != -1)
        {
            string current = input.substring(lineStart, lineBreak);
            incorporateLine(current);
            if (lineBreak + 1 < input.length && input[lineBreak] == '\r'
                    && input[lineBreak + 1] == '\n')
            {
                lineStart = lineBreak + 2;
            }
            else
            {
                lineStart = lineBreak + 1;
            }
        }
        if (input.length > 0 && (lineStart == 0 || lineStart < input.length))
        {
            string current = input.substring(lineStart);
            incorporateLine(current);
        }

        return finalizeAndProcess();
    }

    // Reader-based overload, left unported (Java syntax kept for reference):
    //
    // public Document parse(Reader input) throws IOException {
    //     BufferedReader bufferedReader;
    //     if (cast(BufferedReader)input !is null) {
    //         bufferedReader = (BufferedReader) input;
    //     } else {
    //         bufferedReader = new BufferedReader(input);
    //     }

    //     string line;
    //     while ((line = bufferedReader.readLine()) !is null) {
    //         incorporateLine(line);
    //     }

    //     return finalizeAndProcess();
    // }

    /// Returns the line currently being processed.
    override public string getLine()
    {
        return line;
    }

    /// Returns the current offset into the line (0-based).
    override public int getIndex()
    {
        return index;
    }

    /// Returns the offset of the next non-space character, as of the last findNextNonSpace() call.
    override public int getNextNonSpaceIndex()
    {
        return nextNonSpace;
    }

    /// Returns the current column (0-based, tabs expanded to 4-column tab stops).
    override public int getColumn()
    {
        return column;
    }

    /// Returns the number of columns between the current column and the next non-space character.
    override public int getIndent()
    {
        return indent;
    }

    /// Returns true when only spaces and tabs remain from the current index to the end of the line.
    override public bool isBlank()
    {
        return blank;
    }

    /// Returns the innermost open block parser (the top of the active stack).
    override public BlockParser getActiveBlockParser()
    {
        auto bp = activeBlockParsers.get(activeBlockParsers.size() - 1);
        assert(bp !is null);
        return bp;
    }

    /**
     * Analyze a line of text and update the document appropriately. We parse markdown text by calling this on each
     * line of input, then finalizing the document.
     */
    private void incorporateLine(string ln)
    {
        line = Parsing.prepareLine(ln);
        version (HUNT_DEBUG) logDebug("prepareLine line : ", line);
        index = 0;
        column = 0;
        columnIsInTab = false;

        // For each containing block, try to parse the associated line start.
        // Bail out on failure: `matches` then counts the blocks that did match.
        // The document will always match, can be skipped
        int matches = 1;
        // Iterate over a snapshot so the loop never walks the list finalize() mutates.
        List!BlockParser openParsers = new ArrayList!BlockParser();
        for (int i = 1; i < activeBlockParsers.size(); i++)
        {
            openParsers.add(activeBlockParsers.get(i));
        }
        foreach (ref BlockParser openParser; openParsers)
        {
            findNextNonSpace();
            BlockContinue result = openParser.tryContinue(this);
            BlockContinueImpl blockContinue = cast(BlockContinueImpl) result;
            if (blockContinue is null)
            {
                break;
            }
            if (blockContinue.isFinalize())
            {
                // The block closed itself on this line; nothing else to do with the line.
                finalize(openParser);
                return;
            }
            if (blockContinue.getNewIndex() != -1)
            {
                setNewIndex(blockContinue.getNewIndex());
            }
            else if (blockContinue.getNewColumn() != -1)
            {
                setNewColumn(blockContinue.getNewColumn());
            }
            matches++;
        }

        // Open blocks that did not match this line; closed lazily below.
        List!(BlockParser) unmatchedBlockParsers = new ArrayList!(BlockParser)();
        for (int i = matches; i < activeBlockParsers.size(); i++)
        {
            unmatchedBlockParsers.add(activeBlockParsers.get(i));
        }
        BlockParser lastMatchedBlockParser = activeBlockParsers.get(matches - 1);
        BlockParser blockParser = lastMatchedBlockParser;
        bool allClosed = unmatchedBlockParsers.isEmpty();

        // Unless last matched container is a code block, try new container starts,
        // adding children to the last matched container:
        bool tryBlockStarts = cast(Paragraph)(blockParser.getBlock()) !is null
            || blockParser.isContainer();
        while (tryBlockStarts)
        {
            findNextNonSpace();

            // this is a little performance optimization:
            if (isBlank() || (indent < Parsing.CODE_BLOCK_INDENT
                    && Parsing.isLetter(line, nextNonSpace)))
            {
                setNewIndex(nextNonSpace);
                break;
            }

            BlockStartImpl blockStart = findBlockStart(blockParser);
            if (blockStart is null)
            {
                setNewIndex(nextNonSpace);
                break;
            }

            if (!allClosed)
            {
                finalizeBlocks(unmatchedBlockParsers);
                allClosed = true;
            }

            if (blockStart.getNewIndex() != -1)
            {
                setNewIndex(blockStart.getNewIndex());
            }
            else if (blockStart.getNewColumn() != -1)
            {
                setNewColumn(blockStart.getNewColumn());
            }

            if (blockStart.isReplaceActiveBlockParser())
            {
                removeActiveBlockParser();
            }

            auto startedParsers = blockStart.getBlockParsers();
            foreach (BlockParser newBlockParser; startedParsers)
            {
                blockParser = addChild(newBlockParser);
                tryBlockStarts = newBlockParser.isContainer();
            }
        }

        // What remains at the offset is a text line. Add the text to the
        // appropriate block.

        // First check for a lazy paragraph continuation:
        if (!allClosed && !isBlank() && cast(ParagraphParser) getActiveBlockParser() !is null)
        {
            // lazy paragraph continuation
            addLine();
        }
        else
        {
            // finalize any blocks not matched
            if (!allClosed)
            {
                finalizeBlocks(unmatchedBlockParsers);
            }

            if (!blockParser.isContainer())
            {
                addLine();
            }
            else if (!isBlank())
            {
                // create paragraph container for line
                addChild(new ParagraphParser());
                addLine();
            }
        }
    }

    /**
     * Scans forward from the current index over spaces and tabs and records where the
     * next other character is: updates nextNonSpace, nextNonSpaceColumn, indent and blank.
     * The cursor itself (index/column) is not moved.
     */
    private void findNextNonSpace()
    {
        int i = index;
        int cols = column;

        blank = true;
        int length = cast(int) line.length;
        while (i < length)
        {
            char c = line[i];
            switch (c)
            {
            case ' ':
                i++;
                cols++;
                continue;
            case '\t':
                i++;
                cols += (4 - (cols % 4));
                continue;
            default:
                break;
            }
            // Reached a character that is neither space nor tab.
            blank = false;
            break;
        }

        nextNonSpace = i;
        nextNonSpaceColumn = cols;
        indent = nextNonSpaceColumn - column;
    }

    /**
     * Moves the cursor forward to the given index (or to the end of the line,
     * whichever comes first), keeping the column in sync.
     */
    private void setNewIndex(int newIndex)
    {
        if (newIndex >= nextNonSpace)
        {
            // We can start from here, no need to calculate tab stops again
            index = nextNonSpace;
            column = nextNonSpaceColumn;
        }
        int length = cast(int) line.length;
        while (index < newIndex && index != length)
        {
            advance();
        }
        // If we're going to an index as opposed to a column, we're never within a tab
        columnIsInTab = false;
    }

    /**
     * Moves the cursor forward to the given column (or to the end of the line). When the
     * target column falls inside a tab, the index is left on that tab and columnIsInTab is set.
     */
    private void setNewColumn(int newColumn)
    {
        if (newColumn >= nextNonSpaceColumn)
        {
            // We can start from here, no need to calculate tab stops again
            index = nextNonSpace;
            column = nextNonSpaceColumn;
        }
        int length = cast(int) line.length;
        while (column < newColumn && index != length)
        {
            advance();
        }
        if (column > newColumn)
        {
            // Last character was a tab and we overshot our target
            index--;
            column = newColumn;
            columnIsInTab = true;
        }
        else
        {
            columnIsInTab = false;
        }
    }

    /**
     * Consumes the character at the current index: a tab moves the column to the next
     * tab stop, any other character moves it by one.
     */
    private void advance()
    {
        char c = line[index];
        if (c == '\t')
        {
            index++;
            column += Parsing.columnsToNextTabStop(column);
        }
        else
        {
            index++;
            column++;
        }
    }

    /**
     * Add line content to the active block parser. We assume it can accept lines -- that check should be done before
     * calling this.
     */
    private void addLine()
    {
        string content;
        if (columnIsInTab)
        {
            // Our column is in a partially consumed tab. Expand the remaining columns (to the next tab stop) to spaces.
            int afterTab = index + 1;
            string rest = line.substring(afterTab, cast(int) line.length);
            int spaces = Parsing.columnsToNextTabStop(column);
            StringBuilder sb = new StringBuilder(spaces + rest.length);
            for (int i = 0; i < spaces; i++)
            {
                sb.append(' ');
            }
            sb.append(rest);
            content = sb.toString();
        }
        else
        {
            content = line.substring(index, cast(int) line.length);
        }
        getActiveBlockParser().addLine(content);
    }

    /**
     * Asks each factory, in list order, whether a new block starts at the current position.
     *
     * Returns: the first result that is a BlockStartImpl, or null when no factory produced one
     */
    private BlockStartImpl findBlockStart(BlockParser blockParser)
    {
        MatchedBlockParser matchedBlockParser = new MatchedBlockParserImpl(blockParser);
        foreach (BlockParserFactory blockParserFactory; blockParserFactories)
        {
            BlockStart result = blockParserFactory.tryStart(this, matchedBlockParser);
            if (cast(BlockStartImpl) result !is null)
            {
                return cast(BlockStartImpl) result;
            }
        }
        return null;
    }

    /**
     * Finalize a block. Close it and do any necessary postprocessing, e.g. creating string_content from strings,
     * setting the 'tight' or 'loose' status of a list, and parsing the beginnings of paragraphs for reference
     * definitions.
     */
    private void finalize(BlockParser blockParser)
    {
        // logDebug("!!!!");
        if (getActiveBlockParser() is blockParser)
        {
            deactivateBlockParser();
        }

        blockParser.closeBlock();

        if (cast(ParagraphParser) blockParser !is null && cast(ReferenceParser) inlineParser !is null)
        {
            ParagraphParser paragraphParser = cast(ParagraphParser) blockParser;
            paragraphParser.closeBlock(cast(ReferenceParser) inlineParser);
        }
    }

    /**
     * Hands the inline parser to every block parser recorded in allBlockParsers so each
     * can parse its own inline content.
     */
    private void processInlines()
    {
        foreach (BlockParser blockParser; allBlockParsers)
        {
            blockParser.parseInlines(inlineParser);
        }
    }

    /**
     * Add block of type tag as a child of the tip. If the tip can't accept children, close and finalize it and try
     * its parent, and so on til we find a block that can accept children.
     *
     * Any Throwable raised while doing so is logged and swallowed; the given parser is
     * returned either way (null is returned only for a null argument).
     */
    private BlockParser addChild(BlockParser blockParser)
    {
        try
        {
            if (blockParser is null)
                return null;

            while (blockParser !is null && getActiveBlockParser() !is null
                    && !(getActiveBlockParser().canContain(blockParser.getBlock())))
            {
                finalize(getActiveBlockParser());
            }

            getActiveBlockParser().getBlock().appendChild(blockParser.getBlock());
            activateBlockParser(blockParser);

        }
        catch (Throwable e)
        {
            logError("msg : ", e.msg);
        }
        return blockParser;

    }

    /// Pushes the parser onto the active stack and records it for inline processing.
    private void activateBlockParser(BlockParser blockParser)
    {
        activeBlockParsers.add(blockParser);
        allBlockParsers.add(blockParser);
    }

    /// Pops the innermost parser off the active stack.
    private void deactivateBlockParser()
    {
        activeBlockParsers.removeAt(activeBlockParsers.size() - 1);
    }

    /**
     * Discards the innermost parser entirely: pops it, forgets it for inline processing
     * and unlinks its block from the tree.
     */
    private void removeActiveBlockParser()
    {
        BlockParser old = getActiveBlockParser();
        deactivateBlockParser();
        // Keep the remove() call outside the assert so it still runs when asserts are compiled out.
        auto f = allBlockParsers.remove(old);
        assert(f);

        old.getBlock().unlink();
    }

    /**
     * Finalize the given blocks, from the last element of the list to the first.
     */
    private void finalizeBlocks(List!(BlockParser) blockParsers)
    {
        for (int i = blockParsers.size() - 1; i >= 0; i--)
        {
            BlockParser blockParser = blockParsers.get(i);
            finalize(blockParser);
        }
    }

    /**
     * Finalizes every block that is still open, runs inline parsing and returns the document node.
     */
    private Document finalizeAndProcess()
    {
        finalizeBlocks(this.activeBlockParsers);
        this.processInlines();
        return this.documentBlockParser.getBlock();
    }

    /**
     * MatchedBlockParser view over the block parser that matched the current line,
     * passed to factories in findBlockStart.
     */
    private static class MatchedBlockParserImpl : MatchedBlockParser
    {

        private BlockParser matchedBlockParser;

        public this(BlockParser matchedBlockParser)
        {
            this.matchedBlockParser = matchedBlockParser;
        }

        /// Returns the wrapped block parser.
        override public BlockParser getMatchedBlockParser()
        {
            return matchedBlockParser;
        }

        /// Returns the paragraph's content string when the wrapped parser is a ParagraphParser, otherwise null.
        override public string getParagraphContent()
        {
            if (cast(ParagraphParser) matchedBlockParser !is null)
            {
                ParagraphParser paragraphParser = cast(ParagraphParser) matchedBlockParser;
                return paragraphParser.getContentString();
            }
            return null;
        }
    }
}