module hunt.markdown.parser.Parser;

import hunt.markdown.Extension;
import hunt.markdown.internal.DocumentParser;
import hunt.markdown.internal.InlineParserImpl;
import hunt.markdown.node.Node;
import hunt.markdown.node.Block;
import hunt.markdown.parser.block.BlockParserFactory;
import hunt.markdown.parser.delimiter.DelimiterProcessor;
import hunt.markdown.parser.InlineParserContext;
import hunt.markdown.parser.InlineParserFactory;
import hunt.markdown.parser.InlineParser;
import hunt.markdown.parser.PostProcessor;

import hunt.Exceptions;
import hunt.util.Common;
import hunt.collection.ArrayList;
import hunt.collection.List;
import hunt.collection.Set;

/**
 * Parses input text to a tree of nodes.
 * <p>
 * Start with the {@link #builder} method, configure the parser and build it. Example:
 * <pre><code>
 * Parser parser = Parser.builder().build();
 * Node document = parser.parse("input text");
 * </code></pre>
 */
class Parser {

    private List!(BlockParserFactory) blockParserFactories;
    private List!(DelimiterProcessor) delimiterProcessors;
    private InlineParserFactory _inlineParserFactory;
    private List!(PostProcessor) postProcessors;

    /**
     * Creates a parser from the configuration collected by a {@link Builder}.
     * <p>
     * The delimiter processor and post processor lists are taken over by reference (not copied),
     * so changes made to the builder after {@link Builder#build} are visible to this parser.
     *
     * @param builder the configured builder
     */
    private this(Builder builder) {
        this.blockParserFactories = DocumentParser.calculateBlockParserFactories(
                builder.blockParserFactories, builder._enabledBlockTypes);
        this._inlineParserFactory = builder._inlineParserFactory;
        this.postProcessors = builder.postProcessors;
        this.delimiterProcessors = builder.delimiterProcessors;

        // Try to construct an inline parser. This might raise exceptions in case of invalid configuration.
        // The instance itself is discarded; parse() creates a fresh one per call.
        getInlineParser();
    }

    /**
     * Create a new builder for configuring a {@link Parser}.
     *
     * @return a builder
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Parse the specified input text into a tree of nodes.
     * <p>
     * A new inline parser and a new {@link DocumentParser} are created for each invocation,
     * so no parsing state is carried over between calls.
     *
     * @param input the text to parse
     * @return the root node, after all registered post processors have been applied
     */
    public Node parse(string input) {
        InlineParser inlineParser = getInlineParser();
        DocumentParser documentParser = new DocumentParser(blockParserFactories, inlineParser);
        Node document = documentParser.parse(input);
        return postProcess(document);
    }

    /*
     * NOTE: reader-based overload, currently disabled (kept for reference). The description and
     * example below refer to Java I/O classes and appear to be carried over from the Java original.
     *
     * Parse the specified reader into a tree of nodes. The caller is responsible for closing the reader.
     * <pre><code>
     * Parser parser = Parser.builder().build();
     * try (InputStreamReader reader = new InputStreamReader(new FileInputStream("file.md"), StandardCharsets.UTF_8)) {
     *     Node document = parser.parseReader(reader);
     *     // ...
     * }
     * </code></pre>
     * Note that if you have a file with a byte order mark (BOM), you need to skip it before handing the reader to this
     * library. There are existing classes that do that, e.g. see {@code BOMInputStream} in Commons IO.
     * <p>
     * A new parser state is used for each invocation.
     *
     * @param input the reader to parse
     * @return the root node
     * @throws IOException when reading throws an exception
     */
    // public Node parseReader(Reader input)
    // {
    //     InlineParser inlineParser = getInlineParser();
    //     DocumentParser documentParser = new DocumentParser(blockParserFactories, inlineParser);
    //     Node document = documentParser.parse(input);
    //     return postProcess(document);
    // }

    /**
     * Creates a new inline parser for one parse run.
     * <p>
     * Uses the default {@link InlineParserImpl} when no custom factory was configured; otherwise
     * asks the configured factory, handing it the custom delimiter processors through a context object.
     *
     * @return a newly created inline parser
     */
    private InlineParser getInlineParser() {
        if (this._inlineParserFactory is null) {
            return new InlineParserImpl(delimiterProcessors);
        }

        InlineParserContext context = new CustomInlineParserContext(delimiterProcessors);
        return this._inlineParserFactory.create(context);
    }

    /**
     * Runs every registered post processor over the document, in registration order.
     * Each processor receives the node returned by the previous one.
     *
     * @param document the freshly parsed root node
     * @return the node returned by the last post processor (or {@code document} if there are none)
     */
    private Node postProcess(Node document) {
        foreach (PostProcessor postProcessor; postProcessors) {
            document = postProcessor.process(document);
        }
        return document;
    }

    /**
     * Context handed to a custom {@link InlineParserFactory}; exposes the configured delimiter processors.
     * Declared {@code static} because it never touches the enclosing {@link Parser} instance.
     */
    private static class CustomInlineParserContext : InlineParserContext {

        private List!(DelimiterProcessor) delimiterProcessors;

        this(List!(DelimiterProcessor) delimiterProcessors) {
            this.delimiterProcessors = delimiterProcessors;
        }

        override public List!(DelimiterProcessor) getCustomDelimiterProcessors() {
            return delimiterProcessors;
        }
    }

    /**
     * Builder for configuring a {@link Parser}.
     */
    public static class Builder {
        private List!(BlockParserFactory) blockParserFactories;
        private List!(DelimiterProcessor) delimiterProcessors;
        private List!(PostProcessor) postProcessors;
        private Set!(TypeInfo_Class) _enabledBlockTypes;
        private InlineParserFactory _inlineParserFactory = null;

        /**
         * Creates a builder with no custom factories or processors and with the default
         * block types from {@link DocumentParser#getDefaultBlockParserTypes} enabled.
         */
        this()
        {
            blockParserFactories = new ArrayList!(BlockParserFactory)();
            delimiterProcessors = new ArrayList!(DelimiterProcessor)();
            postProcessors = new ArrayList!(PostProcessor)();
            _enabledBlockTypes = DocumentParser.getDefaultBlockParserTypes();
        }

        /**
         * @return the configured {@link Parser}
         */
        public Parser build() {
            return new Parser(this);
        }

        /**
         * Lets every {@link ParserExtension} among the given extensions configure this builder.
         * Extensions of other kinds are ignored.
         *
         * @param extensions extensions to use on this parser
         * @return {@code this}
         * @throws NullPointerException if {@code extensions} is null
         */
        public Builder extensions(Iterable!Extension extensions) {
            if (extensions is null) {
                throw new NullPointerException("extensions must not be null");
            }
            foreach (Extension extension; extensions) {
                // A class/interface cast yields null when the object is not of the target type,
                // so a single cast both tests and converts.
                if (auto parserExtension = cast(ParserExtension) extension) {
                    parserExtension.extend(this);
                }
            }
            return this;
        }

        /**
         * Describe the list of markdown features the parser will recognize and parse.
         * <p>
         * By default, CommonMark will recognize and parse the following set of "block" elements:
         * <ul>
         * <li>{@link Heading} ({@code #})
         * <li>{@link HtmlBlock} ({@code <html></html>})
         * <li>{@link ThematicBreak} (Horizontal Rule) ({@code ---})
         * <li>{@link FencedCodeBlock} ({@code ```})
         * <li>{@link IndentedCodeBlock}
         * <li>{@link BlockQuote} ({@code >})
         * <li>{@link ListBlock} (Ordered / Unordered List) ({@code 1. / *})
         * </ul>
         * <p>
         * To parse only a subset of the features listed above, pass a set of each feature's associated {@link Block} class.
         * <p>
         * E.g., to only parse headings and lists (sketch; assumes a {@code Set} implementation such as
         * {@code hunt.collection.HashSet} and that block types are identified by {@code typeid} -- verify
         * against {@link DocumentParser}):
         * <pre>
         * {@code
         * auto types = new HashSet!(TypeInfo_Class)();
         * types.add(typeid(Heading));
         * types.add(typeid(ListBlock));
         * Parser.builder().enabledBlockTypes(types);
         * }
         * </pre>
         *
         * @param enabledBlockTypes A set of block nodes the parser will parse.
         *                          If this set is empty, the parser will not recognize any CommonMark core features.
         * @return {@code this}
         * @throws NullPointerException if {@code enabledBlockTypes} is null
         */
        public Builder enabledBlockTypes(Set!TypeInfo_Class enabledBlockTypes) {
            if (enabledBlockTypes is null) {
                throw new NullPointerException("enabledBlockTypes must not be null");
            }
            this._enabledBlockTypes = enabledBlockTypes;
            return this;
        }

        /**
         * Adds a custom block parser factory.
         * <p>
         * Note that custom factories are applied <em>before</em> the built-in factories. This is so that
         * extensions can change how some syntax is parsed that would otherwise be handled by built-in factories.
         * "With great power comes great responsibility."
         *
         * @param blockParserFactory a block parser factory implementation
         * @return {@code this}
         * @throws NullPointerException if {@code blockParserFactory} is null
         */
        public Builder customBlockParserFactory(BlockParserFactory blockParserFactory) {
            if (blockParserFactory is null) {
                throw new NullPointerException("blockParserFactory must not be null");
            }
            blockParserFactories.add(blockParserFactory);
            return this;
        }

        /**
         * Adds a custom delimiter processor.
         * <p>
         * Note that multiple delimiter processors with the same characters can be added, as long as they have a
         * different minimum length. In that case, the processor with the shortest matching length is used. Adding more
         * than one delimiter processor with the same character and minimum length is invalid.
         *
         * @param delimiterProcessor a delimiter processor implementation
         * @return {@code this}
         * @throws NullPointerException if {@code delimiterProcessor} is null
         */
        public Builder customDelimiterProcessor(DelimiterProcessor delimiterProcessor) {
            if (delimiterProcessor is null) {
                throw new NullPointerException("delimiterProcessor must not be null");
            }
            delimiterProcessors.add(delimiterProcessor);
            return this;
        }

        /**
         * Adds a post processor. Post processors run after parsing, in the order they were added,
         * each one receiving the document node returned by the previous one.
         *
         * @param postProcessor a post processor implementation
         * @return {@code this}
         * @throws NullPointerException if {@code postProcessor} is null
         */
        public Builder postProcessor(PostProcessor postProcessor) {
            if (postProcessor is null) {
                throw new NullPointerException("postProcessor must not be null");
            }
            postProcessors.add(postProcessor);
            return this;
        }

        /**
         * Overrides the parser used for inline markdown processing.
         * <p>
         * Provide an implementation of InlineParserFactory which provides a custom inline parser
         * to modify how the following are parsed:
         * bold (**)
         * italic (*)
         * strikethrough (~~)
         * backtick quote (`)
         * link ([title](http://))
         * image (![alt](http://))
         * <p>
         * Note that if this method is not called or the inline parser factory is set to null, then the default
         * implementation will be used.
         *
         * @param inlineParserFactory an inline parser factory implementation, or null for the default
         * @return {@code this}
         */
        public Builder inlineParserFactory(InlineParserFactory inlineParserFactory) {
            this._inlineParserFactory = inlineParserFactory;
            return this;
        }
    }

    /**
     * Extension for {@link Parser}.
     */
    public interface ParserExtension : Extension {
        /**
         * Called by {@link Builder#extensions} so the extension can register its factories and processors.
         *
         * @param parserBuilder the builder to configure
         */
        void extend(Builder parserBuilder);
    }
}