hunt.markdown.parser.Parser source code

1 module hunt.markdown.parser.Parser;
2 
3 import hunt.markdown.Extension;
4 import hunt.markdown.internal.DocumentParser;
5 import hunt.markdown.internal.InlineParserImpl;
6 import hunt.markdown.node.Node;
7 import hunt.markdown.node.Block;
8 import hunt.markdown.parser.block.BlockParserFactory;
9 import hunt.markdown.parser.delimiter.DelimiterProcessor;
10 import hunt.markdown.parser.InlineParserContext;
11 import hunt.markdown.parser.InlineParserFactory;
12 import hunt.markdown.parser.InlineParser;
13 import hunt.markdown.parser.PostProcessor;
14 
15 import hunt.Exceptions;
16 import hunt.util.Common;
17 import hunt.collection.ArrayList;
18 import hunt.collection.List;
19 import hunt.collection.Set;
20 
/**
 * Parses input text to a tree of nodes.
 * <p>
 * Start with the {@link #builder} method, configure the parser and build it. Example:
 * <pre><code>
 * Parser parser = Parser.builder().build();
 * Node document = parser.parse("input text");
 * </code></pre>
 * <p>
 * A built parser keeps its own snapshot of the builder's delimiter processors and post processors,
 * so changing or reusing the builder afterwards does not affect parsers that were already built.
 */
class Parser {

    private List!(BlockParserFactory) blockParserFactories;
    private List!(DelimiterProcessor) delimiterProcessors;
    private InlineParserFactory _inlineParserFactory;
    private List!(PostProcessor) postProcessors;

    /**
     * Creates a parser from the builder's current configuration.
     *
     * @param builder the configured builder
     */
    private this(Builder builder) {
        this.blockParserFactories = DocumentParser.calculateBlockParserFactories(builder.blockParserFactories, builder._enabledBlockTypes);
        this._inlineParserFactory = builder._inlineParserFactory;

        // Copy the lists instead of aliasing them: otherwise later calls on the same builder would
        // silently alter this parser and bypass the configuration check below.
        this.postProcessors = snapshot!(PostProcessor)(builder.postProcessors);
        this.delimiterProcessors = snapshot!(DelimiterProcessor)(builder.delimiterProcessors);

        // Try to construct an inline parser. This might raise exceptions in case of invalid configuration.
        getInlineParser();
    }

    /**
     * Create a new builder for configuring a {@link Parser}.
     *
     * @return a builder
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Parse the specified input text into a tree of nodes.
     * <p>
     * Every invocation creates a fresh inline parser and a fresh {@link DocumentParser}, so no parsing
     * state is shared between calls. The configured post processors and a custom inline parser
     * factory (if any) are shared across calls.
     *
     * @param input the text to parse
     * @return the root node, after all post processors have been applied
     */
    public Node parse(string input) {
        DocumentParser documentParser = new DocumentParser(blockParserFactories, getInlineParser());
        return postProcess(documentParser.parse(input));
    }

    /*
     * Disabled: reader-based variant of parse(). It is kept for reference only (apparently carried
     * over from a Java original) and is intentionally a plain comment rather than a doc comment so
     * that it does not get attached to the next declaration.
     *
     * Intended contract: parse the given reader into a tree of nodes; the caller is responsible for
     * closing the reader and for skipping a byte order mark (BOM) before handing the reader over.
     */
    // public Node parseReader(Reader input)
    // {
    //     InlineParser inlineParser = getInlineParser();
    //     DocumentParser documentParser = new DocumentParser(blockParserFactories, inlineParser);
    //     Node document = documentParser.parse(input);
    //     return postProcess(document);
    // }

    /**
     * Creates the inline parser for one parse run: the built-in {@link InlineParserImpl} when no
     * factory was configured, otherwise whatever the configured factory creates.
     */
    private InlineParser getInlineParser() {
        if (this._inlineParserFactory is null) {
            return new InlineParserImpl(delimiterProcessors);
        }
        auto inlineParserContext = new CustomInlineParserContext(delimiterProcessors);
        return this._inlineParserFactory.create(inlineParserContext);
    }

    /**
     * Runs the post processors in list order; each one receives the node returned by the previous one.
     */
    private Node postProcess(Node document) {
        Node current = document;
        foreach (PostProcessor postProcessor ; postProcessors) {
            current = postProcessor.process(current);
        }
        return current;
    }

    /**
     * Returns a new list holding the elements of {@code source} in iteration order.
     */
    private static List!(T) snapshot(T)(List!(T) source) {
        List!(T) copy = new ArrayList!(T)();
        foreach (T item ; source) {
            copy.add(item);
        }
        return copy;
    }

    /**
     * Context handed to a custom {@link InlineParserFactory}; exposes the custom delimiter processors.
     * Declared static because it does not use the enclosing parser instance.
     */
    private static class CustomInlineParserContext : InlineParserContext {

        private List!(DelimiterProcessor) delimiterProcessors;

        this(List!(DelimiterProcessor) delimiterProcessors) {
            this.delimiterProcessors = delimiterProcessors;
        }

        override public List!(DelimiterProcessor) getCustomDelimiterProcessors() {
            return delimiterProcessors;
        }
    }

    /**
     * Builder for configuring a {@link Parser}.
     */
    public static class Builder {
        private List!(BlockParserFactory) blockParserFactories;
        private List!(DelimiterProcessor) delimiterProcessors;
        private List!(PostProcessor) postProcessors;
        private Set!(TypeInfo_Class) _enabledBlockTypes;
        private InlineParserFactory _inlineParserFactory = null;

        /**
         * Starts with no custom factories or processors and the default set of block types
         * reported by {@link DocumentParser}.
         */
        this()
        {
            blockParserFactories = new ArrayList!(BlockParserFactory)();
            delimiterProcessors = new ArrayList!(DelimiterProcessor)();
            postProcessors = new ArrayList!(PostProcessor)();
            _enabledBlockTypes = DocumentParser.getDefaultBlockParserTypes();
        }

        /**
         * @return the configured {@link Parser}
         */
        public Parser build() {
            return new Parser(this);
        }

        /**
         * Lets every {@link ParserExtension} among the given extensions configure this builder;
         * extensions of other kinds are ignored.
         *
         * @param extensions extensions to use on this parser
         * @return {@code this}
         * @throws NullPointerException if {@code extensions} is null
         */
        public Builder extensions(Iterable!Extension extensions) {
            if (extensions is null) {
                throw new NullPointerException("extensions must not be null");
            }
            foreach (Extension extension ; extensions) {
                // The cast yields null for extensions that are not parser extensions.
                if (auto parserExtension = cast(ParserExtension) extension) {
                    parserExtension.extend(this);
                }
            }
            return this;
        }

        /**
         * Describe the set of markdown features the parser will recognize and parse.
         * <p>
         * By default, CommonMark will recognize and parse the following set of "block" elements:
         * <ul>
         * <li>{@link Heading} ({@code #})
         * <li>{@link HtmlBlock} ({@code <html></html>})
         * <li>{@link ThematicBreak} (Horizontal Rule) ({@code ---})
         * <li>{@link FencedCodeBlock} ({@code ```})
         * <li>{@link IndentedCodeBlock}
         * <li>{@link BlockQuote} ({@code >})
         * <li>{@link ListBlock} (Ordered / Unordered List) ({@code 1. / *})
         * </ul>
         * <p>
         * To parse only a subset of the features listed above, pass a set containing the
         * {@code TypeInfo_Class} of each feature's associated {@link Block} class.
         * <p>
         * E.g., to only parse headings and lists (using any {@code Set!TypeInfo_Class} implementation):
         * <pre><code>
         * Set!TypeInfo_Class types = new HashSet!TypeInfo_Class();
         * types.add(typeid(Heading));
         * types.add(typeid(ListBlock));
         * Parser.builder().enabledBlockTypes(types);
         * </code></pre>
         *
         * @param enabledBlockTypes A set of block node types the parser will parse.
         * If this set is empty, the parser will not recognize any CommonMark core features.
         * @return {@code this}
         * @throws NullPointerException if {@code enabledBlockTypes} is null
         */
        public Builder enabledBlockTypes(Set!TypeInfo_Class enabledBlockTypes) {
            if (enabledBlockTypes is null) {
                throw new NullPointerException("enabledBlockTypes must not be null");
            }
            this._enabledBlockTypes = enabledBlockTypes;
            return this;
        }

        /**
         * Adds a custom block parser factory.
         * <p>
         * Note that custom factories are applied <em>before</em> the built-in factories. This is so that
         * extensions can change how some syntax is parsed that would otherwise be handled by built-in factories.
         * "With great power comes great responsibility."
         *
         * @param blockParserFactory a block parser factory implementation
         * @return {@code this}
         * @throws NullPointerException if {@code blockParserFactory} is null
         */
        public Builder customBlockParserFactory(BlockParserFactory blockParserFactory) {
            // Reject null here instead of failing later in the middle of a parse run.
            if (blockParserFactory is null) {
                throw new NullPointerException("blockParserFactory must not be null");
            }
            blockParserFactories.add(blockParserFactory);
            return this;
        }

        /**
         * Adds a custom delimiter processor.
         * <p>
         * Note that multiple delimiter processors with the same characters can be added, as long as they have a
         * different minimum length. In that case, the processor with the shortest matching length is used. Adding more
         * than one delimiter processor with the same character and minimum length is invalid.
         *
         * @param delimiterProcessor a delimiter processor implementation
         * @return {@code this}
         * @throws NullPointerException if {@code delimiterProcessor} is null
         */
        public Builder customDelimiterProcessor(DelimiterProcessor delimiterProcessor) {
            if (delimiterProcessor is null) {
                throw new NullPointerException("delimiterProcessor must not be null");
            }
            delimiterProcessors.add(delimiterProcessor);
            return this;
        }

        /**
         * Adds a post processor that is run on the document after parsing.
         * <p>
         * Post processors run in the order they were added; each one receives the node returned by
         * the previous one.
         *
         * @param postProcessor a post processor implementation
         * @return {@code this}
         * @throws NullPointerException if {@code postProcessor} is null
         */
        public Builder postProcessor(PostProcessor postProcessor) {
            if (postProcessor is null) {
                throw new NullPointerException("postProcessor must not be null");
            }
            postProcessors.add(postProcessor);
            return this;
        }

        /**
         * Overrides the parser used for inline markdown processing.
         * <p>
         * Provide an implementation of InlineParserFactory which provides a custom inline parser
         * to modify how the following are parsed:
         * bold (**)
         * italic (*)
         * strikethrough (~~)
         * backtick quote (`)
         * link ([title](http://))
         * image (![alt](http://))
         * <p>
         * Note that if this method is not called or the inline parser factory is set to null, then the default
         * implementation will be used.
         *
         * @param inlineParserFactory an inline parser factory implementation, or null for the default
         * @return {@code this}
         */
        public Builder inlineParserFactory(InlineParserFactory inlineParserFactory) {
            this._inlineParserFactory = inlineParserFactory;
            return this;
        }
    }

    /**
     * Extension for {@link Parser}.
     */
    public interface ParserExtension : Extension {
        /**
         * Called by {@link Builder#extensions} so the extension can register its factories and
         * processors on the builder.
         *
         * @param parserBuilder the builder being configured
         */
        void extend(Builder parserBuilder);
    }
}