module hunt.markdown.internal.util.Escaping;

import hunt.markdown.internal.util.Html5Entities;

// import java.nio.charset.Charset;
import std.algorithm.searching;
// import hunt.time.util.Locale;
import hunt.text;
import hunt.util.StringBuilder;
import std.regex;
import std.string;
import hunt.markdown.internal.util.Common;

/**
 * String escaping and unescaping helpers: HTML escaping, backslash/entity
 * unescaping, percent-encoding of URLs and reference-label normalization.
 *
 * All regular expressions are kept as pattern strings and compiled with
 * `regex(...)` at the point of use.
 */
class Escaping {

    /// Character class of the characters that may be backslash-escaped.
    public enum string ESCAPABLE = "[!\"#$%&\'()*+,./:;<=>?@\\[\\\\\\]^_`{|}~-]";

    /// A named, decimal or hexadecimal character reference (used case-insensitively).
    private enum string ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});";

    /// Cheap pre-check: a string without '\' or '&' has nothing to unescape.
    private enum string BACKSLASH_OR_AMP = "[\\\\&]";

    /// Either a backslash followed by an escapable character, or an entity.
    private enum string ENTITY_OR_ESCAPED_CHAR =
        "\\\\" ~ ESCAPABLE ~ '|' ~ ENTITY;

    /// The four characters that escapeHtml replaces.
    private enum string XML_SPECIAL = "[&<>\"]";

    private enum string XML_SPECIAL_RE = XML_SPECIAL;

    /// ENTITY is listed first so that an existing entity is matched as a whole
    /// before its leading '&' can be matched as a special character.
    private enum string XML_SPECIAL_OR_ENTITY =
        ENTITY ~ '|' ~ XML_SPECIAL;

    // From RFC 3986 (see "reserved", "unreserved") except don't escape '[' or ']' to be compatible with JS encodeURI
    private enum string ESCAPE_IN_URI =
        "(%[a-fA-F0-9]{0,2}|[^:/?#@!$&'()*+,;=a-zA-Z0-9\\-._~])";

    /// Upper-case hexadecimal digits, indexed by nibble value.
    private enum char[] HEX_DIGITS = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'];

    /// One or more space, tab, CR or LF characters.
    private enum string WHITESPACE = "[ \t\r\n]+";

    // NOTE(review): MakeGlobalVar is not defined in this file (presumably it comes
    // from hunt.markdown.internal.util.Common). It is assumed to declare a shared
    // variable with the given name, initialized from the given expression -- confirm.

    // Replaces each XML special character with its HTML entity; any other match
    // (i.e. an already well-formed entity when entities are preserved) is copied as is.
    mixin(MakeGlobalVar!(Replacer)("UNSAFE_CHAR_REPLACER",`new class Replacer {
        override public void replace(string input, StringBuilder sb) {
            switch (input) {
                case "&":
                    sb.append("&amp;");
                    break;
                case "<":
                    sb.append("&lt;");
                    break;
                case ">":
                    sb.append("&gt;");
                    break;
                case "\"":
                    sb.append("&quot;");
                    break;
                default:
                    sb.append(input);
                    break;
            }
        }
    }`));

    // Drops the backslash of a backslash escape, or resolves an entity through
    // Html5Entities.entityToString.
    mixin(MakeGlobalVar!(Replacer)("UNESCAPE_REPLACER",`new class Replacer {
        override public void replace(string input, StringBuilder sb) {
            if (input[0] == '\\') {
                sb.append(input, 1, cast(int)input.length);
            } else {
                sb.append(Html5Entities.entityToString(input));
            }
        }
    }`));

    // Percent-encodes one match of ESCAPE_IN_URI.
    mixin(MakeGlobalVar!(Replacer)("URI_REPLACER",`new class Replacer {
        override public void replace(string input, StringBuilder sb) {
            if (input.startsWith("%")) {
                if (input.length == 3) {
                    // Already percent-encoded, preserve
                    sb.append(input);
                } else {
                    // %25 is the percent-encoding for %
                    sb.append("%25");
                    sb.append(input, 1, cast(int)input.length);
                }
            } else {
                // The string's bytes are encoded one by one as %XX.
                byte[] bytes = cast(byte[])input/* .getBytes(Charset.forName("UTF-8")) */;
                foreach (byte b ; bytes) {
                    sb.append('%');
                    sb.append(HEX_DIGITS[(b >> 4) & 0xF]);
                    sb.append(HEX_DIGITS[b & 0xF]);
                }
            }
        }
    }`));

    /**
     * Escapes `&`, `<`, `>` and `"` in `input` as HTML entities.
     *
     * Params:
     *   input            = text to escape
     *   preserveEntities = when true, substrings that already look like an
     *                      entity (matched case-insensitively) are left untouched
     *
     * Returns: the escaped text, or `input` itself when nothing matched.
     */
    public static string escapeHtml(string input, bool preserveEntities) {
        Regex!char p = preserveEntities ? regex(XML_SPECIAL_OR_ENTITY, "i") : regex(XML_SPECIAL_RE);
        return replaceAll(p, input, UNSAFE_CHAR_REPLACER);
    }

    /**
     * Replace entities and backslash escapes with literal characters.
     *
     * Returns: `s` itself when it contains neither a backslash nor an ampersand.
     */
    public static string unescapeString(string s) {
        if (!matchAll(s, BACKSLASH_OR_AMP).empty()) {
            return replaceAll(regex(ENTITY_OR_ESCAPED_CHAR, "i"), s, UNESCAPE_REPLACER);
        } else {
            return s;
        }
    }

    /**
     * Percent-encodes every part of `s` matched by ESCAPE_IN_URI. Existing
     * `%XX` sequences are preserved; a '%' not followed by two hex digits
     * becomes `%25`.
     */
    public static string percentEncodeUrl(string s) {
        return replaceAll(regex(ESCAPE_IN_URI), s, URI_REPLACER);
    }

    /**
     * Normalizes a reference label: removes the first and last character
     * (the enclosing '[' and ']'), strips surrounding whitespace, lower-cases
     * the result and collapses each whitespace run into a single space.
     *
     * NOTE(review): the first/last character are removed unconditionally, so
     * `input` is expected to be at least two characters long -- confirm callers.
     */
    public static string normalizeReference(string input) {
        // Strip '[' and ']', then strip
        string stripped = input.substring(1, cast(int)input.length - 1).strip();
        string lowercase = stripped.toLower(/* Locale.ROOT */);
        return std.regex.replaceAll(lowercase, regex(WHITESPACE), " ");
    }

    /**
     * Rebuilds `s`, handing every match of `p` to `replacer` and copying the
     * text between matches unchanged.
     *
     * Returns: `s` itself when `p` does not match at all.
     */
    private static string replaceAll(Regex!char p, string s, Replacer replacer) {
        auto matchers = matchAll(s, p);

        if (matchers.empty()) {
            return s;
        }

        StringBuilder sb = new StringBuilder(s.length + 16);
        int lastEnd = 0;
        foreach (matcher; matchers) {
            string hit = matcher.hit;
            // The match offset comes straight from the regex engine: `pre` is
            // the slice of the input that precedes the match.
            int start = cast(int) matcher.pre.length;
            sb.append(s, lastEnd, start);
            replacer.replace(hit, sb);
            lastEnd = start + cast(int) hit.length;
        }

        // Copy whatever follows the last match.
        if (lastEnd != s.length) {
            sb.append(s, lastEnd, cast(int) s.length);
        }
        return sb.toString();
    }

    /// Callback that writes the replacement for one matched substring into `sb`.
    private interface Replacer {
        void replace(string input, StringBuilder sb);
    }
}