sable-markdown/src/parser/blocks/html_block.rs@main
raw
1use nom::{
2 IResult, Parser,
3 branch::alt,
4 bytes::complete::{tag, tag_no_case},
5 character::complete::{
6 alpha1, alphanumeric1, anychar, char, line_ending, one_of, satisfy, space0, space1,
7 },
8 combinator::{eof, not, opt, peek, recognize, value, verify},
9 multi::{many_m_n, many0, many1},
10 sequence::{delimited, pair, preceded, terminated},
11};
12
13pub(super) fn html_block() -> impl FnMut(&str) -> IResult<&str, &str> {
14 move |input: &str| {
15 alt((
16 html_block1(),
17 html_block2(),
18 html_block3(),
19 html_block4(),
20 html_block5(),
21 html_block6(),
22 html_block7(),
23 ))
24 .parse(input)
25 }
26}
27
28fn html_block1() -> impl FnMut(&str) -> IResult<&str, &str> {
29 move |input: &str| {
30 let tag_variant_parser = || {
31 alt((
32 tag_no_case("script"),
33 tag_no_case("pre"),
34 tag_no_case("style"),
35 ))
36 };
37
38 let end_parser = || delimited(tag("</"), tag_variant_parser(), char('>'));
39
40 preceded(
41 many_m_n(0, 3, char(' ')),
42 recognize((
43 char('<'),
44 tag_variant_parser(),
45 alt((
46 value((), char(' ')),
47 value((), char('>')),
48 value((), line_ending),
49 )),
50 many0(pair(peek(not(end_parser())), anychar)),
51 end_parser(),
52 )),
53 )
54 .parse(input)
55 }
56}
57
58fn html_block2() -> impl FnMut(&str) -> IResult<&str, &str> {
59 move |input: &str| {
60 preceded(
61 many_m_n(0, 3, char(' ')),
62 recognize((
63 tag("<!--"),
64 many0(pair(peek(not(tag("-->"))), anychar)),
65 tag("-->"),
66 )),
67 )
68 .parse(input)
69 }
70}
71
72fn html_block3() -> impl FnMut(&str) -> IResult<&str, &str> {
73 move |input: &str| {
74 preceded(
75 many_m_n(0, 3, char(' ')),
76 recognize((
77 tag("<?"),
78 many0(pair(peek(not(tag("?>"))), anychar)),
79 tag("?>"),
80 )),
81 )
82 .parse(input)
83 }
84}
85
86fn html_block4() -> impl FnMut(&str) -> IResult<&str, &str> {
87 move |input: &str| {
88 preceded(
89 many_m_n(0, 3, char(' ')),
90 recognize((
91 tag("<!"),
92 satisfy(|c| c.is_ascii_uppercase()),
93 many0(pair(peek(not(char('>'))), anychar)),
94 tag(">"),
95 )),
96 )
97 .parse(input)
98 }
99}
100
101fn html_block5() -> impl FnMut(&str) -> IResult<&str, &str> {
102 move |input: &str| {
103 preceded(
104 many_m_n(0, 3, char(' ')),
105 recognize((
106 tag("<![CDATA["),
107 many0(pair(peek(not(tag("]]>"))), anychar)),
108 tag("]]>"),
109 )),
110 )
111 .parse(input)
112 }
113}
114
115fn html_block6() -> impl FnMut(&str) -> IResult<&str, &str> {
116 move |input: &str| {
117 let tag_variant = alt((
118 alt((
119 tag_no_case("address"),
120 tag_no_case("article"),
121 tag_no_case("aside"),
122 tag_no_case("a"),
123 tag_no_case("base"),
124 tag_no_case("basefont"),
125 tag_no_case("blockquote"),
126 tag_no_case("body"),
127 tag_no_case("caption"),
128 tag_no_case("center"),
129 tag_no_case("col"),
130 tag_no_case("colgroup"),
131 )),
132 alt((
133 tag_no_case("dd"),
134 tag_no_case("details"),
135 tag_no_case("dialog"),
136 tag_no_case("dir"),
137 tag_no_case("div"),
138 tag_no_case("dl"),
139 tag_no_case("dt"),
140 tag_no_case("fieldset"),
141 tag_no_case("figcaption"),
142 tag_no_case("figure"),
143 tag_no_case("footer"),
144 tag_no_case("form"),
145 tag_no_case("frame"),
146 tag_no_case("frameset"),
147 )),
148 alt((
149 tag_no_case("h1"),
150 tag_no_case("h2"),
151 tag_no_case("h3"),
152 tag_no_case("h4"),
153 tag_no_case("h5"),
154 tag_no_case("h6"),
155 tag_no_case("head"),
156 tag_no_case("header"),
157 tag_no_case("hr"),
158 tag_no_case("html"),
159 tag_no_case("iframe"),
160 tag_no_case("legend"),
161 )),
162 alt((
163 tag_no_case("li"),
164 tag_no_case("link"),
165 tag_no_case("main"),
166 tag_no_case("menu"),
167 tag_no_case("menuitem"),
168 tag_no_case("nav"),
169 tag_no_case("noframes"),
170 tag_no_case("ol"),
171 tag_no_case("optgroup"),
172 tag_no_case("option"),
173 tag_no_case("p"),
174 tag_no_case("param"),
175 )),
176 alt((
177 tag_no_case("section"),
178 tag_no_case("source"),
179 tag_no_case("span"),
180 tag_no_case("summary"),
181 tag_no_case("table"),
182 tag_no_case("tbody"),
183 tag_no_case("td"),
184 tag_no_case("tfoot"),
185 tag_no_case("th"),
186 tag_no_case("thead"),
187 tag_no_case("title"),
188 tag_no_case("tr"),
189 tag_no_case("track"),
190 tag_no_case("ul"),
191 )),
192 ));
193 let end_parser = || {
194 alt((
195 value((), terminated(line_ending, (space0, line_ending))),
196 value((), eof),
197 ))
198 };
199
200 preceded(
201 many_m_n(0, 3, char(' ')),
202 recognize((
203 alt((value((), tag("</")), value((), char('<')))),
204 tag_variant,
205 alt((
206 value((), char(' ')),
207 value((), line_ending),
208 value((), tag("/>")),
209 value((), char('>')),
210 )),
211 many0(pair(peek(not(end_parser())), anychar)),
212 opt(line_ending),
213 )),
214 )
215 .parse(input)
216 }
217}
218
219fn html_block7() -> impl FnMut(&str) -> IResult<&str, &str> {
220 move |input: &str| {
221 let end_parser = || {
222 alt((
223 value((), (line_ending, space0, line_ending)),
224 value((), eof),
225 ))
226 };
227
228 preceded(
229 many_m_n(0, 3, char(' ')),
230 recognize((
231 alt((
232 complete_open_html_tag(&["script", "pre", "style"]),
233 complete_closing_html_tag,
234 )),
235 alt((value((), line_ending), value((), char(' ')))),
236 many0(pair(peek(not(end_parser())), anychar)),
237 end_parser(),
238 )),
239 )
240 .parse(input)
241 }
242}
243
244fn complete_open_html_tag(
245 restricted_tags: &'static [&'static str],
246) -> impl FnMut(&str) -> IResult<&str, &str> {
247 move |input: &str| {
248 recognize((
249 char('<'),
250 verify(html_tag_name, |s: &str| {
251 !restricted_tags
252 .iter()
253 .any(|tag| tag.eq_ignore_ascii_case(s))
254 }),
255 many0(html_tag_attribute),
256 space0,
257 opt(char('/')),
258 char('>'),
259 ))
260 .parse(input)
261 }
262}
263
264fn complete_closing_html_tag(input: &str) -> IResult<&str, &str> {
265 recognize((tag("</"), html_tag_name, space0, char('>'))).parse(input)
266}
267
268fn html_tag_name(input: &str) -> IResult<&str, &str> {
269 recognize((
270 alpha1,
271 many0(alt((value((), char('-')), value((), alphanumeric1)))),
272 ))
273 .parse(input)
274}
275
276fn html_tag_attribute(input: &str) -> IResult<&str, &str> {
277 recognize((
278 space1,
279 html_tag_attribute_name,
280 opt(html_tag_attribute_value_specification),
281 ))
282 .parse(input)
283}
284
285fn html_tag_attribute_name(input: &str) -> IResult<&str, &str> {
286 recognize((
287 alt((value((), alpha1), value((), one_of("_:")))),
288 many0(alt((value((), one_of("_.:-")), value((), alphanumeric1)))),
289 ))
290 .parse(input)
291}
292
293fn html_tag_attribute_value_specification(input: &str) -> IResult<&str, &str> {
294 recognize((space0, char('='), space0, html_tag_attribute_value)).parse(input)
295}
296
297fn html_tag_attribute_value(input: &str) -> IResult<&str, &str> {
298 alt((
299 html_tag_attribute_value_unquoted,
300 html_tag_attribute_value_single_quoted,
301 html_tag_attribute_value_double_quoted,
302 ))
303 .parse(input)
304}
305
306fn html_tag_attribute_value_unquoted(input: &str) -> IResult<&str, &str> {
307 recognize(many1(pair(
308 peek(not(alt((value((), space1), value((), one_of("\"'=<>`")))))),
309 anychar,
310 )))
311 .parse(input)
312}
313
314fn html_tag_attribute_value_single_quoted(input: &str) -> IResult<&str, &str> {
315 recognize(delimited(
316 char('\''),
317 pair(peek(not(char('\''))), anychar),
318 char('\''),
319 ))
320 .parse(input)
321}
322
323fn html_tag_attribute_value_double_quoted(input: &str) -> IResult<&str, &str> {
324 recognize(delimited(
325 char('"'),
326 pair(peek(not(char('"'))), anychar),
327 char('"'),
328 ))
329 .parse(input)
330}
331