wayver's git archive


an obsidian renderer
git clone https://git.wayver.dev/sable

sable-markdown/src/ast/mod.rs@main

raw
Date Commit Message Author Files + -
2026-02-23 01:55 initial mvp wayverd 139 17808 0
...

1//! Fully‑typed Abstract Syntax Tree (AST) for CommonMark + GitHub Flavored Markdown (GFM)
2//! ------------------------------------------------------------------------------------
3//! This module models every construct described in the **CommonMark specification**
4//! together with the widely‑used **GFM extensions**: tables, strikethrough, autolinks,
5//! task‑list items and footnotes, plus Obsidian (OFM) callouts, tags and wikilinks.
6//!
7//! The design separates **block‑level** and **inline‑level** nodes because parsers and
8//! renderers typically operate on these tiers independently.
9//!
10//! ```text
11//! Document ─┐
12//!           └─ Block ─┐
13//!                     ├─ Inline
14//!                     └─ ...
15//! ```
16
17// ——————————————————————————————————————————————————————————————————————————
18// Document root
19// ——————————————————————————————————————————————————————————————————————————
20
21/// Root of a Markdown document.
///
/// Owns only the top‑level blocks; nested content is held by the [`Block`]
/// variants themselves (block quotes, list items, etc. contain further blocks).
22#[derive(Debug, Clone, PartialEq)]
23pub struct Document {
24    /// Top‑level block sequence **in document order**.
25    pub blocks: Vec<Block>,
26}
27
28// ——————————————————————————————————————————————————————————————————————————
29// Block‑level nodes
30// ——————————————————————————————————————————————————————————————————————————
31
32/// Block‑level constructs: the CommonMark blocks come first, followed by the
/// extension blocks (tables, footnote definitions, callouts) and the
/// [`Block::Empty`] placeholder.
33#[derive(Debug, Clone, PartialEq)]
34pub enum Block {
35    /// Ordinary paragraph made of inline content.
36    Paragraph(Vec<Inline>),
37
38    /// ATX (`# Heading`) or Setext (`===` / `---` underline) heading
39    Heading(Heading),
40
41    /// Thematic break (horizontal rule)
42    ThematicBreak,
43
44    /// Block quote containing nested blocks.
45    BlockQuote(Vec<Block>),
46
47    /// List (bullet or ordered)
48    List(List),
49
50    /// Fenced or indented code block
51    CodeBlock(CodeBlock),
52
53    /// Raw HTML block, stored as a string.
54    HtmlBlock(String),
55
56    /// Link reference definition.  Preserved for round‑tripping.
57    Definition(LinkDefinition),
58
59    /// Table (header row, body rows and per‑column alignment).
60    Table(Table),
61
62    /// Footnote definition (label plus nested blocks).
63    FootnoteDefinition(FootnoteDefinition),
64
65    /// Callout block; see [`Callout`] for its fields.
66    Callout(Callout),
67
68    /// Empty block. This is used to represent skipped blocks in the AST.
69    Empty,
70}
71
72/// Heading with level 1–6 and inline content.
///
/// The level is not a separate field; it is encoded in [`HeadingKind`].
73#[derive(Debug, Clone, PartialEq, Eq)]
74pub struct Heading {
75    /// Kind of heading (ATX or Setext) together with the level.
76    pub kind: HeadingKind,
77
78    /// Inlines that form the heading text (before trimming).
79    pub content: Vec<Inline>,
80}
81
82/// Syntactic form of a heading (ATX or Setext), carrying the heading level.
83#[derive(Debug, Clone, Copy, PartialEq, Eq)]
84pub enum HeadingKind {
85    /// ATX heading (`# Heading`); the payload is the heading level.
    /// NOTE(review): a bare `u8` does not enforce the 1–6 range — the code
    /// constructing this value must uphold it.
86    Atx(u8),
87
88    /// Setext heading (`===` or `---`); the level is given by [`SetextHeading`].
89    Setext(SetextHeading),
90}
91
92/// Level of a Setext heading, identified by its underline character.
93#[derive(Debug, Clone, Copy, PartialEq, Eq)]
94pub enum SetextHeading {
95    /// Level‑1 Setext heading (`=` underline)
96    Level1,
97
98    /// Level‑2 Setext heading (`-` underline)
99    Level2,
100}
101
102// ——————————————————————————————————————————————————————————————————————————
103// Lists
104// ——————————————————————————————————————————————————————————————————————————
105
106/// A list container — bullet or ordered — holding its items.
107#[derive(Debug, Clone, PartialEq)]
108pub struct List {
109    /// Kind of list together with additional semantic data (start index for
110    /// ordered lists, bullet marker for bullet lists).
111    pub kind: ListKind,
112
113    /// List items in source order.
114    pub items: Vec<ListItem>,
115}
116
117/// Specifies *what kind* of list we have, plus the data specific to that kind.
118#[derive(Debug, Clone, Copy, PartialEq, Eq)]
119pub enum ListKind {
120    /// Ordered list (`1.`, `42.` …) together with its start number, which is always
    /// present (`ListOrderedKindOptions::start` is a plain `u64`, not an `Option`).
121    Ordered(ListOrderedKindOptions),
122
123    /// Bullet list (`-`, `*`, or `+`) together with the concrete marker.
124    Bullet(ListBulletKind),
125}
126
127/// Options carried by an ordered list (currently only its start number).
128#[derive(Debug, Clone, Copy, PartialEq, Eq)]
129pub struct ListOrderedKindOptions {
130    /// Number of the first item (1, 2, …) of the ordered list.
131    pub start: u64,
132}
133
134/// Concrete bullet character used as the marker of a bullet list.
135#[derive(Debug, Clone, Copy, PartialEq, Eq)]
136pub enum ListBulletKind {
137    /// Hyphen‑minus marker: `-` U+002D
138    Dash,
139
140    /// Asterisk marker: `*` U+002A
141    Star,
142
143    /// Plus‑sign marker: `+` U+002B
144    Plus,
145}
146
147/// Single item within a [`List`].
148#[derive(Debug, Clone, PartialEq)]
149pub struct ListItem {
150    /// Task‑list checkbox state (GFM task‑lists). `None` ⇒ not a task‑list item.
151    pub task: Option<TaskState>,
152
153    /// Nested blocks inside the list item.
154    pub blocks: Vec<Block>,
155}
156
157/// State of a task‑list checkbox (GFM task‑list items).
158#[derive(Debug, Clone, Copy, PartialEq, Eq)]
159pub enum TaskState {
160    /// Checkbox is unchecked.
161    Incomplete,
162
163    /// Checkbox is checked.
164    Complete,
165}
166
167// ——————————————————————————————————————————————————————————————————————————
168// Code blocks
169// ——————————————————————————————————————————————————————————————————————————
170
171/// Fenced or indented code block: its kind plus the literal text.
172#[derive(Debug, Clone, PartialEq, Eq)]
173pub struct CodeBlock {
174    /// Distinguishes indented vs fenced code; for fenced blocks it also stores
    /// the optional *info string* (see [`CodeBlockKind::Fenced`]).
175    pub kind: CodeBlockKind,
176
177    /// Literal text inside the code block **without** final newline trimming.
178    pub literal: String,
179}
180
181/// The concrete kind of a code block: indented or fenced.
182#[derive(Debug, Clone, PartialEq, Eq)]
183pub enum CodeBlockKind {
184    /// Indented block (≥ 4 spaces or 1 tab per line). Carries no extra data.
185    Indented,
186
187    /// Fenced block with *optional* info string (language, etc.);
    /// `info` is `None` when no info string is recorded.
188    Fenced { info: Option<String> },
189}
190
191// ——————————————————————————————————————————————————————————————————————————
192// Link reference definitions
193// ——————————————————————————————————————————————————————————————————————————
194
195/// Link reference definition with a label, destination and optional title.
196#[derive(Debug, Clone, PartialEq, Eq)]
197pub struct LinkDefinition {
198    /// Link label (acts as the *identifier*), stored as inline nodes.
199    pub label: Vec<Inline>,
200
201    /// Link URL (absolute or relative) or email address.
202    pub destination: String,
203
204    /// Optional title of the definition.
205    pub title: Option<String>,
206}
207
208// ——————————————————————————————————————————————————————————————————————————
209// Tables
210// ——————————————————————————————————————————————————————————————————————————
211
212/// A table is a collection of rows and columns with per‑column alignment.
213/// The first row is the header row.
214#[derive(Debug, Clone, PartialEq, Eq)]
215pub struct Table {
216    /// Each row is a vector of *cells*; header row is **row 0**.
217    pub rows: Vec<TableRow>,
218
219    /// Column alignment; `alignments.len() == column_count`.
    /// NOTE(review): this length invariant is not enforced by the type — it
    /// has to be upheld by whoever builds the table.
220    pub alignments: Vec<Alignment>,
221}
222
223/// A table row: one [`TableCell`] per column, in column order.
224pub type TableRow = Vec<TableCell>;
225
226/// A table cell: the sequence of inlines (text, links, etc.) it contains.
227pub type TableCell = Vec<Inline>;
228
229/// Specifies the alignment of a table column.
///
/// NOTE(review): `Alignment::default()` yields [`Alignment::Left`], not
/// [`Alignment::None`], because `#[default]` is placed on `Left` — confirm
/// that this is intended.
230#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
231pub enum Alignment {
232    /// No alignment specified
233    None,
234
235    /// Left aligned (this is the `Default` value)
236    #[default]
237    Left,
238
239    /// Center aligned
240    Center,
241
242    /// Right aligned
243    Right,
244}
245
246// ——————————————————————————————————————————————————————————————————————————
247// Footnotes
248// ——————————————————————————————————————————————————————————————————————————
249
/// Footnote definition: a label together with the blocks forming its content.
250#[derive(Debug, Clone, PartialEq)]
251pub struct FootnoteDefinition {
252    /// Normalized label (without leading `^`).
253    pub label: String,
254
255    /// Footnote content (blocks).
256    pub blocks: Vec<Block>,
257}
258
259// ——————————————————————————————————————————————————————————————————————————
260// Callouts
261// ——————————————————————————————————————————————————————————————————————————
262
/// Callout block: a level, an optional title, fold flags and nested blocks.
263#[derive(Debug, Clone, PartialEq)]
264pub struct Callout {
    /// Callout level, stored as a free‑form string.
    /// NOTE(review): presumably the callout type keyword (e.g. `note`,
    /// `warning`) — confirm against the parser.
265    pub level: String,
266
    /// Optional callout title; `None` when no title is recorded.
267    pub title: Option<String>,
268
    /// Whether the callout is foldable.
    /// NOTE(review): presumably set from a fold marker in the source — confirm.
269    pub foldable: bool,
270
    /// Whether the callout is open.
    /// NOTE(review): presumably only meaningful when `foldable` is `true` — confirm.
271    pub open: bool,
272
273    /// Callout content (blocks).
274    pub blocks: Vec<Block>,
275}
276
277// ——————————————————————————————————————————————————————————————————————————
278// Inline‑level nodes
279// ——————————————————————————————————————————————————————————————————————————
280
/// Inline‑level constructs that appear inside paragraphs, headings, table
/// cells and other inline containers.
///
/// Unlike the block‑level types, inline nodes also derive `Hash`.
281#[derive(Debug, Clone, PartialEq, Hash, Eq)]
282pub enum Inline {
283    /// Plain text (decoded entity references, preserved backslash escapes).
284    Text(String),
285
286    /// Hard line break
287    LineBreak,
288
289    /// Inline code span; the payload is the code text.
290    Code(String),
291
292    /// Raw HTML fragment
293    Html(String),
294
295    /// Link to a destination with optional title.
296    Link(Link),
297
298    /// Reference link (label + text); see [`LinkReference`].
299    LinkReference(LinkReference),
300
301    /// OFM (Obsidian Flavored Markdown): Tag (`#tag`)
302    Tag(Tag),
303    /// OFM (Obsidian Flavored Markdown): Wikilink (`[[Link]]` / `[[Link|Title]]`)
304    Wikilink(Wikilink),
305
306    /// Image with optional title.
307    Image(Image),
308
309    /// Emphasis (`*` / `_`) wrapping nested inlines
310    Emphasis(Vec<Inline>),
311    /// Strong emphasis (`**` / `__`) wrapping nested inlines
312    Strong(Vec<Inline>),
313    /// Strikethrough (`~~`) wrapping nested inlines
314    Strikethrough(Vec<Inline>),
315
316    /// Autolink (`<https://…>` or `<mailto:…>`)
317    Autolink(String),
318
319    /// Footnote reference (`[^label]`); the payload is the label.
320    FootnoteReference(String),
321
322    /// Empty element. This is used to represent skipped elements in the AST.
323    Empty,
324}
325
326/// Inline link: destination, optional title and child inlines.
327#[derive(Debug, Clone, PartialEq, Hash, Eq)]
328pub struct Link {
329    /// Destination URL (absolute or relative) or email address.
330    pub destination: String,
331
332    /// Optional link title.
333    pub title: Option<String>,
334
335    /// Inline content (text, code, etc.) inside the link.
336    pub children: Vec<Inline>,
337}
338
339/// Inline image: destination, optional title and alternative text.
340#[derive(Debug, Clone, PartialEq, Hash, Eq)]
341pub struct Image {
342    /// Image URL (absolute or relative).
343    pub destination: String,
344
345    /// Optional title.
346    pub title: Option<String>,
347
348    /// Alternative text, stored as a plain string (not as inline nodes).
349    pub alt: String,
350}
351
/// Reference‑style link: a label plus the link text.
/// NOTE(review): presumably resolved against a [`LinkDefinition`] with a
/// matching label — confirm where resolution happens.
352#[derive(Debug, Clone, PartialEq, Hash, Eq)]
353pub struct LinkReference {
354    /// Link label (acts as the *identifier*).
355    pub label: Vec<Inline>,
356
357    /// Link text, as inline nodes.
358    pub text: Vec<Inline>,
359}
360
/// Tag node (the payload of `Inline::Tag`).
361#[derive(Debug, Clone, PartialEq, Hash, Eq)]
362pub struct Tag {
363    /// Tag text.
    /// NOTE(review): unclear from this file whether the leading `#` is
    /// included — confirm against the parser.
364    pub text: String,
365}
366
/// Wikilink node (the payload of `Inline::Wikilink`).
///
/// NOTE(review): how the parser splits the source text across `link`,
/// `target` and `name` is not visible in this file — the notes below are
/// assumptions to confirm.
367#[derive(Debug, Clone, PartialEq, Hash, Eq)]
368pub struct Wikilink {
369    /// Wikilink link (always present).
370    pub link: String,
371
372    /// Wikilink target, if any.
    /// NOTE(review): presumably a sub‑target within the linked note — confirm.
373    pub target: Option<String>,
374
375    /// Wikilink name, if any.
    /// NOTE(review): presumably the display text after `|` — confirm.
376    pub name: Option<String>,
377}
378