Skip to main content

lychee_lib/extract/
markdown.rs

1//! Extract links and fragments from markdown documents
2use std::collections::{HashMap, HashSet};
3
4use log::warn;
5use pulldown_cmark::{CowStr, Event, LinkType, Options, Parser, Tag, TagEnd, TextMergeWithOffset};
6
7use crate::{
8    checker::wikilink::wikilink,
9    extract::{html::html5gum::extract_html_with_span, plaintext::extract_raw_uri_from_plaintext},
10    types::uri::raw::{
11        OffsetSpanProvider, RawUri, RawUriSpan, SourceSpanProvider, SpanProvider as _,
12    },
13};
14
15use super::html::html5gum::extract_html_fragments;
16
17/// Returns the default markdown extensions used by lychee.
18/// Sadly, `|` is not const for `Options` so we can't use a const global.
19fn md_extensions() -> Options {
20    Options::ENABLE_HEADING_ATTRIBUTES
21        | Options::ENABLE_MATH
22        | Options::ENABLE_WIKILINKS
23        | Options::ENABLE_FOOTNOTES
24}
25
26/// Extract unparsed URL strings from a Markdown string.
27// TODO: Refactor the extractor to reduce the complexity and number of lines.
28#[allow(clippy::too_many_lines)]
29pub(crate) fn extract_markdown(
30    input: &str,
31    include_verbatim: bool,
32    include_wikilinks: bool,
33) -> Vec<RawUri> {
34    // In some cases it is undesirable to extract links from within code blocks,
35    // which is why we keep track of entries and exits while traversing the input.
36    let mut inside_code_block = false;
37    let mut inside_link_block = false;
38    let mut inside_wikilink_block = false;
39
40    // HTML blocks come in chunks from pulldown_cmark, so we need to accumulate them
41    let mut inside_html_block = false;
42    let mut html_block_buffer = String::new();
43    let mut html_block_start_offset = 0;
44
45    let span_provider = SourceSpanProvider::from_input(input);
46    let parser =
47        TextMergeWithOffset::new(Parser::new_ext(input, md_extensions()).into_offset_iter());
48    parser
49        .filter_map(|(event, span)| match event {
50            // A link.
51            Event::Start(Tag::Link {
52                link_type,
53                dest_url,
54                ..
55            }) => {
56                // Note: Explicitly listing all link types below to make it easier to
57                // change the behavior for a specific link type in the future.
58                #[allow(clippy::match_same_arms)]
59                match link_type {
60                    // Inline link like `[foo](bar)`
61                    // This is the most common link type
62                    LinkType::Inline => {
63                        inside_link_block = true;
64                        Some(raw_uri(&dest_url, span_provider.span(span.start)))
65                    }
66                    // Reference without destination in the document, but resolved by the `broken_link_callback`
67                    LinkType::Reference |
68                    // Collapsed link like `[foo][]`
69                    LinkType::ReferenceUnknown |
70                    // Collapsed link like `[foo][]`
71                    LinkType::Collapsed|
72                    // Collapsed link without destination in the document, but resolved by the `broken_link_callback`
73                    LinkType::CollapsedUnknown |
74                    // Shortcut link like `[foo]`
75                    LinkType::Shortcut |
76                    // Shortcut without destination in the document, but resolved by the `broken_link_callback`
77                    LinkType::ShortcutUnknown => {
78                        inside_link_block = true;
79                        // For reference links, create RawUri directly to handle relative file paths
80                        // that linkify doesn't recognize as URLs
81                        Some(raw_uri(&dest_url, span_provider.span(span.start)))
82                    },
83                    // Autolink like `<http://foo.bar/baz>`
84                    LinkType::Autolink |
85                    // Email address in autolink like `<john@example.org>`
86                    LinkType::Email => {
87                        let span_provider = get_email_span_provider(&span_provider, &span, link_type);
88                        Some(extract_raw_uri_from_plaintext(&dest_url, &span_provider))
89                    }
90                    // Wiki URL (`[[http://example.com]]`)
91                    LinkType::WikiLink { has_pothole } => {
92                        // Exclude WikiLinks if not explicitly enabled
93                        if !include_wikilinks {
94                            return None;
95                        }
96                        inside_wikilink_block = true;
97                        // Ignore gitlab toc notation: https://docs.gitlab.com/user/markdown/#table-of-contents
98                        if ["_TOC_".to_string(), "TOC".to_string()].contains(&dest_url.to_string()) {
99                            return None;
100                        }
101
102                        if let Ok(wikilink) = wikilink(&dest_url, has_pothole) {
103                            Some(vec![RawUri {
104                                text: wikilink.to_string(),
105                                element: Some("a".to_string()),
106                                attribute: Some("wikilink".to_string()),
107                                // wiki links start with `[[`, so offset the span by `2`
108                                span: span_provider.span(span.start + 2)
109                            }])
110                        } else {
111                            warn!("The wikilink destination url {dest_url} could not be cleaned by removing potholes and fragments");
112                            None
113                        }
114                    }
115                }
116            }
117
118            Event::Start(Tag::Image { dest_url, .. }) => Some(extract_image(&dest_url, span_provider.span(span.start))),
119
120            // A code block (inline or fenced).
121            Event::Start(Tag::CodeBlock(_)) => {
122                inside_code_block = true;
123                None
124            }
125            Event::End(TagEnd::CodeBlock) => {
126                inside_code_block = false;
127                None
128            }
129
130            // A text node.
131            Event::Text(txt) => {
132                if inside_wikilink_block
133                    || (inside_link_block && !include_verbatim)
134                    || (inside_code_block && !include_verbatim) {
135                    None
136                } else {
137                    Some(extract_raw_uri_from_plaintext(
138                        &txt,
139                        &OffsetSpanProvider { offset: span.start, inner: &span_provider }
140                    ))
141                }
142            }
143
144            // Start of an HTML block
145            Event::Start(Tag::HtmlBlock) => {
146                inside_html_block = true;
147                html_block_buffer.clear();
148                html_block_start_offset = span.start;
149                None
150            }
151
152            // End of an HTML block - process accumulated HTML
153            Event::End(TagEnd::HtmlBlock) => {
154                inside_html_block = false;
155                if html_block_buffer.is_empty() {
156                    None
157                } else {
158                    Some(extract_html_with_span(
159                        &html_block_buffer,
160                        include_verbatim,
161                        OffsetSpanProvider {
162                            offset: html_block_start_offset,
163                            inner: &span_provider
164                        }
165                    ))
166                }
167            }
168
169            // An HTML node
170            Event::Html(html) => {
171                if inside_html_block {
172                    // Accumulate HTML chunks within a block
173                    html_block_buffer.push_str(&html);
174                    None
175                } else {
176                    // Standalone HTML (not part of a block) - process immediately
177                    Some(extract_html_with_span(
178                        &html,
179                        include_verbatim,
180                        OffsetSpanProvider { offset: span.start, inner: &span_provider }
181                    ))
182                }
183            }
184
185            // Inline HTML (not part of a block)
186            Event::InlineHtml(html) => {
187                Some(extract_html_with_span(
188                    &html,
189                    include_verbatim,
190                    OffsetSpanProvider { offset: span.start, inner: &span_provider }
191                ))
192            }
193
194            // An inline code node.
195            Event::Code(code) => {
196                if include_verbatim {
197                    // inline code starts with '`', so offset the span by `1`.
198                    Some(extract_raw_uri_from_plaintext(
199                        &code,
200                        &OffsetSpanProvider { offset: span.start + 1, inner: &span_provider }
201                    ))
202                } else {
203                    None
204                }
205            }
206
207            Event::End(TagEnd::Link) => {
208                inside_link_block = false;
209                inside_wikilink_block = false;
210                None
211            }
212
213            // Skip footnote references and definitions explicitly - they're not links to check
214            #[allow(clippy::match_same_arms)]
215            Event::FootnoteReference(_) | Event::Start(Tag::FootnoteDefinition(_)) | Event::End(TagEnd::FootnoteDefinition) => None,
216
217            // Silently skip over other events
218            _ => None,
219        })
220        .flatten()
221        .collect()
222}
223
224fn get_email_span_provider<'a>(
225    span_provider: &'a SourceSpanProvider<'_>,
226    span: &std::ops::Range<usize>,
227    link_type: LinkType,
228) -> OffsetSpanProvider<'a> {
229    let offset = match link_type {
230        // We don't know how the link starts, so don't offset the span.
231        LinkType::Reference | LinkType::CollapsedUnknown | LinkType::ShortcutUnknown => 0,
232        // These start all with `[` or `<`, so offset the span by `1`.
233        LinkType::ReferenceUnknown
234        | LinkType::Collapsed
235        | LinkType::Shortcut
236        | LinkType::Autolink
237        | LinkType::Email => 1,
238        _ => {
239            debug_assert!(false, "Unexpected email link type: {link_type:?}");
240            0
241        }
242    };
243
244    OffsetSpanProvider {
245        offset: span.start + offset,
246        inner: span_provider,
247    }
248}
249
250/// Emulate `<img src="...">` tag to be compatible with HTML links.
251/// We might consider using the actual Markdown `LinkType` for better granularity in the future.
252fn extract_image(dest_url: &CowStr<'_>, span: RawUriSpan) -> Vec<RawUri> {
253    vec![RawUri {
254        text: dest_url.to_string(),
255        element: Some("img".to_string()),
256        attribute: Some("src".to_string()),
257        span,
258    }]
259}
260
261/// Emulate `<a href="...">` tag to be compatible with HTML links.
262/// We might consider using the actual Markdown `LinkType` for better granularity in the future.
263fn raw_uri(dest_url: &CowStr<'_>, span: RawUriSpan) -> Vec<RawUri> {
264    vec![RawUri {
265        text: dest_url.to_string(),
266        element: Some("a".to_string()),
267        attribute: Some("href".to_string()),
268        // Sadly, we don't know how long the `foo` part in `[foo](bar)` is,
269        // so the span points to the `[` and not to the `b`.
270        span,
271    }]
272}
273
274/// Extract fragments/anchors from a Markdown string.
275///
276/// Fragments are generated from headings using the same unique kebab case method as GitHub.
277/// If a [heading attribute](https://github.com/raphlinus/pulldown-cmark/blob/master/specs/heading_attrs.txt)
278/// is present,
279/// this will be added to the fragment set **alongside** the other generated fragment.
280/// It means a single heading such as `## Frag 1 {#frag-2}` would generate two fragments.
281pub(crate) fn extract_markdown_fragments(input: &str) -> HashSet<String> {
282    let mut in_heading = false;
283    let mut heading_text = String::new();
284    let mut heading_id: Option<CowStr<'_>> = None;
285    let mut id_generator = HeadingIdGenerator::default();
286
287    let mut out = HashSet::new();
288
289    for event in Parser::new_ext(input, md_extensions()) {
290        match event {
291            Event::Start(Tag::Heading { id, .. }) => {
292                heading_id = id;
293                in_heading = true;
294            }
295            Event::End(TagEnd::Heading(_)) => {
296                if let Some(frag) = heading_id.take() {
297                    out.insert(frag.to_string());
298                }
299
300                if !heading_text.is_empty() {
301                    let id = id_generator.generate(&heading_text);
302                    out.insert(id);
303                    heading_text.clear();
304                }
305
306                in_heading = false;
307            }
308            Event::Text(text) | Event::Code(text) => {
309                if in_heading {
310                    heading_text.push_str(&text);
311                }
312            }
313
314            // An HTML node
315            Event::Html(html) | Event::InlineHtml(html) => {
316                out.extend(extract_html_fragments(&html));
317            }
318
319            // Silently skip over other events
320            _ => (),
321        }
322    }
323    out
324}
325
/// Generates GitHub-style heading anchors that are unique within one document.
///
/// `counter` holds every id handed out so far. For each id it stores the
/// highest numeric suffix that has been appended to it to resolve a clash.
#[derive(Default)]
struct HeadingIdGenerator {
    counter: HashMap<String, usize>,
}

impl HeadingIdGenerator {
    /// Returns the anchor for `heading`.
    ///
    /// The first heading with a given kebab-case form gets that form as its
    /// id. Later headings with the same form get `-1`, `-2`, ... appended.
    /// The suffix keeps increasing until the resulting id has not been handed
    /// out before, so headings `foo`, `foo`, `foo-1` yield `foo`, `foo-1` and
    /// `foo-1-1` instead of producing `foo-1` twice.
    fn generate(&mut self, heading: &str) -> String {
        let base = Self::into_kebab_case(heading);
        let mut id = base.clone();

        // `id` starts out equal to `base`, so whenever the loop body runs,
        // `base` already has an entry whose suffix can be bumped.
        while self.counter.contains_key(&id) {
            let suffix = self.counter.entry(base.clone()).or_insert(0);
            *suffix += 1;
            id = format!("{base}-{}", *suffix);
        }

        // Remember the id that is handed out so it is never produced again.
        self.counter.insert(id.clone(), 0);
        id
    }

    /// Converts text into kebab case
    ///
    /// The text is lowercased; alphanumeric characters, `_` and `-` are kept,
    /// every whitespace character becomes a `-` (runs are not collapsed) and
    /// all other characters are dropped.
    #[must_use]
    fn into_kebab_case(text: &str) -> String {
        text.to_lowercase()
            .chars()
            .filter_map(|ch| {
                if ch.is_alphanumeric() || ch == '_' || ch == '-' {
                    Some(ch)
                } else if ch.is_whitespace() {
                    Some('-')
                } else {
                    None
                }
            })
            .collect::<String>()
    }
}
360
361#[cfg(test)]
362mod tests {
363    use crate::types::uri::raw::span;
364
365    use super::*;
366
    // Markdown fixture shared by the fragment and link extraction tests.
    // Several tests assert exact line/column spans into this text, so its
    // layout (including the leading newline) must not change.
    const MD_INPUT: &str = r#"
# A Test

Some link in text [here](https://foo.com)

## A test {#well-still-the-same-test}

Code:

```bash
https://bar.com/123
```

or inline like `https://bar.org` for instance.

### Some `code` in a heading.

[example](http://example.com)

<span id="the-end">The End</span>
        "#;
388
389    #[test]
390    fn test_extract_fragments() {
391        let expected = HashSet::from([
392            "a-test".to_string(),
393            "a-test-1".to_string(),
394            "well-still-the-same-test".to_string(),
395            "some-code-in-a-heading".to_string(),
396            "the-end".to_string(),
397        ]);
398        let actual = extract_markdown_fragments(MD_INPUT);
399        assert_eq!(actual, expected);
400    }
401
402    #[test]
403    fn test_skip_verbatim() {
404        let expected = vec![
405            RawUri {
406                text: "https://foo.com".to_string(),
407                element: Some("a".to_string()),
408                attribute: Some("href".to_string()),
409                span: span(4, 19),
410            },
411            RawUri {
412                text: "http://example.com".to_string(),
413                element: Some("a".to_string()),
414                attribute: Some("href".to_string()),
415                span: span(18, 1),
416            },
417        ];
418
419        let uris = extract_markdown(MD_INPUT, false, false);
420        assert_eq!(uris, expected);
421    }
422
423    #[test]
424    fn test_include_verbatim() {
425        let expected = vec![
426            RawUri {
427                text: "https://foo.com".to_string(),
428                element: Some("a".to_string()),
429                attribute: Some("href".to_string()),
430                span: span(4, 19),
431            },
432            RawUri {
433                text: "https://bar.com/123".to_string(),
434                element: None,
435                attribute: None,
436                span: span(11, 1),
437            },
438            RawUri {
439                text: "https://bar.org".to_string(),
440                element: None,
441                attribute: None,
442                span: span(14, 17),
443            },
444            RawUri {
445                text: "http://example.com".to_string(),
446                element: Some("a".to_string()),
447                attribute: Some("href".to_string()),
448                span: span(18, 1),
449            },
450        ];
451
452        let uris = extract_markdown(MD_INPUT, true, false);
453        assert_eq!(uris, expected);
454    }
455
456    #[test]
457    fn test_skip_verbatim_html() {
458        let input = "
459<code>
460http://link.com
461</code>
462<pre>
463Some pre-formatted http://pre.com
464</pre>";
465
466        let expected = vec![];
467
468        let uris = extract_markdown(input, false, false);
469        assert_eq!(uris, expected);
470    }
471
472    #[test]
473    fn test_kebab_case() {
474        let check = |input, expected| {
475            let actual = HeadingIdGenerator::into_kebab_case(input);
476            assert_eq!(actual, expected);
477        };
478        check("A Heading", "a-heading");
479        check(
480            "This header has a :thumbsup: in it",
481            "this-header-has-a-thumbsup-in-it",
482        );
483        check(
484            "Header with 한글 characters (using unicode)",
485            "header-with-한글-characters-using-unicode",
486        );
487        check(
488            "Underscores foo_bar_, dots . and numbers 1.7e-3",
489            "underscores-foo_bar_-dots--and-numbers-17e-3",
490        );
491        check("Many          spaces", "many----------spaces");
492    }
493
494    #[test]
495    fn test_markdown_math() {
496        let input = r"
497$$
498[\psi](\mathbf{L})
499$$
500";
501        let uris = extract_markdown(input, true, false);
502        assert!(uris.is_empty());
503    }
504
505    #[test]
506    fn test_single_word_footnote_is_not_detected_as_link() {
507        let markdown = "This footnote is[^actually] a link.\n\n[^actually]: not";
508        let expected = vec![];
509        let uris = extract_markdown(markdown, true, false);
510        assert_eq!(uris, expected);
511    }
512
513    #[test]
514    fn test_underscore_in_urls_middle() {
515        let markdown = r"https://example.com/_/foo";
516        let expected = vec![RawUri {
517            text: "https://example.com/_/foo".to_string(),
518            element: None,
519            attribute: None,
520            span: span(1, 1),
521        }];
522        let uris = extract_markdown(markdown, true, false);
523        assert_eq!(uris, expected);
524    }
525
526    #[test]
527    fn test_underscore_in_urls_end() {
528        let markdown = r"https://example.com/_";
529        let expected = vec![RawUri {
530            text: "https://example.com/_".to_string(),
531            element: None,
532            attribute: None,
533            span: span(1, 1),
534        }];
535        let uris = extract_markdown(markdown, true, false);
536        assert_eq!(uris, expected);
537    }
538
539    #[test]
540    fn test_wiki_link() {
541        let markdown = r"[[https://example.com/destination]]";
542        let expected = vec![RawUri {
543            text: "https://example.com/destination".to_string(),
544            element: Some("a".to_string()),
545            attribute: Some("wikilink".to_string()),
546            span: span(1, 3),
547        }];
548        let uris = extract_markdown(markdown, true, true);
549        assert_eq!(uris, expected);
550    }
551
552    #[test]
553    fn test_multiple_wiki_links() {
554        let markdown = r"[[https://example.com/destination]][[https://example.com/source]]";
555        let expected = vec![
556            RawUri {
557                text: "https://example.com/destination".to_string(),
558                element: Some("a".to_string()),
559                attribute: Some("wikilink".to_string()),
560                span: span(1, 3),
561            },
562            RawUri {
563                text: "https://example.com/source".to_string(),
564                element: Some("a".to_string()),
565                attribute: Some("wikilink".to_string()),
566                span: span(1, 38),
567            },
568        ];
569        let uris = extract_markdown(markdown, true, true);
570        assert_eq!(uris, expected);
571    }
572
573    #[test]
574    fn test_ignore_gitlab_toc() {
575        let markdown = r"[[_TOC_]][TOC]";
576        let uris = extract_markdown(markdown, true, true);
577        assert!(uris.is_empty());
578    }
579
580    #[test]
581    fn test_link_text_not_checked() {
582        // Test that link text is not extracted as a separate link by default
583        let markdown =
584            r"[https://lycheerepublic.gov/notexist (archive.org link)](https://example.com)";
585        let uris = extract_markdown(markdown, false, false);
586
587        // Should only extract the destination URL, not the link text
588        let expected = vec![RawUri {
589            text: "https://example.com".to_string(),
590            element: Some("a".to_string()),
591            attribute: Some("href".to_string()),
592            span: span(1, 1),
593        }];
594
595        assert_eq!(uris, expected);
596        assert_eq!(
597            uris.len(),
598            1,
599            "Should only find destination URL, not link text"
600        );
601    }
602
603    #[test]
604    fn test_link_text_checked_with_include_verbatim() {
605        // Test that link text IS extracted when include_verbatim is true
606        let markdown =
607            r"[https://lycheerepublic.gov/notexist (archive.org link)](https://example.com)";
608        let uris = extract_markdown(markdown, true, false);
609
610        // Should extract both the link text AND the destination URL
611        let expected = vec![
612            RawUri {
613                text: "https://example.com".to_string(),
614                element: Some("a".to_string()),
615                attribute: Some("href".to_string()),
616                span: span(1, 1),
617            },
618            RawUri {
619                text: "https://lycheerepublic.gov/notexist".to_string(),
620                element: None,
621                attribute: None,
622                span: span(1, 2),
623            },
624        ];
625
626        assert_eq!(
627            uris.len(),
628            2,
629            "Should find both destination URL and link text"
630        );
631        // Check that both expected URLs are present (order might vary)
632        for expected_uri in expected {
633            assert!(
634                uris.contains(&expected_uri),
635                "Missing expected URI: {expected_uri:?}"
636            );
637        }
638    }
639
640    #[test]
641    fn test_reference_links_extraction() {
642        // Test that all types of reference links are extracted correctly
643        let markdown = r"
644Inline link: [link1](target1.md)
645
646Reference link: [link2][ref2]
647Collapsed link: [link3][]
648Shortcut link: [link4]
649
650[ref2]: target2.md
651[link3]: target3.md
652[link4]: target4.md
653";
654        let uris = extract_markdown(markdown, false, false);
655
656        let expected = vec![
657            RawUri {
658                text: "target1.md".to_string(),
659                element: Some("a".to_string()),
660                attribute: Some("href".to_string()),
661                span: span(2, 14),
662            },
663            RawUri {
664                text: "target2.md".to_string(),
665                element: Some("a".to_string()),
666                attribute: Some("href".to_string()),
667                span: span(4, 17),
668            },
669            RawUri {
670                text: "target3.md".to_string(),
671                element: Some("a".to_string()),
672                attribute: Some("href".to_string()),
673                span: span(5, 17),
674            },
675            RawUri {
676                text: "target4.md".to_string(),
677                element: Some("a".to_string()),
678                span: span(6, 16),
679                attribute: Some("href".to_string()),
680            },
681        ];
682
683        assert_eq!(uris.len(), 4, "Should extract all four link types");
684
685        // Check that all expected URIs are present (order might vary)
686        for expected_uri in expected {
687            assert!(
688                uris.contains(&expected_uri),
689                "Missing expected URI: {expected_uri:?}. Found: {uris:?}"
690            );
691        }
692    }
693
694    #[test]
695    fn test_clean_wikilink() {
696        let markdown = r"
697[[foo|bar]]
698[[foo#bar]]
699[[foo#bar|baz]]
700";
701        let uris = extract_markdown(markdown, true, true);
702        let expected = vec![
703            RawUri {
704                text: "foo".to_string(),
705                element: Some("a".to_string()),
706                attribute: Some("wikilink".to_string()),
707                span: span(2, 3),
708            },
709            RawUri {
710                text: "foo".to_string(),
711                element: Some("a".to_string()),
712                attribute: Some("wikilink".to_string()),
713                span: span(3, 3),
714            },
715            RawUri {
716                text: "foo".to_string(),
717                element: Some("a".to_string()),
718                attribute: Some("wikilink".to_string()),
719                span: span(4, 3),
720            },
721        ];
722        assert_eq!(uris, expected);
723    }
724
725    #[test]
726    fn test_nested_html() {
727        let input = r#"<Foo>
728          <Bar href="https://example.com" >
729          Some text
730          </Bar>
731        </Foo>"#;
732
733        let expected = vec![RawUri {
734            text: "https://example.com".to_string(),
735            element: Some("bar".to_string()),
736            attribute: Some("href".to_string()),
737            span: span(2, 22),
738        }];
739
740        let uris = extract_markdown(input, false, false);
741
742        assert_eq!(uris, expected);
743    }
744
745    #[test]
746    fn test_wikilink_extraction_returns_none_on_empty_links() {
747        let markdown = r"
748[[|bar]]
749[[#bar]]
750[[#bar|baz]]
751";
752
753        let uris = extract_markdown(markdown, true, true);
754        assert!(uris.is_empty());
755    }
756
757    #[test]
758    fn test_mdx_multiline_jsx() {
759        let input = r#"<CardGroup cols={1}>
760  <Card
761    title="Example"
762    href="https://example.com"
763  >
764    Some text
765  </Card>
766</CardGroup>"#;
767
768        let expected = vec![RawUri {
769            text: "https://example.com".to_string(),
770            element: Some("card".to_string()),
771            attribute: Some("href".to_string()),
772            span: span(4, 11),
773        }];
774
775        let uris = extract_markdown(input, false, false);
776
777        assert_eq!(uris, expected);
778    }
779
780    // Test that Markdown links inside HTML blocks are still parsed correctly.
781    // pulldown_cmark parses block-level HTML tags as separate HTML blocks, so
782    // Markdown content between them is processed normally.
783    #[test]
784    fn test_markdown_inside_html_block() {
785        let input = r"<div>
786
787[markdown link](https://example.com/markdown)
788
789</div>
790
791<span>[another link](https://example.com/another)</span>";
792
793        let uris = extract_markdown(input, false, false);
794
795        // Verify both Markdown links are extracted
796        let expected_urls = vec![
797            "https://example.com/markdown",
798            "https://example.com/another",
799        ];
800
801        assert_eq!(uris.len(), 2, "Should extract both Markdown links");
802
803        for expected_url in expected_urls {
804            assert!(
805                uris.iter().any(|u| u.text == expected_url),
806                "Should find URL: {expected_url}"
807            );
808        }
809
810        // Verify they're recognized as Markdown links (i.e. element: "a", attribute: "href")
811        for uri in &uris {
812            assert_eq!(uri.element, Some("a".to_string()));
813            assert_eq!(uri.attribute, Some("href".to_string()));
814        }
815    }
816
817    #[test]
818    fn test_remove_wikilink_potholes_and_fragments() {
819        let markdown = r"[[foo#bar|baz]]";
820        let uris = extract_markdown(markdown, true, true);
821        let expected = vec![RawUri {
822            text: "foo".to_string(),
823            element: Some("a".to_string()),
824            attribute: Some("wikilink".to_string()),
825            span: span(1, 3),
826        }];
827        assert_eq!(uris, expected);
828    }
829}