Skip to main content

lychee_lib/extract/
plaintext.rs

1use crate::{
2    types::uri::raw::{RawUri, SpanProvider},
3    utils::url,
4};
5
6/// Extract unparsed URL strings from plaintext
7pub(crate) fn extract_raw_uri_from_plaintext(
8    input: &str,
9    span_provider: &impl SpanProvider,
10) -> Vec<RawUri> {
11    url::find_links(input)
12        .map(|uri| RawUri {
13            text: uri.as_str().to_owned(),
14            element: None,
15            attribute: None,
16            span: span_provider.span(uri.start()),
17        })
18        .collect()
19}
20
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::uri::raw::{SourceSpanProvider, span};

    /// Run the plaintext extractor over `input`, with a span provider built
    /// from that same `input`.
    fn extract(input: &str) -> Vec<RawUri> {
        let provider = SourceSpanProvider::from_input(input);
        extract_raw_uri_from_plaintext(input, &provider)
    }

    /// Two loopback URLs on one line are both found, each with its own span.
    #[test]
    fn test_extract_local_links() {
        let input = "http://127.0.0.1/ and http://127.0.0.1:8888/ are local links.";

        // NOTE(review): `span(a, b)` presumably means (line, column) -- confirm.
        let expected = [
            RawUri::from(("http://127.0.0.1/", span(1, 1))),
            RawUri::from(("http://127.0.0.1:8888/", span(1, 23))),
        ];

        let found: Vec<RawUri> = extract(input);
        assert_eq!(found, expected);
    }

    /// A URL directly followed by a newline is extracted without the newline.
    #[test]
    fn test_extract_link_at_end_of_line() {
        let input = "https://www.apache.org/licenses/LICENSE-2.0\n";

        // The expected text is the input minus its trailing newline.
        let expected = vec![RawUri::from((input.trim_end(), span(1, 1)))];

        let found: Vec<RawUri> = extract(input);
        assert_eq!(expected, found);
    }
}