1use percent_encoding::percent_decode_str;
2use reqwest::Url;
3use std::collections::HashSet;
4use std::path::{Path, PathBuf};
5
6use crate::{
7 Base, BasicAuthCredentials, ErrorKind, LycheeResult, Request, RequestError, Uri,
8 basic_auth::BasicAuthExtractor,
9 types::{ResolvedInputSource, uri::raw::RawUri},
10 utils::{path, url},
11};
12
13pub(crate) fn extract_credentials(
15 extractor: Option<&BasicAuthExtractor>,
16 uri: &Uri,
17) -> Option<BasicAuthCredentials> {
18 extractor.as_ref().and_then(|ext| ext.matches(uri))
19}
20
21fn create_request(
23 raw_uri: &RawUri,
24 source: &ResolvedInputSource,
25 root_dir: Option<&PathBuf>,
26 base: Option<&Base>,
27 extractor: Option<&BasicAuthExtractor>,
28) -> LycheeResult<Request> {
29 let uri = try_parse_into_uri(raw_uri, source, root_dir, base)?;
30 let source = source.clone();
31 let element = raw_uri.element.clone();
32 let attribute = raw_uri.attribute.clone();
33 let credentials = extract_credentials(extractor, &uri);
34
35 Ok(Request::new(uri, source, element, attribute, credentials))
36}
37
38fn try_parse_into_uri(
50 raw_uri: &RawUri,
51 source: &ResolvedInputSource,
52 root_dir: Option<&PathBuf>,
53 base: Option<&Base>,
54) -> LycheeResult<Uri> {
55 let text = prepend_root_dir_if_absolute_local_link(&raw_uri.text, root_dir);
56 let uri = match Uri::try_from(raw_uri.clone()) {
57 Ok(uri) => uri,
58 Err(_) => match base {
59 Some(base_url) => match base_url.join(&text) {
60 Some(url) => Uri { url },
61 None => return Err(ErrorKind::InvalidBaseJoin(text.clone())),
62 },
63 None => match source {
64 ResolvedInputSource::FsPath(root) => {
65 create_uri_from_file_path(root, &text, root_dir.is_none())?
66 }
67 _ => return Err(ErrorKind::UnsupportedUriType(text)),
68 },
69 },
70 };
71 Ok(uri)
72}
73
/// Returns `true` if `text` begins with `#`, i.e. it is a bare fragment link.
pub(crate) fn is_anchor(text: &str) -> bool {
    // `#` is ASCII, so comparing the first byte is equivalent to comparing
    // the first character.
    text.as_bytes().first() == Some(&b'#')
}
78
79fn create_uri_from_file_path(
87 file_path: &Path,
88 link_text: &str,
89 ignore_absolute_local_links: bool,
90) -> LycheeResult<Uri> {
91 let target_path = if is_anchor(link_text) {
92 let file_name = file_path
94 .file_name()
95 .and_then(|name| name.to_str())
96 .ok_or_else(|| ErrorKind::InvalidFile(file_path.to_path_buf()))?;
97
98 format!("{file_name}{link_text}")
99 } else {
100 link_text.to_string()
101 };
102 let Ok(constructed_url) =
103 resolve_and_create_url(file_path, &target_path, ignore_absolute_local_links)
104 else {
105 return Err(ErrorKind::InvalidPathToUri(target_path));
106 };
107 Ok(Uri {
108 url: constructed_url,
109 })
110}
111
112pub(crate) fn create(
119 uris: Vec<RawUri>,
120 source: &ResolvedInputSource,
121 root_dir: Option<&PathBuf>,
122 base: Option<&Base>,
123 extractor: Option<&BasicAuthExtractor>,
124) -> Vec<Result<Request, RequestError>> {
125 let base = base.cloned().or_else(|| Base::from_source(source));
126
127 let mut requests = HashSet::<Request>::new();
128 let mut errors = Vec::<RequestError>::new();
129
130 for raw_uri in uris {
131 let result = create_request(&raw_uri, source, root_dir, base.as_ref(), extractor);
132 match result {
133 Ok(request) => {
134 requests.insert(request);
135 }
136 Err(e) => errors.push(RequestError::CreateRequestItem(
137 raw_uri.clone(),
138 source.clone(),
139 e,
140 )),
141 }
142 }
143
144 (requests.into_iter().map(Result::Ok))
145 .chain(errors.into_iter().map(Result::Err))
146 .collect()
147}
148
149fn resolve_and_create_url(
161 src_path: &Path,
162 dest_path: &str,
163 ignore_absolute_local_links: bool,
164) -> LycheeResult<Url> {
165 let (dest_path, fragment) = url::remove_get_params_and_separate_fragment(dest_path);
166
167 let decoded_dest = percent_decode_str(dest_path).decode_utf8()?;
170
171 let Ok(Some(resolved_path)) = path::resolve(
172 src_path,
173 &PathBuf::from(&*decoded_dest),
174 ignore_absolute_local_links,
175 ) else {
176 return Err(ErrorKind::InvalidPathToUri(decoded_dest.to_string()));
177 };
178
179 let Ok(mut url) = Url::from_file_path(&resolved_path) else {
180 return Err(ErrorKind::InvalidUrlFromPath(resolved_path.clone()));
181 };
182
183 url.set_fragment(fragment);
184 Ok(url)
185}
186
/// Put `root_dir` in front of `text` when `text` is an absolute local link.
///
/// The prefix is applied only if `text` starts with `/`, a root directory is
/// given, and that directory is representable as UTF-8. In every other case
/// `text` is returned unchanged. The two strings are concatenated verbatim.
fn prepend_root_dir_if_absolute_local_link(text: &str, root_dir: Option<&PathBuf>) -> String {
    let prefix = root_dir
        .filter(|_| text.starts_with('/'))
        .and_then(|dir| dir.to_str());

    match prefix {
        Some(root) => format!("{root}{text}"),
        None => text.to_owned(),
    }
}
196
#[cfg(test)]
mod tests {
    use std::borrow::Cow;
    use std::num::NonZeroUsize;

    use crate::types::uri::raw::RawUriSpan;

    use super::*;

    /// Remote base URL shared by the base-URL resolution tests.
    const REMOTE_BASE: &str = "https://example.com/path/page.html";

    /// Run [`create`] and keep only the successfully built requests.
    fn create_ok_only(
        uris: Vec<RawUri>,
        source: &ResolvedInputSource,
        root_dir: Option<&PathBuf>,
        base: Option<&Base>,
        extractor: Option<&BasicAuthExtractor>,
    ) -> Vec<Request> {
        create(uris, source, root_dir, base, extractor)
            .into_iter()
            .flatten()
            .collect()
    }

    /// Build a [`RawUri`] with the given text and placeholder metadata.
    fn raw_uri(text: &'static str) -> RawUri {
        let span = RawUriSpan {
            line: NonZeroUsize::MAX,
            column: None,
        };
        RawUri {
            text: text.to_owned(),
            element: None,
            attribute: None,
            span,
        }
    }

    /// Remote base parsed from [`REMOTE_BASE`].
    fn remote_base() -> Base {
        Base::try_from(REMOTE_BASE).unwrap()
    }

    /// Input source that is an (empty) in-memory string.
    fn string_source() -> ResolvedInputSource {
        ResolvedInputSource::String(Cow::Borrowed(""))
    }

    /// Input source that is the file `/some/page.html`.
    fn file_source() -> ResolvedInputSource {
        ResolvedInputSource::FsPath(PathBuf::from("/some/page.html"))
    }

    /// Root directory used by the `*_from_root_dir*` tests.
    fn lychee_root() -> PathBuf {
        PathBuf::from("/tmp/lychee")
    }

    /// Resolve a single link through [`create`] and assert that exactly one
    /// request comes out, pointing at `expected_url`.
    #[track_caller]
    fn assert_single_request(
        link: &'static str,
        source: &ResolvedInputSource,
        root_dir: Option<&PathBuf>,
        base: Option<&Base>,
        expected_url: &str,
    ) {
        let requests = create_ok_only(vec![raw_uri(link)], source, root_dir, base, None);

        assert_eq!(requests.len(), 1);
        assert!(
            requests
                .iter()
                .any(|request| request.uri.url.as_str() == expected_url)
        );
    }

    #[test]
    fn test_is_anchor() {
        assert!(is_anchor("#anchor"));
        assert!(!is_anchor("notan#anchor"));
    }

    #[test]
    fn test_create_uri_from_path() {
        let url =
            resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", true).unwrap();
        assert_eq!(url.as_str(), "file:///test+encoding");
    }

    // --- base URL only ------------------------------------------------------

    #[test]
    fn test_relative_url_resolution() {
        assert_single_request(
            "relative.html",
            &string_source(),
            None,
            Some(&remote_base()),
            "https://example.com/path/relative.html",
        );
    }

    #[test]
    fn test_absolute_url_resolution() {
        assert_single_request(
            "https://another.com/page",
            &string_source(),
            None,
            Some(&remote_base()),
            "https://another.com/page",
        );
    }

    #[test]
    fn test_root_relative_url_resolution() {
        assert_single_request(
            "/root-relative",
            &string_source(),
            None,
            Some(&remote_base()),
            "https://example.com/root-relative",
        );
    }

    #[test]
    fn test_parent_directory_url_resolution() {
        assert_single_request(
            "../parent",
            &string_source(),
            None,
            Some(&remote_base()),
            "https://example.com/parent",
        );
    }

    #[test]
    fn test_fragment_url_resolution() {
        assert_single_request(
            "#fragment",
            &string_source(),
            None,
            Some(&remote_base()),
            "https://example.com/path/page.html#fragment",
        );
    }

    // --- root dir only, file source ------------------------------------------

    #[test]
    fn test_relative_url_resolution_from_root_dir() {
        assert_single_request(
            "relative.html",
            &file_source(),
            Some(&lychee_root()),
            None,
            "file:///some/relative.html",
        );
    }

    #[test]
    fn test_absolute_url_resolution_from_root_dir() {
        assert_single_request(
            "https://another.com/page",
            &file_source(),
            Some(&lychee_root()),
            None,
            "https://another.com/page",
        );
    }

    #[test]
    fn test_root_relative_url_resolution_from_root_dir() {
        assert_single_request(
            "/root-relative",
            &file_source(),
            Some(&lychee_root()),
            None,
            "file:///tmp/lychee/root-relative",
        );
    }

    #[test]
    fn test_parent_directory_url_resolution_from_root_dir() {
        assert_single_request(
            "../parent",
            &file_source(),
            Some(&lychee_root()),
            None,
            "file:///parent",
        );
    }

    #[test]
    fn test_fragment_url_resolution_from_root_dir() {
        assert_single_request(
            "#fragment",
            &file_source(),
            Some(&lychee_root()),
            None,
            "file:///some/page.html#fragment",
        );
    }

    // --- root dir and base URL, file source ----------------------------------

    #[test]
    fn test_relative_url_resolution_from_root_dir_and_base_url() {
        assert_single_request(
            "relative.html",
            &file_source(),
            Some(&lychee_root()),
            Some(&remote_base()),
            "https://example.com/path/relative.html",
        );
    }

    #[test]
    fn test_absolute_url_resolution_from_root_dir_and_base_url() {
        assert_single_request(
            "https://another.com/page",
            &file_source(),
            Some(&lychee_root()),
            Some(&remote_base()),
            "https://another.com/page",
        );
    }

    #[test]
    fn test_root_relative_url_resolution_from_root_dir_and_base_url() {
        assert_single_request(
            "/root-relative",
            &file_source(),
            Some(&lychee_root()),
            Some(&remote_base()),
            "https://example.com/tmp/lychee/root-relative",
        );
    }

    #[test]
    fn test_parent_directory_url_resolution_from_root_dir_and_base_url() {
        assert_single_request(
            "../parent",
            &file_source(),
            Some(&lychee_root()),
            Some(&remote_base()),
            "https://example.com/parent",
        );
    }

    #[test]
    fn test_fragment_url_resolution_from_root_dir_and_base_url() {
        assert_single_request(
            "#fragment",
            &file_source(),
            Some(&lychee_root()),
            Some(&remote_base()),
            "https://example.com/path/page.html#fragment",
        );
    }

    // --- neither root dir nor base -------------------------------------------

    #[test]
    fn test_no_base_url_resolution() {
        assert_single_request(
            "https://example.com/page",
            &string_source(),
            None,
            None,
            "https://example.com/page",
        );
    }

    // --- create_request / try_parse_into_uri directly --------------------------

    #[test]
    fn test_create_request_from_relative_file_path() {
        let base = Base::Local(PathBuf::from("/tmp/lychee"));
        let input_source = ResolvedInputSource::FsPath(PathBuf::from("page.html"));

        let actual =
            create_request(&raw_uri("file.html"), &input_source, None, Some(&base), None).unwrap();

        let expected_uri = Uri {
            url: Url::from_file_path("/tmp/lychee/file.html").unwrap(),
        };
        assert_eq!(
            actual,
            Request::new(expected_uri, input_source, None, None, None)
        );
    }

    #[test]
    fn test_create_request_from_relative_file_path_errors() {
        // A relative link read from stdin has nothing to be resolved against.
        let from_stdin = create_request(
            &raw_uri("file.html"),
            &ResolvedInputSource::Stdin,
            None,
            None,
            None,
        );
        assert!(from_stdin.is_err());

        // An absolute local link with neither a root dir nor a base is rejected.
        let absolute_without_root = create_request(
            &raw_uri("/file.html"),
            &ResolvedInputSource::FsPath(PathBuf::from("page.html")),
            None,
            None,
            None,
        );
        assert!(absolute_without_root.is_err());
    }

    #[test]
    fn test_create_request_from_absolute_file_path() {
        let base = Base::Local(PathBuf::from("/tmp/lychee"));
        let input_source = ResolvedInputSource::FsPath(PathBuf::from("/tmp/lychee/page.html"));

        // The link target lies outside the base directory.
        let actual = create_request(
            &raw_uri("/usr/local/share/doc/example.html"),
            &input_source,
            None,
            Some(&base),
            None,
        )
        .unwrap();

        let expected_uri = Uri {
            url: Url::from_file_path("/usr/local/share/doc/example.html").unwrap(),
        };
        assert_eq!(
            actual,
            Request::new(expected_uri, input_source, None, None, None)
        );
    }

    #[test]
    fn test_parse_relative_path_into_uri() {
        let base = Base::Local(PathBuf::from("/tmp/lychee"));
        let link = raw_uri("relative.html");

        let uri = try_parse_into_uri(&link, &string_source(), None, Some(&base)).unwrap();

        assert_eq!(uri.url.as_str(), "file:///tmp/lychee/relative.html");
    }

    #[test]
    fn test_parse_absolute_path_into_uri() {
        let base = Base::Local(PathBuf::from("/tmp/lychee"));
        let link = raw_uri("absolute.html");

        let uri = try_parse_into_uri(&link, &string_source(), None, Some(&base)).unwrap();

        assert_eq!(uri.url.as_str(), "file:///tmp/lychee/absolute.html");
    }

    // --- prepend_root_dir_if_absolute_local_link -------------------------------

    #[test]
    fn test_prepend_with_absolute_local_link_and_root_dir() {
        let root_dir = PathBuf::from("/root");
        assert_eq!(
            prepend_root_dir_if_absolute_local_link("/absolute/path", Some(&root_dir)),
            "/root/absolute/path"
        );
    }

    #[test]
    fn test_prepend_with_absolute_local_link_and_no_root_dir() {
        assert_eq!(
            prepend_root_dir_if_absolute_local_link("/absolute/path", None),
            "/absolute/path"
        );
    }

    #[test]
    fn test_prepend_with_relative_link_and_root_dir() {
        let root_dir = PathBuf::from("/root");
        assert_eq!(
            prepend_root_dir_if_absolute_local_link("relative/path", Some(&root_dir)),
            "relative/path"
        );
    }

    #[test]
    fn test_prepend_with_relative_link_and_no_root_dir() {
        assert_eq!(
            prepend_root_dir_if_absolute_local_link("relative/path", None),
            "relative/path"
        );
    }
}