1use crate::{
2 Status,
3 chain::{ChainResult, Handler},
4};
5use async_trait::async_trait;
6use header::HeaderValue;
7use http::header;
8use regex::{Captures, Regex};
9use reqwest::{Request, Url};
10use std::{collections::HashMap, sync::LazyLock};
11
12static CRATES_PATTERN: LazyLock<Regex> =
13 LazyLock::new(|| Regex::new(r"^(https?://)?(www\.)?crates.io").unwrap());
14static YOUTUBE_PATTERN: LazyLock<Regex> =
15 LazyLock::new(|| Regex::new(r"^(https?://)?(www\.)?youtube(-nocookie)?\.com").unwrap());
16static YOUTUBE_SHORT_PATTERN: LazyLock<Regex> =
17 LazyLock::new(|| Regex::new(r"^(https?://)?(www\.)?(youtu\.?be)").unwrap());
18static GITHUB_BLOB_MARKDOWN_FRAGMENT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
19 Regex::new(r"^https://github\.com/(?<user>.*?)/(?<repo>.*?)/blob/(?<path>.*?)/(?<file>.*\.(md|markdown)#.*)$")
20 .unwrap()
21});
22
23fn query(request: &Request) -> HashMap<String, String> {
25 request.url().query_pairs().into_owned().collect()
26}
27
28#[derive(Debug, Clone)]
29pub(crate) struct Quirk {
30 pub(crate) pattern: &'static LazyLock<Regex>,
31 pub(crate) rewrite: fn(Request, Captures) -> Request,
32}
33
34#[derive(Debug, Clone)]
35pub(crate) struct Quirks {
36 quirks: Vec<Quirk>,
37}
38
39impl Default for Quirks {
40 fn default() -> Self {
41 let quirks = vec![
42 Quirk {
43 pattern: &CRATES_PATTERN,
44 rewrite: |mut request, _| {
45 request
46 .headers_mut()
47 .insert(header::ACCEPT, HeaderValue::from_static("text/html"));
48 request
49 },
50 },
51 Quirk {
52 pattern: &YOUTUBE_PATTERN,
53 rewrite: |mut request, _| {
54 let video_id = match request.url().path() {
56 "/watch" => query(&request).get("v").map(ToOwned::to_owned),
57 path if path.starts_with("/embed/") => {
58 path.strip_prefix("/embed/").map(ToOwned::to_owned)
59 }
60 _ => return request,
61 };
62
63 if let Some(id) = video_id {
65 *request.url_mut() =
66 Url::parse(&format!("https://img.youtube.com/vi/{id}/0.jpg")).unwrap();
67 }
68
69 request
70 },
71 },
72 Quirk {
73 pattern: &YOUTUBE_SHORT_PATTERN,
74 rewrite: |mut request, _| {
75 let id = request.url().path().trim_start_matches('/');
77 if id.is_empty() {
78 return request;
79 }
80 *request.url_mut() =
81 Url::parse(&format!("https://img.youtube.com/vi/{id}/0.jpg")).unwrap();
82 request
83 },
84 },
85 Quirk {
86 pattern: &GITHUB_BLOB_MARKDOWN_FRAGMENT_PATTERN,
87 rewrite: |mut request, captures| {
88 let mut raw_url = String::new();
89 captures.expand(
90 "https://raw.githubusercontent.com/$user/$repo/$path/$file",
91 &mut raw_url,
92 );
93 *request.url_mut() = Url::parse(&raw_url).unwrap();
94 request
95 },
96 },
97 ];
98 Self { quirks }
99 }
100}
101
102impl Quirks {
103 pub(crate) fn apply(&self, request: Request) -> Request {
107 for quirk in &self.quirks {
108 if let Some(captures) = quirk.pattern.captures(request.url().clone().as_str()) {
109 return (quirk.rewrite)(request, captures);
110 }
111 }
112 request
114 }
115}
116
117#[async_trait]
118impl Handler<Request, Status> for Quirks {
119 async fn handle(&mut self, input: Request) -> ChainResult<Request, Status> {
120 ChainResult::Next(self.apply(input))
121 }
122}
123
#[cfg(test)]
mod tests {
    use header::HeaderValue;
    use http::{Method, header};
    use reqwest::{Request, Url};

    use super::Quirks;

    /// Request wrapper that compares by URL and method only.
    #[derive(Debug)]
    struct MockRequest(Request);

    impl MockRequest {
        fn new(method: Method, url: Url) -> Self {
            Self(Request::new(method, url))
        }
    }

    impl PartialEq for MockRequest {
        fn eq(&self, other: &Self) -> bool {
            self.0.method() == other.0.method() && self.0.url() == other.0.url()
        }
    }

    /// Sends a GET request for `url` through the default quirks.
    #[track_caller]
    fn apply_default_quirks(url: &str) -> Request {
        let url = Url::parse(url).unwrap();
        Quirks::default().apply(Request::new(Method::GET, url))
    }

    /// Asserts that the default quirks turn a GET request for `origin` into a
    /// GET request for `expected`.
    #[track_caller]
    fn assert_rewritten(origin: &str, expected: &str) {
        assert_eq!(
            MockRequest(apply_default_quirks(origin)),
            MockRequest::new(Method::GET, Url::parse(expected).unwrap())
        );
    }

    #[test]
    fn test_cratesio_request() {
        let modified = apply_default_quirks("https://crates.io/crates/lychee");

        assert_eq!(
            modified.headers().get(header::ACCEPT).unwrap(),
            HeaderValue::from_static("text/html")
        );
    }

    #[test]
    fn test_youtube_video_request() {
        assert_rewritten(
            "https://www.youtube.com/watch?v=NlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7",
            "https://img.youtube.com/vi/NlKuICiT470/0.jpg",
        );
    }

    #[test]
    fn test_youtube_video_nocookie_request() {
        assert_rewritten(
            "https://www.youtube-nocookie.com/embed/BIguvia6AvM",
            "https://img.youtube.com/vi/BIguvia6AvM/0.jpg",
        );
    }

    #[test]
    fn test_youtube_video_shortlink_request() {
        assert_rewritten(
            "https://youtu.be/Rvu7N4wyFpk?t=42",
            "https://img.youtube.com/vi/Rvu7N4wyFpk/0.jpg",
        );
    }

    #[test]
    fn test_non_video_youtube_url_untouched() {
        let unchanged = "https://www.youtube.com/channel/UCaYhcUwRBNscFNUKTjgPFiA";
        assert_rewritten(unchanged, unchanged);
    }

    #[test]
    fn test_github_blob_markdown_fragment_request() {
        // Pairs of (original URL, URL expected after the quirks ran).
        let cases = [
            (
                "https://github.com/moby/docker-image-spec/blob/main/spec.md#terminology",
                "https://raw.githubusercontent.com/moby/docker-image-spec/main/spec.md#terminology",
            ),
            (
                "https://github.com/moby/docker-image-spec/blob/main/spec.markdown#terminology",
                "https://raw.githubusercontent.com/moby/docker-image-spec/main/spec.markdown#terminology",
            ),
            // No fragment: left untouched.
            (
                "https://github.com/moby/docker-image-spec/blob/main/spec.md",
                "https://github.com/moby/docker-image-spec/blob/main/spec.md",
            ),
            // Not a markdown file: left untouched.
            (
                "https://github.com/lycheeverse/lychee/blob/master/.gitignore#section",
                "https://github.com/lycheeverse/lychee/blob/master/.gitignore#section",
            ),
            (
                "https://github.com/lycheeverse/lychee/blob/v0.15.0/README.md#features",
                "https://raw.githubusercontent.com/lycheeverse/lychee/v0.15.0/README.md#features",
            ),
        ];

        for (origin, expect) in cases {
            assert_rewritten(origin, expect);
        }
    }

    #[test]
    fn test_no_quirk_applied() {
        let unchanged = "https://endler.dev";
        assert_rewritten(unchanged, unchanged);
    }
}