1use log::info;
23use std::result::Result;
24
/// Scanner states used by `skip_descriptor` while it walks over the text that
/// follows a URL.
enum State {
    /// Reading descriptor characters. A comma seen in this state ends the
    /// current candidate.
    InsideDescriptor,
    /// Inside a `(`…`)` group. Every character is skipped until the closing
    /// `)` is found, so commas in here do not end the candidate.
    InsideParens,
    /// Whitespace has been seen after descriptor characters.
    AfterDescriptor,
}
30
/// Splits `input` in front of the first character that `predicate` rejects.
///
/// Returns `(prefix, rest)`: `prefix` is the longest leading run of characters
/// for which `predicate` returns `true`, and `rest` is everything after it.
/// When every character is accepted (including for an empty `input`), `rest`
/// is the empty string.
fn split_at<F>(input: &str, predicate: F) -> (&str, &str)
where
    F: Fn(&char) -> bool,
{
    // Byte offset of the first rejected character, or the end of the string
    // when nothing is rejected. `char_indices` only yields char boundaries, so
    // the split below cannot panic.
    let boundary = input
        .char_indices()
        .find(|(_, ch)| !predicate(ch))
        .map_or(input.len(), |(offset, _)| offset);

    input.split_at(boundary)
}
47
48pub(crate) fn parse(input: &str) -> Vec<&str> {
53 let mut candidates: Vec<&str> = Vec::new();
54 let mut remaining = input;
55 while !remaining.is_empty() {
56 remaining = match parse_one_url(remaining) {
57 Ok((rem, None)) => rem,
58 Ok((rem, Some(url))) => {
59 candidates.push(url);
60 rem
61 }
62 Err(e) => {
63 info!("{e}");
64 return vec![];
65 }
66 }
67 }
68
69 candidates
70}
71
72fn parse_one_url(remaining: &str) -> Result<(&str, Option<&str>), String> {
80 let (start, remaining) = split_at(remaining, |c| *c == ',' || c.is_ascii_whitespace());
81
82 if start.find(',').is_some() {
83 return Err("srcset parse error (too many commas)".to_string());
84 }
85
86 if remaining.is_empty() {
87 return Ok(("", None));
88 }
89
90 let (url, remaining) = split_at(remaining, |c| !c.is_ascii_whitespace());
91
92 let comma_count = url.chars().rev().take_while(|c| *c == ',').count();
93 if comma_count > 1 {
94 return Err("srcset parse error (trailing commas)".to_string());
95 }
96
97 let url = url.get(..url.len() - comma_count);
98
99 let (_spaces, remaining) = split_at(remaining, char::is_ascii_whitespace);
100
101 let remaining = skip_descriptor(remaining);
102
103 Ok((remaining, url))
104}
105
106#[allow(clippy::single_match)]
110fn skip_descriptor(remaining: &str) -> &str {
111 let mut state = State::InsideDescriptor;
112
113 for (i, c) in remaining.char_indices() {
114 match state {
115 State::InsideDescriptor => match c {
116 c if c.is_ascii_whitespace() => state = State::AfterDescriptor,
117 '(' => state = State::InsideParens,
118 ',' => return &remaining[i + c.len_utf8()..], _ => (),
120 },
121 State::InsideParens => match c {
122 ')' => state = State::InsideDescriptor,
123 _ => (),
124 },
125 State::AfterDescriptor => match c {
126 c if c.is_ascii_whitespace() => (),
127 _ => state = State::InsideDescriptor,
128 },
129 }
130 }
131
132 ""
133}
134
#[cfg(test)]
mod tests {
    use super::*;

    // --- split_at ---------------------------------------------------------

    #[test]
    fn test_split_at_with_empty_string() {
        assert_eq!(split_at("", |c| c.is_alphabetic()), ("", ""));
    }

    #[test]
    fn test_split_at_with_alphabetic_predicate() {
        assert_eq!(split_at("abc123", |c| c.is_alphabetic()), ("abc", "123"));
    }

    #[test]
    fn test_split_at_with_digit_predicate() {
        assert_eq!(split_at("123abc", char::is_ascii_digit), ("123", "abc"));
    }

    #[test]
    fn test_split_at_with_no_match() {
        assert_eq!(split_at("123abc", |c| c.is_whitespace()), ("", "123abc"));
    }

    #[test]
    fn test_split_at_with_all_match() {
        assert_eq!(split_at("123abc", |c| !c.is_whitespace()), ("123abc", ""));
    }

    #[test]
    fn test_split_at_with_multibyte_characters() {
        // The split offset must land on a char boundary.
        assert_eq!(
            split_at("héllo wörld", |c| !c.is_whitespace()),
            ("héllo", " wörld")
        );
    }

    // --- parse ------------------------------------------------------------

    #[test]
    fn test_parse_no_value() {
        assert!(parse("").is_empty());
    }

    #[test]
    fn test_parse_only_whitespace() {
        assert!(parse(" \t\n").is_empty());
    }

    #[test]
    fn test_parse_url_one_value() {
        assert_eq!(parse("test-img-320w.jpg 320w"), vec!["test-img-320w.jpg"]);
    }

    #[test]
    fn test_parse_srcset_two_values() {
        assert_eq!(
            parse("test-img-320w.jpg 320w, test-img-480w.jpg 480w"),
            vec!["test-img-320w.jpg", "test-img-480w.jpg"]
        );
    }

    #[test]
    fn test_parse_srcset_url() {
        assert_eq!(
            parse("https://example.com/image1.jpg 1x, https://example.com/image2.jpg 2x"),
            vec![
                "https://example.com/image1.jpg",
                "https://example.com/image2.jpg"
            ]
        );
    }

    // Commas inside a URL are part of the URL; only a comma at the end of the
    // URL token or in the descriptor separates candidates.
    #[test]
    fn test_parse_srcset_with_unencoded_comma() {
        assert_eq!(
            parse(
                "/cdn-cgi/image/format=webp,width=640/https://img.youtube.com/vi/hVBl8_pgQf0/maxresdefault.jpg 640w, /cdn-cgi/image/format=webp,width=750/https://img.youtube.com/vi/hVBl8_pgQf0/maxresdefault.jpg 750w"
            ),
            vec![
                "/cdn-cgi/image/format=webp,width=640/https://img.youtube.com/vi/hVBl8_pgQf0/maxresdefault.jpg",
                "/cdn-cgi/image/format=webp,width=750/https://img.youtube.com/vi/hVBl8_pgQf0/maxresdefault.jpg"
            ]
        );
    }

    #[test]
    fn test_parse_srcset_without_spaces() {
        assert_eq!(
            parse(
                "/300.png 300w,/600.png 600w,/900.png 900w,https://x.invalid/a.png 1000w,relative.png 10w"
            ),
            vec![
                "/300.png",
                "/600.png",
                "/900.png",
                "https://x.invalid/a.png",
                "relative.png"
            ]
        );
    }

    #[test]
    fn test_parse_srcset_with_tabs_and_newlines() {
        assert_eq!(
            parse("a.png 1x,\n\tb.png 2x"),
            vec!["a.png", "b.png"]
        );
    }

    #[test]
    fn test_parse_srcset_with_parenthesised_descriptor() {
        // The comma inside the parentheses must not split the candidate.
        assert_eq!(parse("a.png (1x,2x), b.png"), vec!["a.png", "b.png"]);
    }

    #[test]
    fn test_parse_leading_comma_is_an_error() {
        assert!(parse(",a.png 1x").is_empty());
    }

    #[test]
    fn test_parse_multiple_trailing_commas_is_an_error() {
        assert!(parse("a.png,, b.png 2x").is_empty());
    }

    #[test]
    fn test_parse_error_discards_earlier_candidates() {
        // The first candidate is fine, the second one has a doubled comma.
        assert!(parse("a.png 1x, b.png,, c.png").is_empty());
    }
}