Skip to main content

lychee_lib/types/uri/
github.rs

1use std::{collections::HashSet, sync::LazyLock};
2
3use crate::{ErrorKind, Result, Uri};
4
/// First path segments on `github.com` that are treated as site endpoints
/// rather than repository owners.
///
/// A URL whose first path segment is in this set is rejected as an
/// owner/repo URL.
static GITHUB_API_EXCLUDED_ENDPOINTS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        "about",
        "collections",
        "events",
        "explore",
        "features",
        "issues",
        "marketplace",
        "new",
        "notifications",
        "pricing",
        "pulls",
        "sponsors",
        "topics",
        "watching",
    ])
});
23
/// Uri path segments extracted from a GitHub URL
///
/// All fields are plain owned strings, so the type is cheap to clone and can
/// be used as a key in hashed or ordered collections.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct GithubUri {
    /// Owner of the repository: the first path segment of the URL
    pub owner: String,
    /// Repository name: the second path segment of the URL
    pub repo: String,
    /// Everything after the repository, e.g. `issues` in `/org/repo/issues`
    /// or `blob/master/README.md` in `/org/repo/blob/master/README.md`
    pub endpoint: Option<String>,
}
34
35impl GithubUri {
36    /// Create a new GitHub URI without an endpoint
37    #[cfg(test)]
38    fn new<T: Into<String>>(owner: T, repo: T) -> Self {
39        GithubUri {
40            owner: owner.into(),
41            repo: repo.into(),
42            endpoint: None,
43        }
44    }
45
46    #[cfg(test)]
47    fn with_endpoint<T: Into<String>>(owner: T, repo: T, endpoint: T) -> Self {
48        GithubUri {
49            owner: owner.into(),
50            repo: repo.into(),
51            endpoint: Some(endpoint.into()),
52        }
53    }
54
55    // TODO: Support GitLab etc.
56    fn gh_org_and_repo(uri: &Uri) -> Result<GithubUri> {
57        fn remove_suffix<'a>(input: &'a str, suffix: &str) -> &'a str {
58            if let Some(stripped) = input.strip_suffix(suffix) {
59                return stripped;
60            }
61            input
62        }
63
64        debug_assert!(!uri.is_mail(), "Should only be called on a Website type!");
65
66        let Some(domain) = uri.domain() else {
67            return Err(ErrorKind::InvalidGithubUrl(uri.to_string()));
68        };
69
70        if !matches!(
71            domain,
72            "github.com" | "www.github.com" | "raw.githubusercontent.com"
73        ) {
74            return Err(ErrorKind::InvalidGithubUrl(uri.to_string()));
75        }
76
77        let parts: Vec<_> = match uri.path_segments() {
78            Some(parts) => parts.collect(),
79            None => return Err(ErrorKind::InvalidGithubUrl(uri.to_string())),
80        };
81
82        if parts.len() < 2 {
83            // Not a valid org/repo pair.
84            // Note: We don't check for exactly 2 here, because the GitHub
85            // API doesn't handle checking individual files inside repos or
86            // paths like <github.com/org/repo/issues>, so we are more
87            // permissive and only check for repo existence. This is the
88            // only way to get a basic check for private repos. Public repos
89            // are not affected and should work with a normal check.
90            return Err(ErrorKind::InvalidGithubUrl(uri.to_string()));
91        }
92
93        let owner = parts[0];
94        if GITHUB_API_EXCLUDED_ENDPOINTS.contains(owner) {
95            return Err(ErrorKind::InvalidGithubUrl(uri.to_string()));
96        }
97
98        let repo = parts[1];
99        // If the URL ends with `.git`, assume this is an SSH URL and strip
100        // the suffix. See https://github.com/lycheeverse/lychee/issues/384
101        let repo = remove_suffix(repo, ".git");
102
103        let endpoint = if parts.len() > 2 && !parts[2].is_empty() {
104            Some(parts[2..].join("/"))
105        } else {
106            None
107        };
108
109        Ok(GithubUri {
110            owner: owner.to_string(),
111            repo: repo.to_string(),
112            endpoint,
113        })
114    }
115}
116
117impl TryFrom<Uri> for GithubUri {
118    type Error = ErrorKind;
119
120    fn try_from(uri: Uri) -> Result<Self> {
121        GithubUri::gh_org_and_repo(&uri)
122    }
123}
124
125impl TryFrom<&Uri> for GithubUri {
126    type Error = ErrorKind;
127
128    fn try_from(uri: &Uri) -> Result<Self> {
129        GithubUri::gh_org_and_repo(uri)
130    }
131}
132
#[cfg(test)]
mod tests {

    use super::*;
    use test_utils::website;

    /// URLs that must be recognised as pointing into a GitHub repository.
    #[test]
    fn test_github() {
        assert_eq!(
            GithubUri::try_from(website!("http://github.com/lycheeverse/lychee")).unwrap(),
            GithubUri::new("lycheeverse", "lychee")
        );

        assert_eq!(
            GithubUri::try_from(website!("http://www.github.com/lycheeverse/lychee")).unwrap(),
            GithubUri::new("lycheeverse", "lychee")
        );

        assert_eq!(
            GithubUri::try_from(website!("https://github.com/lycheeverse/lychee")).unwrap(),
            GithubUri::new("lycheeverse", "lychee")
        );

        // A trailing slash after the repository is not an endpoint.
        assert_eq!(
            GithubUri::try_from(website!("https://github.com/lycheeverse/lychee/")).unwrap(),
            GithubUri::new("lycheeverse", "lychee")
        );

        assert_eq!(
            GithubUri::try_from(website!("https://github.com/lycheeverse/lychee/foo/bar")).unwrap(),
            GithubUri::with_endpoint("lycheeverse", "lychee", "foo/bar")
        );

        // The `.git` suffix is stripped from the repository name.
        assert_eq!(
            GithubUri::try_from(website!(
                "https://github.com/Microsoft/python-language-server.git"
            ))
            .unwrap(),
            GithubUri::new("Microsoft", "python-language-server")
        );

        assert_eq!(
            GithubUri::try_from(website!(
                "https://github.com/lycheeverse/lychee/blob/master/NON_EXISTENT_FILE.md"
            ))
            .unwrap(),
            GithubUri::with_endpoint("lycheeverse", "lychee", "blob/master/NON_EXISTENT_FILE.md")
        );

        // The raw content host is accepted as a GitHub domain too.
        assert_eq!(
            GithubUri::try_from(website!(
                "https://raw.githubusercontent.com/lycheeverse/lychee/master/README.md"
            ))
            .unwrap(),
            GithubUri::with_endpoint("lycheeverse", "lychee", "master/README.md")
        );
    }

    /// URLs that look GitHub-related but do not point into a repository.
    #[test]
    fn test_github_false_positives() {
        assert!(
            GithubUri::try_from(website!("https://github.com/sponsors/analysis-tools-dev"))
                .is_err()
        );

        assert!(
            GithubUri::try_from(website!(
                "https://github.com/marketplace/actions/lychee-broken-link-checker"
            ))
            .is_err()
        );

        assert!(GithubUri::try_from(website!("https://github.com/features/actions")).is_err());

        // `github.com` only appears in the path here, not as the domain.
        assert!(
            GithubUri::try_from(website!(
                "https://pkg.go.dev/github.com/Debian/pkg-go-tools/cmd/pgt-gopath"
            ))
            .is_err()
        );
    }

    /// A path with a single segment has an owner but no repository.
    #[test]
    fn test_github_missing_repo() {
        assert!(GithubUri::try_from(website!("https://github.com/lycheeverse")).is_err());
    }
}