atom/uri/
mod.rs

1//! # Atom URI Format
2//!
3//! This module provides comprehensive parsing and handling of Atom URIs, which are
4//! used to reference atoms from various sources including Git repositories, local
5//! paths, and external URLs.
6//!
7//! ## URI Format
8//!
9//! Atom URIs follow this general format:
10//! ```text
11//! [scheme://][alias:][url-fragment::]atom-id[@version]
12//! ```
13//!
14//! ### Components
15//!
16//! - **scheme** - Optional protocol (e.g., `https://`, `ssh://`, `file://`)
17//! - **alias** - Optional user-configurable URL shortener (e.g., `gh` for GitHub)
18//! - **url-fragment** - Optional path within the repository
19//! - **atom-id** - Required atom identifier (Unicode string)
20//! - **version** - Optional version requirement (e.g., `@1.0.0`, `@^1.0`)
21//!
22//! ## Key Types
23//!
24//! - [`Uri`] - The main parsed URI structure
25//! - [`AliasedUrl`] - URL with optional alias resolution
26//! - [`UriError`] - Errors that can occur during URI parsing
27//!
28//! ## Alias System
29//!
30//! Aliases provide a convenient way to shorten common URLs. They are configured
31//! in the Eka configuration file and can reference full URLs or other aliases.
32//!
33//! ### Alias Examples
34//! - `gh:owner/repo::my-atom` → `https://github.com/owner/repo::my-atom`
35//! - `work:repo::my-atom` → `https://github.com/my-work-org/repo::my-atom`
36//! - `local::my-atom` → `file:///path/to/repo::my-atom`
37//!
38//! ## URI Examples
39//!
40//! ```rust,no_run
41//! use atom::uri::Uri;
42//!
43//! // Simple atom reference
44//! let uri: Uri = "my-atom".parse().unwrap();
45//! assert_eq!(uri.tag().to_string(), "my-atom");
46//!
47//! // Atom with version
48//! let uri: Uri = "my-atom@^1.0.0".parse().unwrap();
49//! assert_eq!(uri.tag().to_string(), "my-atom");
50//!
51//! // GitHub reference with alias
52//! let uri: Uri = "gh:user/repo::my-atom".parse().unwrap();
53//! assert_eq!(uri.url().unwrap().host().unwrap(), "github.com");
54//!
55//! // Direct URL reference
56//! let uri: Uri = "https://github.com/user/repo::my-atom".parse().unwrap();
57//! assert_eq!(uri.url().unwrap().host().unwrap(), "github.com");
58//!
59//! // Local file reference
60//! let uri: Uri = "file:///path/to/repo::my-atom".parse().unwrap();
61//! assert_eq!(uri.url().unwrap().scheme, "file".into());
62//! ```
63//!
64//! ## Error Handling
65//!
66//! The URI parser provides detailed error messages for common issues:
67//! - Invalid atom IDs (wrong characters, too long, etc.)
68//! - Unknown aliases
69//! - Malformed URLs
70//! - Invalid version specifications
71//! - Missing required components
72#[cfg(test)]
73mod tests;
74
75use std::collections::HashMap;
76use std::fmt::Display;
77use std::ops::{Deref, Not};
78use std::str::FromStr;
79
80use gix::url as gix_url;
81use gix_url::Url;
82use semver::VersionReq;
83use serde::{Deserialize, Serialize};
84use thiserror::Error;
85
86use super::id::AtomTag;
87use crate::id::Error;
88
/// Newtype over a `'static` alias table.
///
/// Keys are alias names; each value is the string that alias expands to.
#[derive(Debug)]
struct Aliases(&'static HashMap<&'static str, &'static str>);
91
/// Either a full Atom URI or a plain (possibly aliased) URL.
///
/// When built through the `FromStr` implementation, aliases are resolved.
#[derive(Debug, Clone)]
pub enum UriOrUrl {
    /// A successfully parsed Atom URI.
    Atom(Uri),
    /// A URL with an optional ref, tried when the input does not parse as an Atom URI.
    Pin(AliasedUrl),
}
102
/// Represents the parsed and validated components of an Atom URI.
///
/// It is typically created through the `FromStr` implementation, not constructed directly.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Uri {
    /// The URL to the repository containing the Atom, or `None` when none could be derived.
    url: Option<Url>,
    /// The Atom's ID.
    tag: AtomTag,
    /// The requested Atom version requirement, if one was given.
    version: Option<VersionReq>,
}
115
/// Borrowed, not-yet-validated split of an Atom URI into its URL part and its atom part.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(test, derive(Serialize, Deserialize))]
struct Ref<'a> {
    /// Components of the text before the `::` separator (all `None` when there is none).
    #[cfg_attr(test, serde(borrow))]
    url: UrlRef<'a>,
    /// Atom tag and optional version taken from the text after the `::` separator.
    atom: AtomRef<'a>,
}
123
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Default)]
#[cfg_attr(test, derive(Serialize, Deserialize))]
/// A URL, potentially written with an alias, together with an optional ref.
///
/// Its textual form is `url`, or `url^^ref` when a non-empty ref is present.
pub struct AliasedUrl {
    /// The URL itself.
    url: Url,
    /// The text following the `^^` separator, if any.
    r#ref: Option<String>,
}
131
132impl Display for AliasedUrl {
133    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
134        let AliasedUrl { url, r#ref } = self;
135        if let Some(r) = r#ref {
136            if r.is_empty() {
137                url.fmt(f)
138            } else {
139                write!(f, "{}^^{}", url, r)
140            }
141        } else {
142            url.fmt(f)
143        }
144    }
145}
146
147impl FromStr for AliasedUrl {
148    type Err = UriError;
149
150    fn from_str(s: &str) -> Result<Self, Self::Err> {
151        let (r, url) = match split_carot(s) {
152            Ok((s, Some(u))) => (s, u),
153            Ok((s, None)) => ("", s),
154            _ => return Err(UriError::NoUrl),
155        };
156
157        let url = UrlRef::from(url);
158        let u = url.to_url();
159        let r#ref = r.is_empty().not().then_some(r.to_string());
160        if let Some(url) = u {
161            Ok(AliasedUrl { url, r#ref })
162        } else {
163            Err(UriError::NoUrl)
164        }
165    }
166}
167
168impl TryFrom<&str> for AliasedUrl {
169    type Error = UriError;
170
171    fn try_from(s: &str) -> Result<Self, Self::Error> {
172        FromStr::from_str(s)
173    }
174}
175
/// Borrowed components of the URL portion of an Atom URI, before any alias is expanded.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Default)]
#[cfg_attr(test, derive(Serialize, Deserialize))]
struct UrlRef<'a> {
    /// The URI scheme (e.g., "https", "ssh"), if present.
    scheme: Option<&'a str>,
    /// The username.
    user: Option<&'a str>,
    /// The password.
    pass: Option<&'a str>,
    /// A URL fragment which may contain an alias to be later expanded
    frag: Option<&'a str>,
}
188
/// Borrowed, unvalidated atom portion of an Atom URI.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(test, derive(Serialize, Deserialize))]
struct AtomRef<'a> {
    /// The specific Atom within the repository.
    tag: Option<&'a str>,
    /// The version of the Atom, if specified.
    version: Option<&'a str>,
}
197
198use nom::branch::alt;
199use nom::bytes::complete::{tag, take_until};
200use nom::character::complete::digit1;
201use nom::combinator::{all_consuming, map, not, opt, peek, rest, verify};
202use nom::sequence::{separated_pair, tuple};
203use nom::{IResult, ParseTo};
204
205fn parse(input: &str) -> Ref<'_> {
206    let (rest, url) = match url(input) {
207        Ok(s) => s,
208        Err(_) => (input, None),
209    };
210
211    let url = url.map(UrlRef::from).unwrap_or_default();
212
213    let atom = AtomRef::from(rest);
214
215    tracing::trace!(
216        url.scheme,
217        url.user,
218        url.pass = url.pass.map(|_| "<redacted>"),
219        url.frag,
220        atom.tag,
221        atom.version,
222        "{}",
223        input
224    );
225
226    Ref { url, atom }
227}
228
/// Tries to split a leading alias off `input`.
///
/// The alias candidate is the text before the first `:`, provided that colon is not directly
/// followed by a digit; otherwise the whole input is the candidate. A candidate containing
/// `:`, `/` or `.` is rejected.
///
/// Returns `(remaining input, Some(alias))` on success and `(input, None)` when no non-empty
/// candidate is accepted.
fn parse_alias(input: &str) -> (&str, Option<&str>) {
    opt(verify(
        map(
            alt((
                tuple((
                    take_until::<_, _, ()>(":"),
                    tag(":"),
                    // not a port
                    peek(not(digit1)),
                )),
                // no usable colon: consider the entire input as the candidate
                map(rest, |a| (a, "", ())),
            )),
            |(a, ..)| a,
        ),
        // not an scp url
        |a| {
            !(a as &str)
                .chars()
                .any(|c| c == ':' || c == '/' || c == '.')
        },
    ))(input)
    // an empty candidate counts as "no alias"
    .map(empty_none)
    .unwrap_or((input, None))
}
253
254fn parse_host(input: &str) -> IResult<&str, (&str, &str)> {
255    alt((first_path, ssh_host, map(rest, |a| (a, ""))))(input)
256}
257
258fn parse_port(input: &str) -> IResult<&str, Option<(&str, &str)>> {
259    opt(all_consuming(separated_pair(
260        take_until(":"),
261        tag(":"),
262        digit1,
263    )))(input)
264}
265
/// Splits an scp-style `host:` prefix (optionally `host:port:`) off the front of `input`.
///
/// Returns the remaining input and `(host, delimiter)`. When digits directly follow the first
/// colon and a second colon appears later, the returned host slice spans everything up to
/// (but excluding) that second colon, and the remaining input starts after it.
fn ssh_host(input: &str) -> IResult<&str, (&str, &str)> {
    let (rest, (host, colon)) = tuple((take_until(":"), tag(":")))(input)?;

    // Only taken when the text starts with a digit and is itself terminated by a colon.
    let (rest, port) = opt(tuple((peek(digit1), take_until(":"), tag(":"))))(rest)?;

    match port {
        Some((_, port_str, second_colon)) => {
            // Re-slice `input` so the host keeps its `:port` suffix as one contiguous `&str`.
            let full_host = &input[..(host.len() + colon.len() + port_str.len())];
            Ok((rest, (full_host, second_colon)))
        },
        None => Ok((rest, (host, colon))),
    }
}
279
280fn first_path(input: &str) -> IResult<&str, (&str, &str)> {
281    tuple((
282        verify(take_until("/"), |h: &str| {
283            !h.contains(':') || parse_port(h).ok().and_then(|(_, p)| p).is_some()
284        }),
285        tag("/"),
286    ))(input)
287}
288
/// Borrowed `(scheme, user, password)` triple, as produced by `parse_url`.
type UrlPrefix<'a> = (Option<&'a str>, Option<&'a str>, Option<&'a str>);
290
291fn parse_url(url: &str) -> IResult<&str, UrlPrefix<'_>> {
292    let (rest, (scheme, user_pass)) = tuple((scheme, split_at))(url)?;
293
294    let (user, pass) = match user_pass {
295        Some(s) => match split_colon(s) {
296            Ok((p, Some(u))) => (Some(u), Some(p)),
297            Ok((u, None)) => (Some(u), None),
298            _ => (Some(s), None),
299        },
300        None => (None, None),
301    };
302
303    Ok((rest, (scheme, user, pass)))
304}
305
/// Maps the empty string to `None` and any other string to `Some` of itself.
fn not_empty(input: &str) -> Option<&str> {
    Some(input).filter(|s| !s.is_empty())
}
309
310fn empty_none<'a>((rest, opt): (&'a str, Option<&'a str>)) -> (&'a str, Option<&'a str>) {
311    (rest, opt.and_then(not_empty))
312}
313
314fn opt_split<'a>(input: &'a str, delim: &str) -> IResult<&'a str, Option<&'a str>> {
315    opt(map(tuple((take_until(delim), tag(delim))), |(url, _)| url))(input).map(empty_none)
316}
317
318fn url(input: &str) -> IResult<&str, Option<&str>> {
319    opt_split(input, "::")
320}
321
322fn scheme(input: &str) -> IResult<&str, Option<&str>> {
323    opt_split(input, "://")
324}
325
326fn split_at(input: &str) -> IResult<&str, Option<&str>> {
327    opt_split(input, "@")
328}
329
330fn split_carot(input: &str) -> IResult<&str, Option<&str>> {
331    opt_split(input, "^^")
332}
333
334fn split_colon(input: &str) -> IResult<&str, Option<&str>> {
335    opt_split(input, ":")
336}
337
impl<'a> From<&'a str> for Ref<'a> {
    /// Parses a string slice into a `Ref` by delegating to `parse`.
    ///
    /// This conversion is infallible: nothing is validated here, the input is only split
    /// into borrowed components.
    ///
    /// # Arguments
    ///
    /// * `input` - A string slice containing the URI to parse.
    ///
    /// # Returns
    ///
    /// A `Ref` instance borrowing its components from `input`.
    fn from(input: &'a str) -> Self {
        parse(input)
    }
}
354
/// An error encountered when constructing the concrete types from an Atom URI
#[derive(Error, Debug)]
pub enum UriError {
    /// Malformed atom tag.
    #[error(transparent)]
    BadTag(#[from] Error),
    /// The version requested is not valid.
    #[error(transparent)]
    InvalidVersionReq(#[from] semver::Error),
    /// The Url did not parse correctly.
    #[error(transparent)]
    UrlParse(#[from] gix_url::parse::Error),
    /// There is no alias in the configuration matching the one given in the URI.
    #[error("The passed alias does not exist: {0}")]
    NoAlias(String),
    /// No URL could be built from the input.
    #[error("Parsing URL failed")]
    NoUrl,
    #[error("Missing the required Atom ID in URI")]
    /// The Atom identifier is missing, but required.
    NoAtom,
}
377
378use std::borrow::Cow;
379impl Aliases {
380    fn get_alias(&self, s: &str) -> Result<&str, UriError> {
381        self.get(s)
382            .map_or_else(|| Err(UriError::NoAlias(s.into())), |s| Ok(*s))
383    }
384
385    fn resolve_alias(&'static self, s: &str) -> Result<Cow<'static, str>, UriError> {
386        let res = self.get_alias(s)?;
387
388        // allow one level of indirection in alises, e.g. `org = gh:my-org`
389        let res = match res.split_once(':') {
390            Some((s, rest)) => {
391                let res = self.get_alias(s)?;
392                Cow::Owned(format!("{res}/{rest}"))
393            },
394            None => Cow::Borrowed(res),
395        };
396
397        Ok(res)
398    }
399}
400
impl Deref for Aliases {
    type Target = HashMap<&'static str, &'static str>;

    /// Exposes the wrapped alias table so map methods can be called on `Aliases` directly.
    fn deref(&self) -> &Self::Target {
        self.0
    }
}
408
409impl<'a> From<&'a str> for UrlRef<'a> {
410    fn from(s: &'a str) -> Self {
411        let (scheme, user, pass, frag) = match parse_url(s) {
412            Ok((frag, (scheme, user, pass))) => (scheme, user, pass, not_empty(frag)),
413            _ => (None, None, None, None),
414        };
415
416        Self {
417            scheme,
418            user,
419            pass,
420            frag,
421        }
422    }
423}
424
425impl<'a> From<&'a str> for AtomRef<'a> {
426    fn from(s: &'a str) -> Self {
427        let (tag, version) = match split_at(s) {
428            Ok((rest, Some(atom))) => (Some(atom), not_empty(rest)),
429            Ok((rest, None)) => (not_empty(rest), None),
430            _ => (None, None),
431        };
432
433        AtomRef { tag, version }
434    }
435}
436
437use std::sync::LazyLock;
/// Process-wide alias table, built on first access from `config::CONFIG.aliases()`.
static ALIASES: LazyLock<Aliases> = LazyLock::new(|| Aliases(config::CONFIG.aliases()));
439
440impl<'a> UrlRef<'a> {
441    fn render_alias(&self) -> Option<(&str, Option<Cow<'static, str>>)> {
442        let (frag, alias) = parse_alias(self.frag?);
443
444        alias.and_then(|a| ALIASES.resolve_alias(a).ok().map(|a| (frag, Some(a))))
445    }
446
447    fn to_url(&self) -> Option<Url> {
448        use gix_url::Scheme;
449
450        let (frag, resolved) = self.render_alias().unwrap_or((self.frag?, None));
451
452        #[allow(clippy::unnecessary_unwrap)]
453        let (rest, (maybe_host, delim)) = if resolved.is_some() {
454            resolved
455                .as_ref()
456                .and_then(|r| parse_host(r).ok())
457                .unwrap_or(("", (resolved.as_ref().unwrap(), "")))
458        } else {
459            parse_host(frag).unwrap_or(("", (frag, "")))
460        };
461
462        let (maybe_host, port) = parse_port(maybe_host)
463            .ok()
464            .and_then(|(_, h)| h.map(|(h, p)| (h, p.parse_to())))
465            .unwrap_or((maybe_host, None));
466
467        let host = addr::parse_dns_name(maybe_host).ok().and_then(|s| {
468            if s.has_known_suffix() && maybe_host.contains('.')
469                || self.user.is_some()
470                || self.pass.is_some()
471            {
472                Some(maybe_host)
473            } else {
474                None
475            }
476        });
477
478        let scheme: Scheme = self
479            .scheme
480            .unwrap_or_else(|| {
481                if host.is_none() {
482                    "file"
483                } else if delim == ":" || self.user.is_some() && self.pass.is_none() {
484                    "ssh"
485                } else {
486                    "https"
487                }
488            })
489            .into();
490
491        // special case for empty fragments, e.g. foo::my-atom
492        let rest = if rest.is_empty() { frag } else { rest };
493
494        let path = if host.is_none() {
495            format!("{maybe_host}{delim}{rest}")
496        } else if !rest.starts_with('/') {
497            format!("/{rest}")
498        } else {
499            rest.into()
500        };
501
502        tracing::trace!(
503            ?scheme,
504            delim,
505            host,
506            port,
507            path,
508            rest,
509            maybe_host,
510            frag,
511            ?resolved
512        );
513
514        let alternate_form = scheme == Scheme::File;
515        let port = if scheme == Scheme::Ssh {
516            tracing::warn!(
517                port,
518                "ignoring configured port due to an upstream parsing bug"
519            );
520            None
521        } else {
522            port
523        };
524
525        Url::from_parts(
526            scheme,
527            self.user.map(Into::into),
528            self.pass.map(Into::into),
529            host.map(Into::into),
530            port,
531            path.into(),
532            alternate_form,
533        )
534        .map_err(|e| {
535            tracing::debug!(?e);
536            e
537        })
538        .ok()
539    }
540}
541
542impl<'a> AtomRef<'a> {
543    fn render(&self) -> Result<(AtomTag, Option<VersionReq>), UriError> {
544        let tag = AtomTag::try_from(self.tag.ok_or(UriError::NoAtom)?)?;
545        let version = if let Some(v) = self.version {
546            VersionReq::parse(v)?.into()
547        } else {
548            None
549        };
550        Ok((tag, version))
551    }
552}
553
554impl<'a> TryFrom<Ref<'a>> for Uri {
555    type Error = UriError;
556
557    fn try_from(refs: Ref<'a>) -> Result<Self, Self::Error> {
558        let Ref { url, atom } = refs;
559
560        let url = url.to_url();
561
562        let (id, version) = atom.render()?;
563
564        tracing::trace!(?url, %id, ?version);
565
566        Ok(Uri {
567            url,
568            tag: id,
569            version,
570        })
571    }
572}
573
574impl<'a> TryFrom<UrlRef<'a>> for Url {
575    type Error = UriError;
576
577    fn try_from(refs: UrlRef<'a>) -> Result<Self, Self::Error> {
578        match refs.to_url() {
579            Some(url) => Ok(url),
580            None => Err(UriError::NoUrl),
581        }
582    }
583}
584
585impl FromStr for Uri {
586    type Err = UriError;
587
588    fn from_str(s: &str) -> Result<Self, Self::Err> {
589        let r = Ref::from(s);
590        Uri::try_from(r)
591    }
592}
593
594impl<'a> TryFrom<&'a str> for Uri {
595    type Error = UriError;
596
597    fn try_from(s: &'a str) -> Result<Self, Self::Error> {
598        s.parse()
599    }
600}
601
602impl Display for Uri {
603    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
604        let url = self
605            .url
606            .as_ref()
607            .map(ToString::to_string)
608            .unwrap_or_default();
609        let version = self
610            .version
611            .as_ref()
612            .map(|v| format!("@{v}"))
613            .unwrap_or_default();
614        write!(
615            f,
616            "{}::{}{}",
617            &url.trim_end_matches('/'),
618            self.tag,
619            &version
620        )
621    }
622}
623
impl Uri {
    #[must_use]
    /// Returns a reference to the Url parsed out of the Atom URI, or `None` when no URL
    /// could be derived from it.
    pub fn url(&self) -> Option<&Url> {
        self.url.as_ref()
    }

    #[must_use]
    /// Returns the Atom identifier parsed from the URI.
    pub fn tag(&self) -> &AtomTag {
        &self.tag
    }

    #[must_use]
    /// Returns the Atom version requirement parsed from the URI, if one was given.
    pub fn version(&self) -> Option<&VersionReq> {
        self.version.as_ref()
    }
}
643
644impl Display for UriOrUrl {
645    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
646        match self {
647            UriOrUrl::Atom(uri) => uri.fmt(f),
648            UriOrUrl::Pin(url) => url.fmt(f),
649        }
650    }
651}
652
653impl FromStr for UriOrUrl {
654    type Err = UriError;
655
656    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
657        match s.parse::<Uri>() {
658            Ok(uri) => return Ok(UriOrUrl::Atom(uri)),
659            Err(e @ UriError::BadTag(_)) => {
660                if s.contains("::") {
661                    return Err(e);
662                }
663            },
664            Err(e @ UriError::InvalidVersionReq(_)) => return Err(e),
665            Err(_) => (),
666        }
667        s.parse::<AliasedUrl>().map(UriOrUrl::Pin)
668    }
669}