atom/
lock.rs

//! # Atom Lockfile Format
//!
//! This module provides the types and structures for working with Atom lockfiles.
//! Lockfiles capture the exact versions and revisions of dependencies for reproducible
//! builds, similar to Cargo.lock or flake.lock but designed for the Atom ecosystem.
//!
//! ## Overview
//!
//! The lockfile format uses TOML with tagged enums for type safety while maintaining
//! portability across different tools and languages. Each dependency is represented
//! as a tagged union that can represent different types of dependencies:
//!
//! - **Atom dependencies** - References to other atoms by ID and version
//! - **Direct pins** - Direct references to external URLs with integrity verification
//! - **Git pins** - References to specific Git repositories and commits
//! - **Tarball pins** - References to tarball/zip archives
//! - **Cross-atom references** - Dependencies sourced from other atoms
//! - **Build sources** - Sources that must be fetched and available at build time
//!
//! ## Key Types
//!
//! - [`Lockfile`] - The root structure containing all resolved dependencies
//! - [`Dep`] - Enum representing different types of dependencies
//! - [`BuildSrc`] - Struct representing a locked build-time source
//! - [`ResolutionMode`] - Controls whether to resolve direct or transitive dependencies
//!
//! ## Example Lockfile
//!
//! Every entry lives in the `deps` array and is distinguished by its `type` key.
//!
//! ```toml
//! version = 1
//!
//! [[deps]]
//! type = "atom"
//! tag = "my-atom"
//! version = "1.0.0"
//! url = "https://example.com/my-repo.git"
//! rev = "abc123..."
//! id = "xyz789..."
//!
//! [[deps]]
//! type = "pin"
//! name = "external-lib"
//! url = "https://example.com/lib.tar.gz"
//! hash = "sha256:def456..."
//!
//! [[deps]]
//! type = "build"
//! name = "registry"
//! url = "https://registry.example.com"
//! hash = "sha256:ghi789..."
//! ```
//!
//! ## Security Features
//!
//! - **Cryptographic verification** using BLAKE3 hashes for atom content
//! - **Nix-compatible hashing** for tarballs and archives
//! - **Strict field validation** with `#[serde(deny_unknown_fields)]`
//! - **Type-safe dependency resolution** preventing invalid configurations
56
57use std::collections::BTreeMap;
58use std::path::PathBuf;
59
60use gix::{ObjectId, url as gix_url};
61use nix_compat::nixhash::NixHash;
62use semver::Version;
63use serde::{Deserialize, Deserializer, Serialize};
64use url::Url;
65
66#[cfg(test)]
67mod test;
68
69use crate::Manifest;
70use crate::id::AtomTag;
71use crate::manifest::deps::AtomReq;
72use crate::store::QueryVersion;
73
74/// A wrapper around NixHash to provide custom serialization behavior.
75#[derive(Debug, PartialEq, PartialOrd, Eq, Clone, Serialize)]
76pub(crate) struct WrappedNixHash(pub NixHash);
77
78/// Represents different types of Git commit hashes.
79///
80/// This enum supports both SHA-1 and SHA-256 hashes, which are serialized
81/// as untagged values in TOML for maximum compatibility.
82#[derive(Serialize, Deserialize, Debug, PartialEq, Clone, Eq)]
83#[serde(untagged)]
84pub enum LockDigest {
85    /// A SHA-1 commit hash.
86    #[serde(rename = "sha1")]
87    Sha1(#[serde(with = "hex")] [u8; 20]),
88    /// A SHA-256 commit hash.
89    #[serde(rename = "sha256")]
90    Sha256(#[serde(with = "hex")] [u8; 32]),
91    /// A BLAKE-3 digest.
92    #[serde(rename = "id")]
93    Blake3(#[serde(with = "serde_base32")] [u8; 32]),
94}
95
96use crate::manifest::deps::{deserialize_url, serialize_url};
97#[derive(Serialize, Deserialize, Debug, PartialEq, Clone, Eq)]
98/// Represents the location of an atom, either as a URL or a relative path.
99///
100/// This enum is used to specify where an atom can be found, supporting both
101/// remote Git repositories and local relative paths within a repository.
102pub enum AtomLocation {
103    /// A URL pointing to a Git repository containing the atom.
104    ///
105    /// When this variant is used, the atom will be fetched from the specified
106    /// Git repository URL. If not provided, defaults to the current repository.
107    #[serde(
108        rename = "url",
109        serialize_with = "serialize_url",
110        deserialize_with = "deserialize_url"
111    )]
112    Url(gix_url::Url),
113    /// A relative path within the repository where the atom is located.
114    ///
115    /// When this variant is used, the atom is located at the specified path
116    /// relative to the current atom. If not provided, defaults to the root.
117    #[serde(rename = "path")]
118    Path(PathBuf),
119}
120
121use crate::AtomId;
122use crate::id::Name;
123use crate::store::git::Root;
124#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
125/// Represents a locked atom dependency, referencing a verifiable repository slice.
126///
127/// This struct captures all the information needed to uniquely identify and
128/// fetch a specific version of an atom from a Git repository.
129#[serde(deny_unknown_fields)]
130pub struct AtomDep {
131    /// than the tag The unique identifier of the atom.
132    pub tag: AtomTag,
133    /// The name corresponding to the atom in the manifest at `deps.atoms.<name>`, if diffferent
134    #[serde(skip_serializing_if = "Option::is_none")]
135    pub name: Option<Name>,
136    /// The semantic version of the atom.
137    pub version: Version,
138    /// The location of the atom, whether local or remote.
139    ///
140    /// This field is flattened in the TOML serialization and omitted if None.
141    #[serde(flatten)]
142    pub location: AtomLocation,
143    /// The resolved Git revision (commit hash) for verification.
144    pub rev: LockDigest,
145    /// than cryptographic identity of the atom.
146    pub id: LockDigest,
147}
148
149#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
150/// Represents a direct pin to an external source, such as a URL or tarball.
151///
152/// This struct is used for dependencies that are pinned to specific URLs
153/// with integrity verification through cryptographic hashes.
154#[serde(deny_unknown_fields)]
155pub struct PinDep {
156    /// The name of the pinned source.
157    pub name: Name,
158    /// The URL of the source.
159    pub url: Url,
160    /// The hash for integrity verification (e.g., sha256).
161    hash: WrappedNixHash,
162    /// The relative path within the source (for Nix imports).
163    ///
164    /// This field is omitted from serialization if None.
165    #[serde(skip_serializing_if = "Option::is_none")]
166    pub path: Option<PathBuf>,
167}
168
169#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
170/// Represents a pinned Git repository with a specific revision.
171///
172/// This struct is used for dependencies that are pinned to specific Git
173/// repositories and commits, providing both URL and revision information.
174#[serde(deny_unknown_fields)]
175pub struct PinGitDep {
176    /// The name of the pinned Git source.
177    pub name: Name,
178    /// The Git repository URL.
179    pub url: Url,
180    /// The resolved revision (commit hash).
181    pub rev: LockDigest,
182    /// The relative path within the repo.
183    ///
184    /// This field is omitted from serialization if None.
185    #[serde(skip_serializing_if = "Option::is_none")]
186    pub path: Option<PathBuf>,
187}
188
189#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
190/// Represents a pinned tarball or archive source.
191///
192/// This struct is used for dependencies that are distributed as tarballs
193/// or archives, with integrity verification through cryptographic hashes.
194#[serde(deny_unknown_fields)]
195pub struct PinTarDep {
196    /// The name of the tar source.
197    pub name: Name,
198    /// The URL to the tarball.
199    pub url: Url,
200    /// The hash of the tarball.
201    hash: WrappedNixHash,
202    /// The relative path within the extracted archive.
203    ///
204    /// This field is omitted from serialization if None.
205    #[serde(skip_serializing_if = "Option::is_none")]
206    pub path: Option<PathBuf>,
207}
208
209#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
210/// Represents a cross-atom source reference, acquiring a dependency from another atom.
211///
212/// This struct enables atoms to reference dependencies from other atoms,
213/// creating a composition mechanism for building complex systems from simpler parts.
214#[serde(deny_unknown_fields)]
215pub struct FromDep {
216    /// The name of the sourced dependency.
217    pub name: Name,
218    /// The atom ID from which to source.
219    pub from: AtomTag,
220    /// The name of the dependency to acquire from the 'from' atom (defaults to `name`).
221    ///
222    /// This field is omitted from serialization if None.
223    #[serde(skip_serializing_if = "Option::is_none")]
224    pub get: Option<String>,
225    /// The relative path for the sourced item (for Nix imports).
226    ///
227    /// This field is omitted from serialization if None.
228    #[serde(skip_serializing_if = "Option::is_none")]
229    pub path: Option<PathBuf>,
230}
231
232#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
233#[serde(tag = "type")]
234/// Enum representing the different types of locked dependencies, serialized as tagged TOML tables.
235///
236/// This enum provides a type-safe way to represent different kinds of dependencies
237/// in the lockfile, ensuring that each dependency type has the correct fields
238/// and validation at compile time.
239pub enum Dep {
240    /// An atom dependency variant.
241    ///
242    /// Represents a dependency on another atom, identified by its ID, version,
243    /// and Git revision.
244    #[serde(rename = "atom")]
245    Atom(AtomDep),
246    /// A direct pin to an external source variant.
247    ///
248    /// Represents a dependency pinned to a specific URL with integrity verification.
249    /// Used for dependencies that are not atoms but need to be fetched from external sources.
250    #[serde(rename = "pin")]
251    Pin(PinDep),
252    /// A Git-specific pin variant.
253    ///
254    /// Represents a dependency pinned to a specific Git repository and commit.
255    /// Similar to Pin but specifically for Git repositories.
256    #[serde(rename = "pin+git")]
257    PinGit(PinGitDep),
258    /// A tarball pin variant.
259    ///
260    /// Represents a dependency pinned to a tarball or archive file.
261    /// Used for dependencies distributed as compressed archives.
262    #[serde(rename = "pin+tar")]
263    PinTar(PinTarDep),
264    /// A cross-atom source reference variant.
265    ///
266    /// Represents a dependency that is sourced from another atom, enabling
267    /// composition of complex systems from simpler atom components.
268    #[serde(rename = "from")]
269    From(FromDep),
270    /// A reference to a build source.
271    ///
272    /// Represents a source that needs to be fetched and available during the
273    /// build process, such as source code or configuration file.
274    #[serde(rename = "build")]
275    Build(BuildSrc),
276}
277
278#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
279#[serde(deny_unknown_fields)]
280/// Represents a locked build-time source, such as a registry or configuration.
281///
282/// This struct is used for sources that are fetched during the build process,
283/// such as package registries or configuration files that need to be available
284/// at build time.
285pub struct BuildSrc {
286    /// The name of the source.
287    pub name: Name,
288    /// The URL to fetch the source.
289    pub url: Url,
290    /// The hash for verification.
291    hash: WrappedNixHash,
292}
293
294/// A wrapper for `BTreeMap` that ensures consistent ordering for serialization
295/// and minimal diffs in the lockfile. It maps dependency names to their locked
296/// representations.
297#[derive(Debug, PartialEq, Eq)]
298pub(crate) struct DepMap<Deps>(BTreeMap<Name, Deps>);
299
300#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
301/// The root structure for the lockfile, containing resolved dependencies and sources.
302///
303/// This struct represents the complete lockfile format used by atom to capture
304/// the exact versions and revisions of all dependencies for reproducible builds.
305/// The lockfile ensures that builds are deterministic and can be reproduced
306/// across different environments.
307#[serde(deny_unknown_fields)]
308pub struct Lockfile {
309    /// The version of the lockfile schema.
310    ///
311    /// This field allows for future evolution of the lockfile format while
312    /// maintaining backward compatibility.
313    pub version: u8,
314    /// The list of locked dependencies (absent or empty if none).
315    ///
316    /// This field contains all the resolved dependencies with their exact
317    /// versions and revisions. It is omitted from serialization if None or empty.
318    #[serde(default, skip_serializing_if = "DepMap::is_empty")]
319    pub(crate) deps: DepMap<Dep>,
320}
321
322#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
323/// The resolution mode for generating the lockfile.
324///
325/// This enum controls how dependencies are resolved when generating a lockfile,
326/// determining whether to lock only direct dependencies or recursively resolve
327/// all transitive dependencies.
328pub enum ResolutionMode {
329    /// Shallow resolution: Lock only direct dependencies.
330    ///
331    /// In this mode, only the immediate dependencies declared in the manifest
332    /// are resolved and locked. Transitive dependencies are not included in
333    /// the lockfile, making it faster but less comprehensive.
334    #[serde(rename = "shallow")]
335    Shallow,
336    /// Deep resolution: Recursively lock all transitive dependencies (future).
337    ///
338    /// In this mode, all dependencies and their dependencies are recursively
339    /// resolved and locked, ensuring complete reproducibility but requiring
340    /// more time and resources. This feature is planned for future implementation.
341    #[serde(rename = "deep")]
342    Deep,
343}
344
345impl<'de> Deserialize<'de> for WrappedNixHash {
346    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
347    where
348        D: Deserializer<'de>,
349    {
350        // Deserialize into a String to handle owned data
351        let s = String::deserialize(deserializer)?;
352        // Pass the String as &str to NixHash::from_str
353        let hash = NixHash::from_str(&s, None).map_err(|_| {
354            serde::de::Error::invalid_value(serde::de::Unexpected::Str(&s), &"NixHash")
355        })?;
356        Ok(WrappedNixHash(hash))
357    }
358}
359
360impl From<ObjectId> for LockDigest {
361    fn from(id: ObjectId) -> Self {
362        match id {
363            ObjectId::Sha1(bytes) => LockDigest::Sha1(bytes),
364        }
365    }
366}
367
368use base32::{self};
369use serde::Serializer;
370
371mod serde_base32 {
372    use super::*;
373
374    pub fn serialize<S>(hash: &[u8; 32], serializer: S) -> Result<S::Ok, S::Error>
375    where
376        S: Serializer,
377    {
378        let encoded = base32::encode(crate::BASE32, hash);
379        serializer.serialize_str(&encoded)
380    }
381
382    pub fn deserialize<'de, D>(deserializer: D) -> Result<[u8; 32], D::Error>
383    where
384        D: Deserializer<'de>,
385    {
386        let s = String::deserialize(deserializer)?;
387        base32::decode(crate::BASE32, &s)
388            .ok_or_else(|| serde::de::Error::custom("Invalid Base32 string"))
389            .and_then(|bytes| {
390                bytes
391                    .try_into()
392                    .map_err(|_| serde::de::Error::custom("Expected 32 bytes for BLAKE3 hash"))
393            })
394    }
395}
396
397impl From<AtomId<Root>> for LockDigest {
398    fn from(value: AtomId<Root>) -> Self {
399        use crate::Compute;
400
401        LockDigest::Blake3(*value.compute_hash())
402    }
403}
404
405impl Default for Lockfile {
406    fn default() -> Self {
407        Self {
408            version: 1,
409            deps: Default::default(),
410        }
411    }
412}
413
414impl<T> AsRef<BTreeMap<Name, T>> for DepMap<T> {
415    fn as_ref(&self) -> &BTreeMap<Name, T> {
416        let DepMap(map) = self;
417        map
418    }
419}
420
421impl<T> AsMut<BTreeMap<Name, T>> for DepMap<T> {
422    fn as_mut(&mut self) -> &mut BTreeMap<Name, T> {
423        let DepMap(map) = self;
424        map
425    }
426}
427
428impl<T: Clone + Serialize> Serialize for DepMap<T> {
429    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
430    where
431        S: Serializer,
432    {
433        // BTreeMap iterates in sorted order automatically.
434        let values: Vec<_> = self.as_ref().values().cloned().collect();
435        values.serialize(serializer)
436    }
437}
438
439impl<'de> Deserialize<'de> for DepMap<Dep> {
440    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
441    where
442        D: Deserializer<'de>,
443    {
444        let entries: Vec<Dep> = Vec::deserialize(deserializer)?;
445        let mut map = BTreeMap::new();
446        for dep in entries {
447            match dep {
448                Dep::Atom(atom_dep) => {
449                    let key = if let Some(n) = &atom_dep.name {
450                        n
451                    } else {
452                        &atom_dep.tag
453                    };
454                    map.insert(key.to_owned(), Dep::Atom(atom_dep));
455                },
456                Dep::Pin(pin_dep) => {
457                    map.insert(pin_dep.name.to_owned(), Dep::Pin(pin_dep));
458                },
459                Dep::PinGit(pin_git_dep) => {
460                    map.insert(pin_git_dep.name.to_owned(), Dep::PinGit(pin_git_dep));
461                },
462                Dep::PinTar(pin_tar_dep) => {
463                    map.insert(pin_tar_dep.name.to_owned(), Dep::PinTar(pin_tar_dep));
464                },
465                Dep::From(from_dep) => {
466                    map.insert(from_dep.name.to_owned(), Dep::From(from_dep));
467                },
468                Dep::Build(build_dep) => {
469                    map.insert(build_dep.name.to_owned(), Dep::Build(build_dep));
470                },
471            }
472        }
473        Ok(DepMap(map))
474    }
475}
476
477impl<T> DepMap<T> {
478    fn is_empty(&self) -> bool {
479        self.as_ref().is_empty()
480    }
481}
482
483impl<T> Default for DepMap<T> {
484    fn default() -> Self {
485        Self(Default::default())
486    }
487}
488
489impl Lockfile {
490    /// Removes any dependencies from the lockfile that are no longer present in the
491    /// manifest, ensuring the lockfile only contains entries that are still relevant.
492    pub(crate) fn sanitize(&mut self, manifest: &Manifest) {
493        self.deps
494            .as_mut()
495            .retain(|k, _| manifest.deps.contains_key(k));
496        self.synchronize(manifest);
497    }
498
499    /// Updates the lockfile to match the dependencies specified in the manifest.
500    /// It resolves any new dependencies, updates existing ones if their version
501    /// requirements have changed, and ensures the lockfile is fully consistent.
502    pub(crate) fn synchronize(&mut self, manifest: &Manifest) {
503        for (k, v) in manifest.deps.iter() {
504            if !self.deps.as_ref().contains_key(k) {
505                match v {
506                    crate::manifest::deps::Dependency::Atom(atom_req) => {
507                        if let Ok(dep) = atom_req.resolve(k) {
508                            self.deps.as_mut().insert(k.to_owned(), Dep::Atom(dep));
509                        } else {
510                            tracing::warn!(message = "unlocked dependency could not be resolved", key = %k);
511                        };
512                    },
513                    crate::manifest::deps::Dependency::Pin(_) => todo!(),
514                    crate::manifest::deps::Dependency::Src(_) => todo!(),
515                }
516            } else {
517                match v {
518                    crate::manifest::deps::Dependency::Atom(atom_req) => {
519                        let req = atom_req.version();
520                        if let Some(Dep::Atom(dep)) = self.deps.as_ref().get(k) {
521                            if !req.matches(&dep.version) {
522                                tracing::warn!(message = "updating out of date dependency in accordance with spec", key = %k);
523                                if let Ok(dep) = atom_req.resolve(k) {
524                                    self.deps.as_mut().insert(k.to_owned(), Dep::Atom(dep));
525                                } else {
526                                    tracing::warn!(message = "out of sync dependency could not be resolved, check the version spec", key = %k);
527                                };
528                            }
529                        } else if let Ok(dep) = atom_req.resolve(k) {
530                            self.deps.as_mut().insert(k.to_owned(), Dep::Atom(dep));
531                        } else {
532                            tracing::warn!(message = "dependency is mislabeled as inproper type, and attempts to rectify failed", key = %k);
533                        };
534                    },
535                    crate::manifest::deps::Dependency::Pin(_) => todo!(),
536                    crate::manifest::deps::Dependency::Src(_) => todo!(),
537                }
538            }
539        }
540    }
541}
542
543impl AtomReq {
544    /// Resolves an `AtomReq` to a fully specified `AtomDep` by querying the
545    /// remote Git repository to find the highest matching version and its
546    /// corresponding commit hash.
547    ///
548    /// # Arguments
549    ///
550    /// * `key` - The name of the dependency as specified in the manifest, which may differ from the
551    ///   atom's tag.
552    ///
553    /// # Returns
554    ///
555    /// A `Result` containing the resolved `AtomDep` or a `git::Error` if
556    /// resolution fails.
557    pub(crate) fn resolve(&self, key: &AtomTag) -> Result<AtomDep, crate::store::git::Error> {
558        let url = self.store();
559
560        let atoms = url.get_atoms(None)?;
561        let tag = if let Some(tag) = self.tag() {
562            tag.to_owned()
563        } else {
564            // TODO: see if we can find a way to avoid incorrectly encoding the wrong tag here if
565            // the wrong key is passed. Perhaps a non-serialized field which unconditionally stores
566            // the `AtomId`, to remain unambiguous?
567            key.to_owned()
568        };
569        let (version, oid) = <gix_url::Url as QueryVersion<_, _, _, _>>::process_highest_match(
570            atoms.clone(),
571            &tag,
572            self.version(),
573        )
574        .ok_or(crate::store::git::Error::NoMatchingVersion)?;
575        let name = (key != &tag).then_some(key.to_owned());
576        let id = AtomId::construct(&atoms, tag.to_owned())?;
577
578        Ok(AtomDep {
579            tag: tag.to_owned(),
580            name,
581            version,
582            location: if let gix_url::Scheme::File = url.scheme {
583                AtomLocation::Path(url.path.to_string().into())
584            } else {
585                AtomLocation::Url(url.to_owned())
586            },
587            rev: match oid {
588                ObjectId::Sha1(bytes) => LockDigest::Sha1(bytes),
589            },
590            id: id.into(),
591        })
592    }
593}