atom/store/
git.rs

1//! # Atom Git Store
2//!
3//! This module contains the foundational types for the Git implementation of an Ekala store.
4//!
5//! In particular, the implementation to initialize ([`Init`]) a Git repository as an Ekala store
6//! is contained here, as well as the type representing the [`Root`] of history used for an
7//! [`crate::AtomId`].
8#[cfg(test)]
9pub(crate) mod test;
10
11use std::sync::OnceLock;
12
13use bstr::BStr;
14use gix::discover::upwards::Options;
15use gix::protocol::handshake::Ref;
16use gix::protocol::transport::client::Transport;
17use gix::sec::Trust;
18use gix::sec::trust::Mapping;
19use gix::{Commit, ObjectId, ThreadSafeRepository};
20use thiserror::Error as ThisError;
21
22use crate::id::Origin;
23use crate::store::QueryVersion;
24
25/// An error encountered during initialization or other git store operations.
26#[derive(ThisError, Debug)]
27pub enum Error {
28    /// No git ref found.
29    #[error("No ref named `{0}` found for remote `{1}`")]
30    NoRef(String, String),
31    /// No remote url configured
32    #[error("No `{0}` url configured for remote `{1}`")]
33    NoUrl(String, String),
34    /// This git repository does not have a working directory.
35    #[error("Repository does not have a working directory")]
36    NoWorkDir,
37    /// The repository root calculation failed.
38    #[error("Failed to calculate the repositories root commit")]
39    RootNotFound,
40    /// The calculated root does not match what was reported by the remote.
41    #[error("The calculated root does not match the reported one")]
42    RootInconsistent,
43    /// The requested version is not contained on the remote.
44    #[error("The version requested does not exist on the remote")]
45    NoMatchingVersion,
46    /// A transparent wrapper for a [`gix::revision::walk::Error`]
47    #[error(transparent)]
48    WalkFailure(#[from] gix::revision::walk::Error),
49    /// A transparent wrapper for a [`std::io::Error`]
50    #[error(transparent)]
51    Io(#[from] std::io::Error),
52    /// A transparent wrapper for a [`std::path::StripPrefixError`]
53    #[error(transparent)]
54    NormalizationFailed(#[from] std::path::StripPrefixError),
55    /// A transparent wrapper for a [`Box<gix::remote::find::existing::Error>`]
56    #[error(transparent)]
57    NoRemote(#[from] Box<gix::remote::find::existing::Error>),
58    /// A transparent wrapper for a [`Box<gix::remote::connect::Error>`]
59    #[error(transparent)]
60    Connect(#[from] Box<gix::remote::connect::Error>),
61    /// A transparent wrapper for a [`Box<gix::remote::fetch::prepare::Error>`]
62    #[error(transparent)]
63    Refs(#[from] Box<gix::remote::fetch::prepare::Error>),
64    /// A transparent wrapper for a [`Box<gix::remote::fetch::Error>`]
65    #[error(transparent)]
66    Fetch(#[from] Box<gix::remote::fetch::Error>),
67    /// A transparent wrapper for a [`Box<gix::object::find::existing::with_conversion::Error>`]
68    #[error(transparent)]
69    NoCommit(#[from] Box<gix::object::find::existing::with_conversion::Error>),
70    /// A transparent wrapper for a [`Box<gix::refspec::parse::Error>`]
71    #[error(transparent)]
72    AddRefFailed(#[from] Box<gix::refspec::parse::Error>),
73    /// A transparent wrapper for a [`Box<gix::reference::edit::Error>`]
74    #[error(transparent)]
75    WriteRef(#[from] Box<gix::reference::edit::Error>),
76    /// A transparent wrapper for a [`gix::protocol::transport::client::connect::Error`]
77    #[error(transparent)]
78    Connection(#[from] gix::protocol::transport::client::connect::Error),
79    /// A transparent wrapper for a [`gix::config::credential_helpers::Error`]
80    #[error(transparent)]
81    Creds(#[from] gix::config::credential_helpers::Error),
82    /// A transparent wrapper for a [`gix::config::file::init::from_paths::Error`]
83    #[error(transparent)]
84    File(#[from] gix::config::file::init::from_paths::Error),
85    /// A transparent wrapper for a [`gix::protocol::handshake::Error`]
86    #[error(transparent)]
87    Handshake(#[from] Box<gix::protocol::handshake::Error>),
88    /// A transparent wrapper for a [`gix::refspec::parse::Error`]
89    #[error(transparent)]
90    Refspec(#[from] gix::refspec::parse::Error),
91    /// A transparent wrapper for a [`gix::refspec::parse::Error`]
92    #[error(transparent)]
93    Refmap(#[from] gix::protocol::fetch::refmap::init::Error),
94    /// A transparent wrapper for a [`gix::refspec::parse::Error`]
95    #[error(transparent)]
96    UrlParse(#[from] gix::url::parse::Error),
97}
98
99impl Error {
100    pub(crate) fn warn(self) -> Self {
101        tracing::warn!(message = %self);
102        self
103    }
104}
105
106/// Provide a lazyily instantiated static reference to the git repository.
107static REPO: OnceLock<Option<ThreadSafeRepository>> = OnceLock::new();
108
109use std::borrow::Cow;
110static DEFAULT_REMOTE: OnceLock<Cow<str>> = OnceLock::new();
111
112/// The wrapper type for the underlying type which will be used to represent
113/// the "root" identifier for an [`crate::AtomId`]. For git, this is a [`gix::ObjectId`]
114/// representing the original commit made in the repositories history.
115///
116/// The wrapper helps disambiguate at the type level between object ids and the root id.
117#[derive(Clone, Copy, Debug, PartialEq, Eq)]
118pub struct Root(ObjectId);
119
120/// Return a static reference the the local Git repository.
121pub fn repo() -> Result<Option<&'static ThreadSafeRepository>, Box<gix::discover::Error>> {
122    let mut error = None;
123    let repo = REPO.get_or_init(|| match get_repo() {
124        Ok(repo) => Some(repo),
125        Err(e) => {
126            error = Some(e);
127            None
128        },
129    });
130    if let Some(e) = error {
131        Err(e)
132    } else {
133        Ok(repo.as_ref())
134    }
135}
136
137use std::io;
/// Runs the `git` binary with `args`, returning its standard output on success.
///
/// Note: We rely on this only for operations that are not yet implemented in GitOxide.
///       Once push is implemented upstream, we can, and should, remove this.
///
/// # Errors
///
/// Returns an [`io::Error`] if the process cannot be spawned, or if it exits
/// unsuccessfully. In the latter case the error message is git's standard error
/// output with trailing whitespace removed, or the exit status when git printed
/// nothing to standard error.
pub fn run_git_command(args: &[&str]) -> io::Result<Vec<u8>> {
    use std::process::Command;
    let output = Command::new("git").args(args).output()?;

    if output.status.success() {
        return Ok(output.stdout);
    }

    let stderr = String::from_utf8_lossy(&output.stderr);
    let message = match stderr.trim_end() {
        // Never hand back a blank error; fall back to the exit status.
        "" => format!("git failed ({})", output.status),
        text => text.to_owned(),
    };
    Err(io::Error::other(message))
}
152
153fn get_repo() -> Result<ThreadSafeRepository, Box<gix::discover::Error>> {
154    let opts = Options {
155        required_trust: Trust::Full,
156        ..Default::default()
157    };
158    ThreadSafeRepository::discover_opts(".", opts, Mapping::default()).map_err(Box::new)
159}
160
161/// Return a static reference to the default remote configured for pushing
162pub fn default_remote() -> &'static str {
163    use gix::remote::Direction;
164    DEFAULT_REMOTE
165        .get_or_init(|| {
166            repo()
167                .ok()
168                .flatten()
169                .and_then(|repo| {
170                    repo.to_thread_local()
171                        .remote_default_name(Direction::Push)
172                        .map(|s| s.to_string().into())
173                })
174                .unwrap_or("origin".into())
175        })
176        .as_ref()
177}
178
179use std::ops::Deref;
180impl Deref for Root {
181    type Target = ObjectId;
182
183    fn deref(&self) -> &Self::Target {
184        &self.0
185    }
186}
187
188type AtomQuery = (AtomTag, Version, ObjectId);
189impl Origin<Root> for std::vec::IntoIter<AtomQuery> {
190    type Error = Error;
191
192    fn calculate_origin(&self) -> Result<Root, Self::Error> {
193        let root = <gix::Url as QueryVersion<_, _, _, _>>::process_root(self.to_owned())
194            .ok_or(Error::RootNotFound)?;
195        Ok(Root(root))
196    }
197}
198
199impl<'a> Origin<Root> for Commit<'a> {
200    type Error = Error;
201
202    fn calculate_origin(&self) -> Result<Root, Self::Error> {
203        use gix::revision::walk::Sorting;
204        use gix::traverse::commit::simple::CommitTimeOrder;
205        let mut walk = self
206            .ancestors()
207            .use_commit_graph(true)
208            .sorting(Sorting::ByCommitTime(CommitTimeOrder::OldestFirst))
209            .all()?;
210
211        while let Some(Ok(info)) = walk.next() {
212            if info.parent_ids.is_empty() {
213                return Ok(Root(info.id));
214            }
215        }
216
217        Err(Error::RootNotFound)
218    }
219}
220
221use std::path::{Path, PathBuf};
222
223use gix::Repository;
224
225use super::{NormalizeStorePath, QueryStore};
226
227impl NormalizeStorePath for Repository {
228    type Error = Error;
229
230    fn normalize<P: AsRef<Path>>(&self, path: P) -> Result<PathBuf, Error> {
231        use std::fs;
232
233        use path_clean::PathClean;
234        let path = path.as_ref();
235
236        let rel_repo_root = self.workdir().ok_or(Error::NoWorkDir)?;
237        let repo_root = fs::canonicalize(rel_repo_root)?;
238        let current = self.current_dir();
239        let rel = current.join(path).clean();
240
241        rel.strip_prefix(&repo_root)
242            .map_or_else(
243                |e| {
244                    // handle absolute paths as if they were relative to the repo root
245                    if !path.is_absolute() {
246                        return Err(e);
247                    }
248                    let cleaned = path.clean();
249                    // Preserve the platform-specific root
250                    let p = cleaned.strip_prefix(Path::new("/"))?;
251                    repo_root
252                        .join(p)
253                        .clean()
254                        .strip_prefix(&repo_root)
255                        .map(Path::to_path_buf)
256                },
257                |p| Ok(p.to_path_buf()),
258            )
259            .map_err(|e| {
260                tracing::warn!(
261                    message = "Ignoring path outside repo root",
262                    path = %path.display(),
263                );
264                Error::NormalizationFailed(e)
265            })
266    }
267}
268
269impl AsRef<[u8]> for Root {
270    fn as_ref(&self) -> &[u8] {
271        self.as_bytes()
272    }
273}
274
/// Helper for obtaining a printable, symbolic name for a remote.
trait EkalaRemote {
    /// The error produced when the remote has no symbolic name.
    type Error;
    /// Placeholder name reported for remotes without a symbolic name.
    const ANONYMOUS: &str = "<unamed>";
    /// Return the remote's symbolic name, or an error if it has none.
    fn try_symbol(&self) -> Result<&str, Self::Error>;
    /// Return the remote's symbolic name, falling back to [`Self::ANONYMOUS`].
    fn symbol(&self) -> &str {
        match self.try_symbol() {
            Ok(name) => name,
            Err(_) => Self::ANONYMOUS,
        }
    }
}
283
284impl<'repo> EkalaRemote for gix::Remote<'repo> {
285    type Error = Error;
286
287    fn try_symbol(&self) -> Result<&str, Self::Error> {
288        use gix::remote::Name;
289        self.name()
290            .and_then(Name::as_symbol)
291            .ok_or(Error::NoRemote(Box::new(
292                gix::remote::find::existing::Error::NotFound {
293                    name: Self::ANONYMOUS.into(),
294                },
295            )))
296    }
297}
298
299pub(super) const V1_ROOT: &str = "refs/tags/ekala/root/v1";
300const V1_ROOT_SEMVER: &str = "1.0.0";
301
302fn to_id(r: Ref) -> ObjectId {
303    let (_, t, p) = r.unpack();
304    // unwrap can't fail here as at least one of these is guaranteed Some
305    p.or(t).map(ToOwned::to_owned).unwrap()
306}
307
308use super::Init;
309impl<'repo> Init<Root, Ref, Box<dyn Transport + Send>> for gix::Remote<'repo> {
310    type Error = Error;
311
312    /// Determines if this remote is a valid Ekala store by pulling HEAD and the root
313    /// tag, ensuring the latter is actually the root of HEAD, returning the root.
314    #[tracing::instrument(skip(transport))]
315    fn ekala_root(
316        &self,
317        transport: Option<&mut Box<dyn Transport + Send>>,
318    ) -> Result<Root, Self::Error> {
319        use crate::id::Origin;
320
321        let span = tracing::Span::current();
322        crate::log::set_sub_task(&span, "💪 ensuring consistency with remote");
323
324        let repo = self.repo();
325        self.get_refs(["HEAD", V1_ROOT], transport).map(|i| {
326            let mut i = i.into_iter();
327            let root_for = |i: &mut dyn Iterator<Item = Ref>| {
328                i.next()
329                    .ok_or(Error::NoRef(V1_ROOT.to_owned(), self.symbol().to_owned()))
330                    .and_then(|r| {
331                        let id = to_id(r);
332                        Ok(repo.find_commit(id).map_err(Box::new)?)
333                    })
334                    .and_then(|c| {
335                        if c.parent_ids().count() != 0 {
336                            c.calculate_origin().map(|r| *r)
337                        } else {
338                            Ok(c.id)
339                        }
340                    })
341            };
342
343            let fst = root_for(&mut i)?;
344            let snd = root_for(&mut i)?;
345            if fst == snd {
346                Ok(Root(fst))
347            } else {
348                Err(Error::RootInconsistent)
349            }
350        })?
351    }
352
353    /// Sync with the given remote and get the most up to date HEAD according to it.
354    fn sync(&self, transport: Option<&mut Box<dyn Transport + Send>>) -> Result<Ref, Error> {
355        self.get_ref("HEAD", transport)
356    }
357
358    /// Initialize the repository by calculating the root, according to the latest HEAD.
359    fn ekala_init(&self, transport: Option<&mut Box<dyn Transport + Send>>) -> Result<(), Error> {
360        use gix::refs::transaction::PreviousValue;
361
362        use crate::Origin;
363
364        let name = self.try_symbol()?;
365        let head = to_id(self.sync(transport)?);
366        let repo = self.repo();
367        let root = *repo
368            .find_commit(head)
369            .map_err(Box::new)?
370            .calculate_origin()?;
371
372        let root_ref = repo
373            .reference(V1_ROOT, root, PreviousValue::MustNotExist, "init: root")
374            .map_err(Box::new)?
375            .name()
376            .as_bstr()
377            .to_string();
378
379        // FIXME: use gix for push once it supports it
380        run_git_command(&[
381            "-C",
382            repo.git_dir().to_string_lossy().as_ref(),
383            "push",
384            name,
385            format!("{root_ref}:{root_ref}").as_str(),
386        ])?;
387        tracing::info!(remote = name, message = "Successfully initialized");
388        Ok(())
389    }
390}
391
392type ProgressRange = std::ops::RangeInclusive<prodash::progress::key::Level>;
393const STANDARD_RANGE: ProgressRange = 2..=2;
394
395fn setup_line_renderer(
396    progress: &std::sync::Arc<prodash::tree::Root>,
397) -> prodash::render::line::JoinHandle {
398    prodash::render::line(
399        std::io::stderr(),
400        std::sync::Arc::downgrade(progress),
401        prodash::render::line::Options {
402            level_filter: Some(STANDARD_RANGE),
403            initial_delay: Some(std::time::Duration::from_millis(500)),
404            throughput: true,
405            ..prodash::render::line::Options::default()
406        }
407        .auto_configure(prodash::render::line::StreamKind::Stderr),
408    )
409}
410
411impl super::QueryStore<Ref, Box<dyn Transport + Send>> for gix::Url {
412    type Error = Error;
413
414    /// Efficiently queries git references from a remote repository URL.
415    ///
416    /// This implementation performs a lightweight network operation that only retrieves
417    /// reference information (branch/tag names and their commit IDs) without downloading
418    /// the actual repository objects. This makes it ideal for scenarios where you need
419    /// to check reference existence or get commit IDs without the overhead of a full
420    /// repository fetch.
421    ///
422    /// ## Network Behavior
423    /// - **Lightweight**: Only queries reference metadata, not repository content
424    /// - **Fast**: Minimal network overhead compared to full fetch operations
425    /// - **Efficient**: Suitable for checking reference existence and getting commit IDs
426    ///
427    /// ## Use Cases
428    /// - Checking if specific branches or tags exist on a remote
429    /// - Getting commit IDs for references without downloading objects
430    /// - Lightweight remote repository inspection
431    ///
432    /// ## Performance
433    /// This is significantly faster than the [`gix::Remote`] implementation since it
434    /// avoids downloading actual git objects, making it appropriate for read-only
435    /// reference queries.
436    fn get_refs<Spec>(
437        &self,
438        targets: impl IntoIterator<Item = Spec>,
439        transport: Option<&mut Box<dyn Transport + Send>>,
440    ) -> std::result::Result<
441        impl std::iter::IntoIterator<Item = Ref>,
442        <Self as super::QueryStore<Ref, Box<dyn Transport + Send>>>::Error,
443    >
444    where
445        Spec: AsRef<BStr>,
446    {
447        use gix::open::permissions::Environment;
448        use gix::refspec::RefSpec;
449        use gix::sec::Permission;
450
451        let transport = if let Some(transport) = transport {
452            transport
453        } else {
454            &mut self.get_transport()?
455        };
456
457        let config = gix::config::File::from_globals()?;
458        let (mut cascade, _, prompt_opts) = gix::config::credential_helpers(
459            self.to_owned(),
460            &config,
461            true,
462            gix::config::section::is_trusted,
463            Environment {
464                xdg_config_home: Permission::Allow,
465                home: Permission::Allow,
466                http_transport: Permission::Allow,
467                identity: Permission::Allow,
468                objects: Permission::Allow,
469                git_prefix: Permission::Allow,
470                ssh_prefix: Permission::Allow,
471            },
472            false,
473        )?;
474
475        let authenticate = Box::new(move |action| cascade.invoke(action, prompt_opts.clone()));
476
477        let mut handshake = gix::protocol::fetch::handshake(
478            &mut *transport,
479            authenticate,
480            Vec::new(),
481            &mut prodash::progress::Discard,
482        )
483        .map_err(Box::new)?;
484
485        use gix::refspec::parse::Operation;
486        let refs: Vec<_> = targets
487            .into_iter()
488            .map(|t| gix::refspec::parse(t.as_ref(), Operation::Fetch).map(RefSpec::from))
489            .collect::<Result<Vec<_>, _>>()?;
490
491        use gix::protocol::fetch::refmap::init::Options as RefOptions;
492        use gix::protocol::fetch::{Context, RefMap};
493
494        let context = Context {
495            handshake: &mut handshake,
496            transport,
497            user_agent: ("agent", Some(gix::env::agent().into())),
498            trace_packetlines: true,
499        };
500
501        let refmap = RefMap::new(
502            prodash::progress::Discard,
503            refs.as_slice(),
504            context,
505            RefOptions::default(),
506        )?;
507        Ok(refmap.remote_refs)
508    }
509
510    fn get_transport(&self) -> Result<Box<dyn Transport + Send>, Self::Error> {
511        use gix::protocol::transport::client::connect::Options;
512        let transport = gix::protocol::transport::connect(self.to_owned(), Options::default())?;
513        Ok(Box::new(transport))
514    }
515
516    fn get_ref<Spec>(
517        &self,
518        target: Spec,
519        transport: Option<&mut Box<dyn Transport + Send>>,
520    ) -> Result<Ref, Self::Error>
521    where
522        Spec: AsRef<BStr>,
523    {
524        let name = target.as_ref().to_string();
525        self.get_refs(Some(target), transport).and_then(|r| {
526            r.into_iter()
527                .next()
528                .ok_or(Error::NoRef(name, self.to_string()))
529        })
530    }
531}
532
533impl<'repo> super::QueryStore<Ref, Box<dyn Transport + Send>> for gix::Remote<'repo> {
534    type Error = Error;
535
536    /// Performs a full git fetch operation to retrieve references and repository data.
537    ///
538    /// This implementation executes a complete git fetch operation, which downloads
539    /// both reference information and the actual repository objects (commits, trees,
540    /// blobs) from the remote. This provides full access to the repository content
541    /// but is significantly more expensive than the URL-based implementation.
542    ///
543    /// ## Network Behavior
544    /// - **Heavyweight**: Performs a full git fetch operation, downloading all objects
545    /// - **Complete**: Provides access to the entire repository state after fetching
546    /// - **Expensive**: Higher network usage and longer execution time
547    ///
548    /// ## Use Cases
549    /// - When you need to access repository content after fetching references
550    /// - When working with local repositories that need to sync with remotes
551    /// - When you require the complete repository state, not just reference metadata
552    ///
553    /// ## Performance
554    /// This implementation is slower and uses more network bandwidth than the
555    /// [`gix::Url`] implementation because it downloads actual git objects.
556    /// Use it only when you need access to repository content beyond reference metadata.
557    ///
558    /// ## Progress Reporting
559    /// The fetch operation includes progress reporting for sync and initialization phases.
560    /// Progress is displayed when the log level is set above WARN.
561    fn get_refs<Spec>(
562        &self,
563        references: impl IntoIterator<Item = Spec>,
564        transport: Option<&mut Box<dyn Transport + Send>>,
565    ) -> std::result::Result<
566        impl IntoIterator<Item = Ref>,
567        <Self as super::QueryStore<Ref, Box<dyn Transport + Send>>>::Error,
568    >
569    where
570        Spec: AsRef<BStr>,
571    {
572        use std::sync::atomic::AtomicBool;
573
574        use gix::progress::prodash::tree::Root;
575        use gix::remote::Direction;
576        use gix::remote::fetch::Tags;
577        use gix::remote::ref_map::Options;
578        use tracing::level_filters::LevelFilter;
579
580        let tree = Root::new();
581        let sync_progress = tree.add_child("sync");
582        let init_progress = tree.add_child("init");
583        let _ = if LevelFilter::current() > LevelFilter::WARN {
584            Some(setup_line_renderer(&tree))
585        } else {
586            None
587        };
588
589        let mut remote = self.clone().with_fetch_tags(Tags::None);
590
591        remote
592            .replace_refspecs(references, Direction::Fetch)
593            .map_err(Box::new)?;
594
595        let transport = if let Some(transport) = transport {
596            transport
597        } else {
598            &mut remote.get_transport()?
599        };
600
601        let client = remote.to_connection_with_transport(transport);
602
603        let query = client
604            .prepare_fetch(sync_progress, Options::default())
605            .map_err(Box::new)?;
606
607        let outcome = query
608            .with_write_packed_refs_only(true)
609            .receive(init_progress, &AtomicBool::new(false))
610            .map_err(Box::new)?;
611
612        Ok(outcome.ref_map.remote_refs)
613    }
614
615    fn get_transport(&self) -> Result<Box<dyn Transport + Send>, Self::Error> {
616        use gix::remote::Direction;
617        let url = self
618            .url(Direction::Fetch)
619            .ok_or_else(|| Error::NoUrl("fetch".to_string(), self.symbol().to_string()))?;
620        url.get_transport()
621    }
622
623    fn get_ref<Spec>(
624        &self,
625        target: Spec,
626        transport: Option<&mut Box<dyn Transport + Send>>,
627    ) -> Result<Ref, Self::Error>
628    where
629        Spec: AsRef<BStr>,
630    {
631        let name = target.as_ref().to_string();
632        self.get_refs(Some(target), transport).and_then(|r| {
633            r.into_iter()
634                .next()
635                .ok_or(Error::NoRef(name, self.symbol().to_owned()))
636        })
637    }
638}
639
640use semver::Version;
641
642use crate::AtomTag;
643impl super::UnpackRef<ObjectId> for Ref {
644    fn unpack_atom_ref(&self) -> Option<super::UnpackedRef<ObjectId>> {
645        let maybe_root = self.find_root_ref();
646        if let Some(root) = maybe_root {
647            return Some((
648                AtomTag::root_tag(),
649                Version::parse(V1_ROOT_SEMVER).ok()?,
650                root,
651            ));
652        }
653        let (n, t, p) = self.unpack();
654        let mut path = PathBuf::from(n.to_string());
655        let v_str = path.file_name()?.to_str()?;
656        let version = Version::parse(v_str).ok()?;
657        path.pop();
658        let a_str = path.file_name()?.to_str()?;
659        let tag = AtomTag::try_from(a_str).ok()?;
660        let id = p.or(t).map(ToOwned::to_owned)?;
661
662        Some((tag, version, id))
663    }
664
665    fn find_root_ref(&self) -> Option<ObjectId> {
666        if let Ref::Direct {
667            full_ref_name: name,
668            object: id,
669        } = self
670        {
671            if name == V1_ROOT {
672                return Some(id.to_owned());
673            }
674        }
675        None
676    }
677}
678
679type Refs = Vec<super::UnpackedRef<ObjectId>>;
680impl QueryVersion<Ref, ObjectId, Refs, Box<dyn Transport + Send>> for gix::Url {}
681impl<'repo> QueryVersion<Ref, ObjectId, Refs, Box<dyn Transport + Send>> for gix::Remote<'repo> {}