watchexec/crates/project-origins/src/lib.rs

388 lines
12 KiB
Rust
Raw Normal View History

2022-06-15 05:25:05 +02:00
//! Resolve project origins and kinds from a path.
//!
//! This crate originated in [Watchexec](https://docs.rs/watchexec): it is used to resolve where a
//! project's origin (or root) is, starting either at that origin, or within a subdirectory of it.
//!
//! This crate also provides the kind of project it is, and defines two categories within this:
//! version control systems, and software development environments.
//!
//! As it is possible to find several project origins, of different or similar kinds, from a given
//! directory and walking up, [`origins`] returns a set, rather than a single path. Determining
//! which of these is the "one true origin" (if necessary) is left to the caller.
2021-10-09 07:45:32 +02:00
use std::{
2021-10-10 12:35:11 +02:00
collections::{HashMap, HashSet},
fs::FileType,
path::{Path, PathBuf},
};
2021-10-09 07:45:32 +02:00
2021-10-10 12:35:11 +02:00
use futures::StreamExt;
use tokio::fs::read_dir;
use tokio_stream::wrappers::ReadDirStream;
2021-10-10 06:55:50 +02:00
2021-10-10 12:35:11 +02:00
/// Project types recognised by watchexec.
///
/// There are two kinds of projects: VCS and software suite. The latter is more characterised by
/// what package manager or build system is in use. The enum is marked non-exhaustive as more types
/// can get added in the future.
///
/// Use [`ProjectType::is_vcs`] and [`ProjectType::is_soft`] to find out which of the two kinds a
/// value belongs to.
///
/// Do not rely on the ordering or value (e.g. with transmute) of the variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum ProjectType {
    /// VCS: [Bazaar](https://bazaar.canonical.com/).
    ///
    /// Detects when a `.bzr` folder or a `.bzrignore` file is present. Bazaar does not support (at
    /// writing, anyway) ignore files deeper than the repository origin, so this should not
    /// false-positive.
    Bazaar,

    /// VCS: [Darcs](http://darcs.net/).
    ///
    /// Detects when a `_darcs` folder is present.
    Darcs,

    /// VCS: [Fossil](https://www.fossil-scm.org/).
    ///
    /// Detects when a `.fossil-settings` folder is present.
    Fossil,

    /// VCS: [Git](https://git-scm.com/).
    ///
    /// Detects when a `.git` folder is present, or any of the files `.gitattributes` or
    /// `.gitmodules`. Does _not_ check or return from the presence of `.gitignore` files, as Git
    /// supports nested ignores, and that would result in false-positives.
    Git,

    /// VCS: [Mercurial](https://www.mercurial-scm.org/).
    ///
    /// Detects when a `.hg` folder is present, or any of the files `.hgignore` or `.hgtags`.
    /// Mercurial does not support (at writing, anyway) ignore files deeper than the repository
    /// origin, so this should not false-positive.
    Mercurial,

    /// VCS: [Pijul](https://pijul.org/).
    ///
    /// This is not detected at the moment.
    Pijul,

    /// VCS: [Subversion](https://subversion.apache.org) (aka SVN).
    ///
    /// Detects when a `.svn` folder is present.
    Subversion,

    /// Soft: [Ruby](https://www.ruby-lang.org/)'s [Bundler](https://bundler.io/).
    ///
    /// Detects when a `Gemfile` file is present.
    Bundler,

    /// Soft: the [C programming language](https://en.wikipedia.org/wiki/C_(programming_language)).
    ///
    /// Detects when a `.ctags` file is present.
    C,

    /// Soft: [Rust](https://www.rust-lang.org/)'s [Cargo](https://doc.rust-lang.org/cargo/).
    ///
    /// Detects Cargo workspaces and Cargo crates through the presence of a `Cargo.toml` file.
    Cargo,

    /// Soft: the [Docker](https://www.docker.com/) container runtime.
    ///
    /// Detects when a `Dockerfile` file is present.
    Docker,

    /// Soft: the [Elixir](https://elixir-lang.org/) language.
    ///
    /// Detects when a `mix.exs` file is present.
    Elixir,

    /// Soft: the [Go](https://go.dev/) language.
    ///
    /// Detects when a `go.mod` or `go.sum` file is present.
    Go,

    /// Soft: [Java](https://www.java.com/)'s [Gradle](https://gradle.org/).
    ///
    /// Detects when a `build.gradle` file is present.
    Gradle,

    /// Soft: [EcmaScript](https://www.ecmascript.org/) (aka JavaScript).
    ///
    /// Detects when a `package.json` or `cgmanifest.json` file is present.
    ///
    /// This is a catch-all for all `package.json`-based projects, and does not differentiate
    /// between NPM, Yarn, PNPM, Node, browser, Deno, Bun, etc.
    JavaScript,

    /// Soft: [Clojure](https://clojure.org/)'s [Leiningen](https://leiningen.org/).
    ///
    /// Detects when a `project.clj` file is present.
    Leiningen,

    /// Soft: [Java](https://www.java.com/)'s [Maven](https://maven.apache.org/).
    ///
    /// Detects when a `pom.xml` file is present.
    Maven,

    /// Soft: the [Perl](https://www.perl.org/) language.
    ///
    /// Detects when a `.perltidyrc` or `Makefile.PL` file is present.
    Perl,

    /// Soft: the [PHP](https://www.php.net/) language.
    ///
    /// Detects when a `composer.json` file is present.
    PHP,

    /// Soft: [Python](https://www.python.org/)'s [Pip](https://pip.pypa.io/).
    ///
    /// Detects when a `requirements.txt` or `Pipfile` file is present.
    Pip,

    /// Soft: the [V](https://vlang.io/) language.
    ///
    /// Detects when a `v.mod` file is present.
    V,

    /// Soft: the [Zig](https://ziglang.org/) language.
    ///
    /// Detects when a `build.zig` file is present.
    Zig,
}

impl ProjectType {
    /// Returns true if the project type is a VCS.
    #[must_use]
    pub const fn is_vcs(self) -> bool {
        // Deliberately an exhaustive `match` with no wildcard arm: adding a variant to the enum
        // then fails to compile here until it has been assigned to one of the two categories,
        // instead of silently belonging to neither.
        match self {
            Self::Bazaar
            | Self::Darcs
            | Self::Fossil
            | Self::Git
            | Self::Mercurial
            | Self::Pijul
            | Self::Subversion => true,
            Self::Bundler
            | Self::C
            | Self::Cargo
            | Self::Docker
            | Self::Elixir
            | Self::Go
            | Self::Gradle
            | Self::JavaScript
            | Self::Leiningen
            | Self::Maven
            | Self::Perl
            | Self::PHP
            | Self::Pip
            | Self::V
            | Self::Zig => false,
        }
    }

    /// Returns true if the project type is a software suite.
    #[must_use]
    pub const fn is_soft(self) -> bool {
        // Every project type is either a VCS or a software suite, so this is the complement of
        // `is_vcs`. Deriving it keeps the two predicates from drifting apart.
        !self.is_vcs()
    }
}
2021-10-10 12:35:11 +02:00
/// Traverses the parents of the given path and returns _all_ that are project origins.
///
/// This checks for the presence of a wide range of files and directories that are likely to be
/// present and indicative of the root or origin path of a project. It's entirely possible to have
/// multiple such origins show up: for example, a member of a Cargo workspace will list both the
/// member project and the workspace root as origins.
///
/// This looks at a wider variety of files than the [`types`] function does: something can be
/// detected as an origin but not be able to match to any particular [`ProjectType`].
2023-01-06 14:53:49 +01:00
pub async fn origins(path: impl AsRef<Path> + Send) -> HashSet<PathBuf> {
fn check_list(list: &DirList) -> bool {
2021-10-10 12:35:11 +02:00
if list.is_empty() {
return false;
}
2021-10-19 13:51:33 +02:00
[
2021-10-10 12:35:11 +02:00
list.has_dir("_darcs"),
list.has_dir(".bzr"),
list.has_dir(".fossil-settings"),
list.has_dir(".git"),
list.has_dir(".github"),
list.has_dir(".hg"),
2022-01-16 04:13:05 +01:00
list.has_dir(".svn"),
2021-10-10 12:35:11 +02:00
list.has_file(".asf.yaml"),
list.has_file(".bzrignore"),
list.has_file(".codecov.yml"),
list.has_file(".ctags"),
list.has_file(".editorconfig"),
list.has_file(".gitattributes"),
list.has_file(".gitmodules"),
list.has_file(".hgignore"),
list.has_file(".hgtags"),
list.has_file(".perltidyrc"),
list.has_file(".travis.yml"),
list.has_file("appveyor.yml"),
list.has_file("build.gradle"),
list.has_file("build.properties"),
list.has_file("build.xml"),
list.has_file("Cargo.toml"),
list.has_file("Cargo.lock"),
list.has_file("cgmanifest.json"),
list.has_file("CMakeLists.txt"),
list.has_file("composer.json"),
list.has_file("COPYING"),
list.has_file("docker-compose.yml"),
list.has_file("Dockerfile"),
list.has_file("Gemfile"),
list.has_file("LICENSE.txt"),
list.has_file("LICENSE"),
list.has_file("Makefile.am"),
list.has_file("Makefile.pl"),
list.has_file("Makefile.PL"),
list.has_file("Makefile"),
list.has_file("mix.exs"),
list.has_file("moonshine-dependencies.xml"),
list.has_file("package.json"),
2024-01-01 04:41:14 +01:00
list.has_file("package-lock.json"),
list.has_file("pnpm-lock.yaml"),
list.has_file("yarn.lock"),
2021-10-10 12:35:11 +02:00
list.has_file("pom.xml"),
list.has_file("project.clj"),
list.has_file("requirements.txt"),
list.has_file("v.mod"),
list.has_file("CONTRIBUTING.md"),
list.has_file("go.mod"),
list.has_file("go.sum"),
list.has_file("Pipfile"),
list.has_file("build.zig"),
2021-10-19 13:51:33 +02:00
]
.into_iter()
2021-10-10 12:35:11 +02:00
.any(|f| f)
2021-10-10 06:55:50 +02:00
}
2023-01-06 14:53:49 +01:00
let mut origins = HashSet::new();
let path = path.as_ref();
let mut current = path;
if check_list(&DirList::obtain(current).await) {
origins.insert(current.to_owned());
2021-10-10 06:55:50 +02:00
}
while let Some(parent) = current.parent() {
current = parent;
2023-01-06 14:53:49 +01:00
if check_list(&DirList::obtain(current).await) {
origins.insert(current.to_owned());
2021-10-10 06:55:50 +02:00
continue;
}
}
origins
2021-10-09 07:45:32 +02:00
}
/// Returns all project types detected at this given origin.
///
/// This should be called with a result of [`origins()`], or a project origin if already known; it
2021-10-09 07:45:32 +02:00
/// will not find the origin itself.
2021-10-10 12:35:27 +02:00
///
/// The returned list may be empty.
///
/// Note that this only detects project types listed in the [`ProjectType`] enum, and may not detect
/// anything for some paths returned by [`origins()`].
2023-01-06 14:53:49 +01:00
pub async fn types(path: impl AsRef<Path> + Send) -> HashSet<ProjectType> {
let path = path.as_ref();
let list = DirList::obtain(path).await;
2021-10-19 13:51:33 +02:00
[
2021-10-10 12:35:27 +02:00
list.if_has_dir("_darcs", ProjectType::Darcs),
list.if_has_dir(".bzr", ProjectType::Bazaar),
list.if_has_dir(".fossil-settings", ProjectType::Fossil),
list.if_has_dir(".git", ProjectType::Git),
list.if_has_dir(".hg", ProjectType::Mercurial),
2022-01-16 04:13:05 +01:00
list.if_has_dir(".svn", ProjectType::Subversion),
2021-10-10 12:35:27 +02:00
list.if_has_file(".bzrignore", ProjectType::Bazaar),
list.if_has_file(".ctags", ProjectType::C),
list.if_has_file(".gitattributes", ProjectType::Git),
list.if_has_file(".gitmodules", ProjectType::Git),
list.if_has_file(".hgignore", ProjectType::Mercurial),
list.if_has_file(".hgtags", ProjectType::Mercurial),
list.if_has_file(".perltidyrc", ProjectType::Perl),
list.if_has_file("build.gradle", ProjectType::Gradle),
list.if_has_file("Cargo.toml", ProjectType::Cargo),
list.if_has_file("cgmanifest.json", ProjectType::JavaScript),
list.if_has_file("composer.json", ProjectType::PHP),
list.if_has_file("Dockerfile", ProjectType::Docker),
list.if_has_file("Gemfile", ProjectType::Bundler),
list.if_has_file("Makefile.PL", ProjectType::Perl),
list.if_has_file("mix.exs", ProjectType::Elixir),
list.if_has_file("package.json", ProjectType::JavaScript),
list.if_has_file("pom.xml", ProjectType::Maven),
list.if_has_file("project.clj", ProjectType::Leiningen),
list.if_has_file("requirements.txt", ProjectType::Pip),
list.if_has_file("v.mod", ProjectType::V),
list.if_has_file("go.mod", ProjectType::Go),
list.if_has_file("go.sum", ProjectType::Go),
list.if_has_file("Pipfile", ProjectType::Pip),
list.if_has_file("build.zig", ProjectType::Zig),
2021-10-19 13:51:33 +02:00
]
.into_iter()
2021-10-10 12:35:27 +02:00
.flatten()
.collect()
2021-10-09 07:45:32 +02:00
}
2021-10-10 12:35:11 +02:00
#[derive(Debug, Default)]
struct DirList(HashMap<PathBuf, FileType>);
impl DirList {
async fn obtain(path: &Path) -> Self {
if let Ok(s) = read_dir(path).await {
Self(
ReadDirStream::new(s)
.filter_map(|entry| async move {
match entry {
Err(_) => None,
Ok(entry) => {
if let (Ok(path), Ok(file_type)) =
(entry.path().strip_prefix(path), entry.file_type().await)
{
Some((path.to_owned(), file_type))
} else {
None
}
}
}
})
.collect::<HashMap<_, _>>()
.await,
)
} else {
Self::default()
}
}
2021-10-09 07:45:32 +02:00
2021-10-10 12:35:11 +02:00
#[inline]
fn is_empty(&self) -> bool {
self.0.is_empty()
2021-10-10 12:35:27 +02:00
}
2021-10-10 06:55:50 +02:00
2021-10-10 12:35:27 +02:00
#[inline]
2021-10-10 12:35:11 +02:00
fn has_file(&self, name: impl AsRef<Path>) -> bool {
let name = name.as_ref();
2023-01-06 14:53:49 +01:00
self.0.get(name).map_or(false, std::fs::FileType::is_file)
2021-10-10 12:35:27 +02:00
}
2021-10-10 06:55:50 +02:00
2021-10-10 12:35:27 +02:00
#[inline]
2021-10-10 12:35:11 +02:00
fn has_dir(&self, name: impl AsRef<Path>) -> bool {
let name = name.as_ref();
2023-01-06 14:53:49 +01:00
self.0.get(name).map_or(false, std::fs::FileType::is_dir)
2021-10-10 06:55:50 +02:00
}
2021-10-10 12:35:11 +02:00
#[inline]
fn if_has_file(&self, name: impl AsRef<Path>, project: ProjectType) -> Option<ProjectType> {
if self.has_file(name) {
Some(project)
} else {
None
2021-10-10 12:35:27 +02:00
}
2021-10-10 12:35:11 +02:00
}
2021-10-10 06:55:50 +02:00
2021-10-10 12:35:27 +02:00
#[inline]
2021-10-10 12:35:11 +02:00
fn if_has_dir(&self, name: impl AsRef<Path>, project: ProjectType) -> Option<ProjectType> {
if self.has_dir(name) {
Some(project)
} else {
None
2021-10-10 12:35:27 +02:00
}
2021-10-10 06:55:50 +02:00
}
}