Find project origins more efficiently

This commit is contained in:
Félix Saparelli 2021-10-10 23:35:11 +13:00
parent 59fe74656a
commit 1dff1f5644
No known key found for this signature in database
GPG Key ID: B948C4BAE44FC474
3 changed files with 160 additions and 104 deletions

1
Cargo.lock generated
View File

@ -2658,6 +2658,7 @@ dependencies = [
"regex",
"thiserror",
"tokio",
"tokio-stream",
"tracing",
"tracing-subscriber",
"unicase",

View File

@ -16,10 +16,12 @@ edition = "2018"
[dependencies]
async-recursion = "0.3.2"
async-stream = "0.3.2"
atomic-take = "1.0.0"
clearscreen = "1.0.6"
dunce = "1.0.2"
futures = "0.3.16"
git2 = "0.13.22"
globset = "0.4.8"
miette = "3.2.0"
nom = "7.0.0"
@ -29,8 +31,6 @@ regex = "1.5.4"
thiserror = "1.0.26"
tracing = "0.1.26"
unicase = "2.6.0"
async-stream = "0.3.2"
git2 = "0.13.22"
[dependencies.command-group]
version = "1.0.5"
@ -40,5 +40,9 @@ features = ["with-tokio"]
version = "1.10.0"
features = ["full"]
[dependencies.tokio-stream]
version = "0.1.7"
features = ["fs"]
[dev-dependencies]
tracing-subscriber = "0.2.19"

View File

@ -1,84 +1,120 @@
//! Detect project type and origin.
use std::{
fs::Metadata,
io::Error,
collections::{HashMap, HashSet},
fs::FileType,
path::{Path, PathBuf},
};
use futures::{future::ready as is_true, stream::FuturesUnordered, StreamExt};
use tokio::fs::metadata;
use tracing::trace;
use futures::StreamExt;
use tokio::fs::read_dir;
use tokio_stream::wrappers::ReadDirStream;
/// Project types recognised by watchexec.
///
/// There are two kinds of projects: VCS and software suite. The latter is more characterised by
/// what package manager or build system is in use. The enum is marked non-exhaustive as more types
/// can get added in the future.
///
/// Do not rely on the ordering or value (e.g. with transmute) of the variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum ProjectType {
// Version control systems.

/// Bazaar version control.
Bazaar,
/// Darcs version control.
Darcs,
/// Fossil version control.
Fossil,
/// Git version control.
Git,
/// Mercurial version control.
Mercurial,
/// Pijul version control.
Pijul,

// Software suites, named after the language, package manager, or build system in use.

/// Bundler (Ruby dependency manager).
Bundler,
/// C project.
C,
/// Cargo (Rust package manager and build tool).
Cargo,
/// Docker.
Docker,
/// Elixir project.
Elixir,
/// Gradle build system.
Gradle,
/// JavaScript project.
JavaScript,
/// Leiningen (Clojure build tool).
Leiningen,
/// Maven build system.
Maven,
/// Perl project.
Perl,
/// PHP project.
PHP,
/// Pip (Python package installer).
Pip,
/// RubyGem package.
RubyGem,
/// V project (presumably the V language, cf. the `v.mod` marker file — confirm).
V,
}
/// Traverses the parents of the given path and returns _all_ that are project origins.
///
/// This checks for the presence of a wide range of files and directories that are likely to be
/// present and indicative of the root or origin path of a project. It's entirely possible to have
/// multiple such origins show up: for example, a member of a Cargo workspace will list both the
/// member project and the workspace root as origins.
pub async fn origins(path: impl AsRef<Path>) -> Vec<PathBuf> {
let mut origins = Vec::new();
async fn check_origin(path: &Path) -> bool {
let dirtests: FuturesUnordered<_> = vec![
dir_exists(path.join("_darcs")),
dir_exists(path.join(".bzr")),
dir_exists(path.join(".fossil-settings")),
dir_exists(path.join(".git")),
dir_exists(path.join(".github")),
dir_exists(path.join(".hg")),
]
.into_iter()
.collect();
fn check_list(list: DirList) -> bool {
if list.is_empty() {
return false;
}
let filetests: FuturesUnordered<_> = vec![
file_exists(path.join(".asf.yaml")),
file_exists(path.join(".bzrignore")),
file_exists(path.join(".codecov.yml")),
file_exists(path.join(".ctags")),
file_exists(path.join(".editorconfig")),
file_exists(path.join(".gitattributes")),
file_exists(path.join(".gitmodules")),
file_exists(path.join(".hgignore")),
file_exists(path.join(".hgtags")),
file_exists(path.join(".perltidyrc")),
file_exists(path.join(".travis.yml")),
file_exists(path.join("appveyor.yml")),
file_exists(path.join("build.gradle")),
file_exists(path.join("build.properties")),
file_exists(path.join("build.xml")),
file_exists(path.join("Cargo.toml")),
file_exists(path.join("cgmanifest.json")),
file_exists(path.join("CMakeLists.txt")),
file_exists(path.join("composer.json")),
file_exists(path.join("COPYING")),
file_exists(path.join("docker-compose.yml")),
file_exists(path.join("Dockerfile")),
file_exists(path.join("Gemfile")),
file_exists(path.join("LICENSE.txt")),
file_exists(path.join("LICENSE")),
file_exists(path.join("Makefile.am")),
file_exists(path.join("Makefile.pl")),
file_exists(path.join("Makefile.PL")),
file_exists(path.join("Makefile")),
file_exists(path.join("mix.exs")),
file_exists(path.join("moonshine-dependencies.xml")),
file_exists(path.join("package.json")),
file_exists(path.join("pom.xml")),
file_exists(path.join("project.clj")),
file_exists(path.join("README.md")),
file_exists(path.join("README")),
file_exists(path.join("requirements.txt")),
file_exists(path.join("v.mod")),
]
.into_iter()
.collect();
dirtests.any(is_true).await || filetests.any(is_true).await
IntoIterator::into_iter([
list.has_dir("_darcs"),
list.has_dir(".bzr"),
list.has_dir(".fossil-settings"),
list.has_dir(".git"),
list.has_dir(".github"),
list.has_dir(".hg"),
list.has_file(".asf.yaml"),
list.has_file(".bzrignore"),
list.has_file(".codecov.yml"),
list.has_file(".ctags"),
list.has_file(".editorconfig"),
list.has_file(".gitattributes"),
list.has_file(".gitmodules"),
list.has_file(".hgignore"),
list.has_file(".hgtags"),
list.has_file(".perltidyrc"),
list.has_file(".travis.yml"),
list.has_file("appveyor.yml"),
list.has_file("build.gradle"),
list.has_file("build.properties"),
list.has_file("build.xml"),
list.has_file("Cargo.toml"),
list.has_file("Cargo.lock"),
list.has_file("cgmanifest.json"),
list.has_file("CMakeLists.txt"),
list.has_file("composer.json"),
list.has_file("COPYING"),
list.has_file("docker-compose.yml"),
list.has_file("Dockerfile"),
list.has_file("Gemfile"),
list.has_file("LICENSE.txt"),
list.has_file("LICENSE"),
list.has_file("Makefile.am"),
list.has_file("Makefile.pl"),
list.has_file("Makefile.PL"),
list.has_file("Makefile"),
list.has_file("mix.exs"),
list.has_file("moonshine-dependencies.xml"),
list.has_file("package.json"),
list.has_file("pom.xml"),
list.has_file("project.clj"),
list.has_file("README.md"),
list.has_file("README"),
list.has_file("requirements.txt"),
list.has_file("v.mod"),
])
.any(|f| f)
}
let mut current = path.as_ref();
if check_origin(path.as_ref()).await {
if check_list(DirList::obtain(current).await) {
origins.push(current.to_owned());
}
while let Some(parent) = current.parent() {
current = parent;
if check_origin(current).await {
if check_list(DirList::obtain(current).await) {
origins.push(current.to_owned());
continue;
}
@ -95,52 +131,67 @@ pub async fn types(path: impl AsRef<Path>) -> Result<Vec<ProjectType>, Error> {
todo!()
}
/// Kinds of project that can be recognised.
///
/// Marked non-exhaustive, so downstream `match`es need a wildcard arm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum ProjectType {
// Version control systems.
Bazaar,
Darcs,
Fossil,
Git,
Mercurial,
Pijul,
// Package managers / language ecosystems.
Bundler,
Cargo,
JavaScript,
Pip,
RubyGem,
}
/// Fetches the metadata for `path`, returning `None` if the lookup fails for any reason.
///
/// The underlying error is discarded, so callers cannot tell a missing path apart from any other
/// failure to read its metadata.
#[inline]
async fn exists(path: &Path) -> Option<Metadata> {
metadata(path).await.ok()
}
#[inline]
async fn file_exists(path: PathBuf) -> bool {
let res = exists(&path)
.await
.map(|meta| meta.is_file())
.unwrap_or(false);
if res {
trace!(?path, "file exists");
/// A snapshot of the entries found in a single directory.
///
/// Maps each entry's path, relative to the directory that was listed, to the file type reported
/// for that entry. The `Default` value is an empty listing.
#[derive(Debug, Default)]
struct DirList(HashMap<PathBuf, FileType>);
impl DirList {
/// Reads the directory at `path` once and records the file type of every entry in it.
///
/// Keys of the resulting listing are entry paths relative to `path`. The operation is
/// best-effort: if the directory cannot be opened an empty listing is returned, and any
/// individual entry that fails to be read, whose path is not under `path`, or whose file type
/// cannot be determined is silently left out.
///
/// NOTE(review): the file type comes from the directory entry itself; tokio documents
/// `DirEntry::file_type` as not traversing symlinks, so a symlinked marker would presumably be
/// reported as neither file nor directory — confirm this is intended.
async fn obtain(path: &Path) -> Self {
    let reader = match read_dir(path).await {
        Ok(reader) => reader,
        // Unreadable or missing directory: behave as if it were empty.
        Err(_) => return Self::default(),
    };

    let mut listing = HashMap::new();
    let mut entries = ReadDirStream::new(reader);

    while let Some(item) = entries.next().await {
        let entry = match item {
            Ok(entry) => entry,
            Err(_) => continue,
        };

        let full_path = entry.path();
        let relative = full_path.strip_prefix(path);
        let kind = entry.file_type().await;

        // Only keep entries for which both the relative name and the type are known.
        if let (Ok(relative), Ok(kind)) = (relative, kind) {
            listing.insert(relative.to_owned(), kind);
        }
    }

    Self(listing)
}
res
/// Returns `true` when the listing holds no entries at all.
///
/// This is also the case for the `Default` listing that `obtain` returns when the directory
/// could not be read.
#[inline]
fn is_empty(&self) -> bool {
self.0.is_empty()
}
#[inline]
async fn dir_exists(path: PathBuf) -> bool {
let res = exists(&path)
.await
.map(|meta| meta.is_dir())
.unwrap_or(false);
/// Returns `true` if the listing contains an entry called `name` whose recorded type is a file.
///
/// `name` is looked up as-is against the stored keys, which are relative to the listed
/// directory. A missing entry yields `false`.
fn has_file(&self, name: impl AsRef<Path>) -> bool {
    self.0
        .get(name.as_ref())
        .map_or(false, |kind| kind.is_file())
}
if res {
trace!(?path, "dir exists");
/// Returns `true` if the listing contains an entry called `name` whose recorded type is a
/// directory.
///
/// `name` is looked up as-is against the stored keys, which are relative to the listed
/// directory. A missing entry yields `false`.
#[inline]
fn has_dir(&self, name: impl AsRef<Path>) -> bool {
    self.0
        .get(name.as_ref())
        .map_or(false, |kind| kind.is_dir())
}
res
/// Yields `Some(project)` when the listing has a file called `name`, and `None` otherwise.
///
/// Convenience wrapper over `has_file` for mapping a marker file to a project type.
#[inline]
fn if_has_file(&self, name: impl AsRef<Path>, project: ProjectType) -> Option<ProjectType> {
    self.has_file(name).then(|| project)
}
/// Yields `Some(project)` when the listing has a directory called `name`, and `None` otherwise.
///
/// Convenience wrapper over `has_dir` for mapping a marker directory to a project type.
#[inline]
fn if_has_dir(&self, name: impl AsRef<Path>, project: ProjectType) -> Option<ProjectType> {
    self.has_dir(name).then(|| project)
}
}