Implement option for printing custom formats

This commit is contained in:
Thayne McCombs 2022-06-16 01:22:43 -06:00
parent ef3194a510
commit 985f2b1374
8 changed files with 277 additions and 233 deletions

View File

@ -453,6 +453,20 @@ pub struct Opts {
)]
pub owner: Option<OwnerFilter>,
/// Instead of printing the file normally, print the format string with the following placeholders replaced:
/// '{}': path (of the current search result)
/// '{/}': basename
/// '{//}': parent directory
/// '{.}': path without file extension
/// '{/.}': basename without file extension
#[arg(
long,
value_name = "fmt",
help = "Print results according to template",
conflicts_with = "list_details"
)]
pub format: Option<String>,
#[command(flatten)]
pub exec: Exec,

View File

@ -8,6 +8,7 @@ use crate::filetypes::FileTypes;
#[cfg(unix)]
use crate::filter::OwnerFilter;
use crate::filter::{SizeFilter, TimeFilter};
use crate::fmt::FormatTemplate;
/// Configuration options for *fd*.
pub struct Config {
@ -85,6 +86,9 @@ pub struct Config {
/// The value (if present) will be a lowercase string without leading dots.
pub extensions: Option<RegexSet>,
/// A format string to use to format results, similarly to exec
pub format: Option<FormatTemplate>,
/// If a value is supplied, each item found will be used to generate and execute commands.
pub command: Option<Arc<CommandSet>>,

View File

@ -1,13 +1,10 @@
mod command;
mod input;
mod job;
mod token;
use std::borrow::Cow;
use std::ffi::{OsStr, OsString};
use std::ffi::OsString;
use std::io;
use std::iter;
use std::path::{Component, Path, PathBuf, Prefix};
use std::path::{Path, PathBuf};
use std::process::Stdio;
use std::sync::Mutex;
@ -15,11 +12,10 @@ use anyhow::{bail, Result};
use argmax::Command;
use crate::exit_codes::{merge_exitcodes, ExitCode};
use crate::fmt::{FormatTemplate, Token};
use self::command::{execute_commands, handle_cmd_error};
use self::input::{basename, dirname, remove_extension};
pub use self::job::{batch, job};
use self::token::{tokenize, Token};
/// Execution mode of the command
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
@ -131,7 +127,7 @@ impl CommandSet {
#[derive(Debug)]
struct CommandBuilder {
pre_args: Vec<OsString>,
path_arg: ArgumentTemplate,
path_arg: FormatTemplate,
post_args: Vec<OsString>,
cmd: Command,
count: usize,
@ -220,7 +216,7 @@ impl CommandBuilder {
/// `generate_and_execute()` method will be used to generate a command and execute it.
#[derive(Debug, Clone, PartialEq)]
struct CommandTemplate {
args: Vec<ArgumentTemplate>,
args: Vec<FormatTemplate>,
}
impl CommandTemplate {
@ -235,7 +231,7 @@ impl CommandTemplate {
for arg in input {
let arg = arg.as_ref();
let tmpl = tokenize(arg);
let tmpl = FormatTemplate::parse(arg);
has_placeholder |= tmpl.has_tokens();
args.push(tmpl);
}
@ -251,7 +247,7 @@ impl CommandTemplate {
// If a placeholder token was not supplied, append one at the end of the command.
if !has_placeholder {
args.push(ArgumentTemplate::Tokens(vec![Token::Placeholder]));
args.push(FormatTemplate::Tokens(vec![Token::Placeholder]));
}
Ok(CommandTemplate { args })
@ -274,111 +270,6 @@ impl CommandTemplate {
}
}
/// Represents a template for a single command argument.
///
/// The argument is either a collection of `Token`s including at least one placeholder variant, or
/// a fixed text.
#[derive(Clone, Debug, PartialEq)]
enum ArgumentTemplate {
Tokens(Vec<Token>),
Text(String),
}
impl ArgumentTemplate {
pub fn has_tokens(&self) -> bool {
matches!(self, ArgumentTemplate::Tokens(_))
}
/// Generate an argument from this template. If path_separator is Some, then it will replace
/// the path separator in all placeholder tokens. Text arguments and tokens are not affected by
/// path separator substitution.
pub fn generate(&self, path: impl AsRef<Path>, path_separator: Option<&str>) -> OsString {
use self::Token::*;
let path = path.as_ref();
match *self {
ArgumentTemplate::Tokens(ref tokens) => {
let mut s = OsString::new();
for token in tokens {
match *token {
Basename => s.push(Self::replace_separator(basename(path), path_separator)),
BasenameNoExt => s.push(Self::replace_separator(
&remove_extension(basename(path).as_ref()),
path_separator,
)),
NoExt => s.push(Self::replace_separator(
&remove_extension(path),
path_separator,
)),
Parent => s.push(Self::replace_separator(&dirname(path), path_separator)),
Placeholder => {
s.push(Self::replace_separator(path.as_ref(), path_separator))
}
Text(ref string) => s.push(string),
}
}
s
}
ArgumentTemplate::Text(ref text) => OsString::from(text),
}
}
/// Replace the path separator in the input with the custom separator string. If path_separator
/// is None, simply return a borrowed Cow<OsStr> of the input. Otherwise, the input is
/// interpreted as a Path and its components are iterated through and re-joined into a new
/// OsString.
fn replace_separator<'a>(path: &'a OsStr, path_separator: Option<&str>) -> Cow<'a, OsStr> {
// fast-path - no replacement necessary
if path_separator.is_none() {
return Cow::Borrowed(path);
}
let path_separator = path_separator.unwrap();
let mut out = OsString::with_capacity(path.len());
let mut components = Path::new(path).components().peekable();
while let Some(comp) = components.next() {
match comp {
// Absolute paths on Windows are tricky. A Prefix component is usually a drive
// letter or UNC path, and is usually followed by RootDir. There are also
// "verbatim" prefixes beginning with "\\?\" that skip normalization. We choose to
// ignore verbatim path prefixes here because they're very rare, might be
// impossible to reach here, and there's no good way to deal with them. If users
// are doing something advanced involving verbatim windows paths, they can do their
// own output filtering with a tool like sed.
Component::Prefix(prefix) => {
if let Prefix::UNC(server, share) = prefix.kind() {
// Prefix::UNC is a parsed version of '\\server\share'
out.push(path_separator);
out.push(path_separator);
out.push(server);
out.push(path_separator);
out.push(share);
} else {
// All other Windows prefix types are rendered as-is. This results in e.g. "C:" for
// drive letters. DeviceNS and Verbatim* prefixes won't have backslashes converted,
// but they're not returned by directories fd can search anyway so we don't worry
// about them.
out.push(comp.as_os_str());
}
}
// Root directory is always replaced with the custom separator.
Component::RootDir => out.push(path_separator),
// Everything else is joined normally, with a trailing separator if we're not last
_ => {
out.push(comp.as_os_str());
if components.peek().is_some() {
out.push(path_separator);
}
}
}
}
Cow::Owned(out)
}
}
#[cfg(test)]
mod tests {
use super::*;
@ -398,9 +289,9 @@ mod tests {
CommandSet {
commands: vec![CommandTemplate {
args: vec![
ArgumentTemplate::Text("echo".into()),
ArgumentTemplate::Text("${SHELL}:".into()),
ArgumentTemplate::Tokens(vec![Token::Placeholder]),
FormatTemplate::Text("echo".into()),
FormatTemplate::Text("${SHELL}:".into()),
FormatTemplate::Tokens(vec![Token::Placeholder]),
]
}],
mode: ExecutionMode::OneByOne,
@ -415,8 +306,8 @@ mod tests {
CommandSet {
commands: vec![CommandTemplate {
args: vec![
ArgumentTemplate::Text("echo".into()),
ArgumentTemplate::Tokens(vec![Token::NoExt]),
FormatTemplate::Text("echo".into()),
FormatTemplate::Tokens(vec![Token::NoExt]),
],
}],
mode: ExecutionMode::OneByOne,
@ -431,8 +322,8 @@ mod tests {
CommandSet {
commands: vec![CommandTemplate {
args: vec![
ArgumentTemplate::Text("echo".into()),
ArgumentTemplate::Tokens(vec![Token::Basename]),
FormatTemplate::Text("echo".into()),
FormatTemplate::Tokens(vec![Token::Basename]),
],
}],
mode: ExecutionMode::OneByOne,
@ -447,8 +338,8 @@ mod tests {
CommandSet {
commands: vec![CommandTemplate {
args: vec![
ArgumentTemplate::Text("echo".into()),
ArgumentTemplate::Tokens(vec![Token::Parent]),
FormatTemplate::Text("echo".into()),
FormatTemplate::Tokens(vec![Token::Parent]),
],
}],
mode: ExecutionMode::OneByOne,
@ -463,8 +354,8 @@ mod tests {
CommandSet {
commands: vec![CommandTemplate {
args: vec![
ArgumentTemplate::Text("echo".into()),
ArgumentTemplate::Tokens(vec![Token::BasenameNoExt]),
FormatTemplate::Text("echo".into()),
FormatTemplate::Tokens(vec![Token::BasenameNoExt]),
],
}],
mode: ExecutionMode::OneByOne,
@ -494,9 +385,9 @@ mod tests {
CommandSet {
commands: vec![CommandTemplate {
args: vec![
ArgumentTemplate::Text("cp".into()),
ArgumentTemplate::Tokens(vec![Token::Placeholder]),
ArgumentTemplate::Tokens(vec![
FormatTemplate::Text("cp".into()),
FormatTemplate::Tokens(vec![Token::Placeholder]),
FormatTemplate::Tokens(vec![
Token::BasenameNoExt,
Token::Text(".ext".into())
]),
@ -514,8 +405,8 @@ mod tests {
CommandSet {
commands: vec![CommandTemplate {
args: vec![
ArgumentTemplate::Text("echo".into()),
ArgumentTemplate::Tokens(vec![Token::NoExt]),
FormatTemplate::Text("echo".into()),
FormatTemplate::Tokens(vec![Token::NoExt]),
],
}],
mode: ExecutionMode::Batch,
@ -540,7 +431,7 @@ mod tests {
#[test]
fn generate_custom_path_separator() {
let arg = ArgumentTemplate::Tokens(vec![Token::Placeholder]);
let arg = FormatTemplate::Tokens(vec![Token::Placeholder]);
macro_rules! check {
($input:expr, $expected:expr) => {
assert_eq!(arg.generate($input, Some("#")), OsString::from($expected));
@ -555,7 +446,7 @@ mod tests {
#[cfg(windows)]
#[test]
fn generate_custom_path_separator_windows() {
let arg = ArgumentTemplate::Tokens(vec![Token::Placeholder]);
let arg = FormatTemplate::Tokens(vec![Token::Placeholder]);
macro_rules! check {
($input:expr, $expected:expr) => {
assert_eq!(arg.generate($input, Some("#")), OsString::from($expected));

View File

@ -1,98 +0,0 @@
use aho_corasick::AhoCorasick;
use std::fmt::{self, Display, Formatter};
use std::sync::OnceLock;
use super::ArgumentTemplate;
/// Designates what should be written to a buffer
///
/// Each `Token` contains either text, or a placeholder variant, which will be used to generate
/// commands after all tokens for a given command template have been collected.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Token {
Placeholder,
Basename,
Parent,
NoExt,
BasenameNoExt,
Text(String),
}
impl Display for Token {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match *self {
Token::Placeholder => f.write_str("{}")?,
Token::Basename => f.write_str("{/}")?,
Token::Parent => f.write_str("{//}")?,
Token::NoExt => f.write_str("{.}")?,
Token::BasenameNoExt => f.write_str("{/.}")?,
Token::Text(ref string) => f.write_str(string)?,
}
Ok(())
}
}
static PLACEHOLDERS: OnceLock<AhoCorasick> = OnceLock::new();
pub(super) fn tokenize(input: &str) -> ArgumentTemplate {
// NOTE: we assume that { and } have the same length
const BRACE_LEN: usize = '{'.len_utf8();
let mut tokens = Vec::new();
let mut remaining = input;
let mut buf = String::new();
let placeholders = PLACEHOLDERS.get_or_init(|| {
AhoCorasick::new(&["{{", "}}", "{}", "{/}", "{//}", "{.}", "{/.}"]).unwrap()
});
while let Some(m) = placeholders.find(remaining) {
match m.pattern().as_u32() {
0 | 1 => {
// we found an escaped {{ or }}, so add
// everything up to the first char to the buffer
// then skip the second one.
buf += &remaining[..m.start() + BRACE_LEN];
remaining = &remaining[m.end()..];
}
id if !remaining[m.end()..].starts_with('}') => {
buf += &remaining[..m.start()];
if !buf.is_empty() {
tokens.push(Token::Text(std::mem::take(&mut buf)));
}
tokens.push(token_from_pattern_id(id));
remaining = &remaining[m.end()..];
}
_ => {
// We got a normal pattern, but the final "}"
// is escaped, so add up to that to the buffer, then
// skip the final }
buf += &remaining[..m.end()];
remaining = &remaining[m.end() + BRACE_LEN..];
}
}
}
// Add the rest of the string to the buffer, and add the final buffer to the tokens
if !remaining.is_empty() {
buf += remaining;
}
if tokens.is_empty() {
// No placeholders were found, so just return the text
return ArgumentTemplate::Text(buf);
}
// Add final text segment
if !buf.is_empty() {
tokens.push(Token::Text(buf));
}
debug_assert!(!tokens.is_empty());
ArgumentTemplate::Tokens(tokens)
}
fn token_from_pattern_id(id: u32) -> Token {
use Token::*;
match id {
2 => Placeholder,
3 => Basename,
4 => Parent,
5 => NoExt,
6 => BasenameNoExt,
_ => unreachable!(),
}
}

211
src/fmt/mod.rs Normal file
View File

@ -0,0 +1,211 @@
mod input;
use std::borrow::Cow;
use std::ffi::{OsStr, OsString};
use std::fmt::{self, Display, Formatter};
use std::path::{Component, Path, Prefix};
use std::sync::OnceLock;
use aho_corasick::AhoCorasick;
use self::input::{basename, dirname, remove_extension};
/// Designates what should be written to a buffer
///
/// Each `Token` contains either text, or a placeholder variant, which will be used to generate
/// commands after all tokens for a given command template have been collected.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Token {
Placeholder,
Basename,
Parent,
NoExt,
BasenameNoExt,
Text(String),
}
impl Display for Token {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match *self {
Token::Placeholder => f.write_str("{}")?,
Token::Basename => f.write_str("{/}")?,
Token::Parent => f.write_str("{//}")?,
Token::NoExt => f.write_str("{.}")?,
Token::BasenameNoExt => f.write_str("{/.}")?,
Token::Text(ref string) => f.write_str(string)?,
}
Ok(())
}
}
/// A parsed format string
///
/// This is either a collection of `Token`s including at least one placeholder variant,
/// or a fixed text.
#[derive(Clone, Debug, PartialEq)]
pub enum FormatTemplate {
Tokens(Vec<Token>),
Text(String),
}
static PLACEHOLDERS: OnceLock<AhoCorasick> = OnceLock::new();
impl FormatTemplate {
pub fn has_tokens(&self) -> bool {
matches!(self, FormatTemplate::Tokens(_))
}
pub fn parse(fmt: &str) -> Self {
// NOTE: we assume that { and } have the same length
const BRACE_LEN: usize = '{'.len_utf8();
let mut tokens = Vec::new();
let mut remaining = fmt;
let mut buf = String::new();
let placeholders = PLACEHOLDERS.get_or_init(|| {
AhoCorasick::new(&["{{", "}}", "{}", "{/}", "{//}", "{.}", "{/.}"]).unwrap()
});
while let Some(m) = placeholders.find(remaining) {
match m.pattern().as_u32() {
0 | 1 => {
// we found an escaped {{ or }}, so add
// everything up to the first char to the buffer
// then skip the second one.
buf += &remaining[..m.start() + BRACE_LEN];
remaining = &remaining[m.end()..];
}
id if !remaining[m.end()..].starts_with('}') => {
buf += &remaining[..m.start()];
if !buf.is_empty() {
tokens.push(Token::Text(std::mem::take(&mut buf)));
}
tokens.push(token_from_pattern_id(id));
remaining = &remaining[m.end()..];
}
_ => {
// We got a normal pattern, but the final "}"
// is escaped, so add up to that to the buffer, then
// skip the final }
buf += &remaining[..m.end()];
remaining = &remaining[m.end() + BRACE_LEN..];
}
}
}
// Add the rest of the string to the buffer, and add the final buffer to the tokens
if !remaining.is_empty() {
buf += remaining;
}
if tokens.is_empty() {
// No placeholders were found, so just return the text
return FormatTemplate::Text(buf);
}
// Add final text segment
if !buf.is_empty() {
tokens.push(Token::Text(buf));
}
debug_assert!(!tokens.is_empty());
FormatTemplate::Tokens(tokens)
}
/// Generate a result string from this template. If path_separator is Some, then it will replace
/// the path separator in all placeholder tokens. Fixed text and tokens are not affected by
/// path separator substitution.
pub fn generate(&self, path: impl AsRef<Path>, path_separator: Option<&str>) -> OsString {
use Token::*;
let path = path.as_ref();
match *self {
Self::Tokens(ref tokens) => {
let mut s = OsString::new();
for token in tokens {
match token {
Basename => s.push(Self::replace_separator(basename(path), path_separator)),
BasenameNoExt => s.push(Self::replace_separator(
&remove_extension(basename(path).as_ref()),
path_separator,
)),
NoExt => s.push(Self::replace_separator(
&remove_extension(path),
path_separator,
)),
Parent => s.push(Self::replace_separator(&dirname(path), path_separator)),
Placeholder => {
s.push(Self::replace_separator(path.as_ref(), path_separator))
}
Text(ref string) => s.push(string),
}
}
s
}
Self::Text(ref text) => OsString::from(text),
}
}
/// Replace the path separator in the input with the custom separator string. If path_separator
/// is None, simply return a borrowed Cow<OsStr> of the input. Otherwise, the input is
/// interpreted as a Path and its components are iterated through and re-joined into a new
/// OsString.
fn replace_separator<'a>(path: &'a OsStr, path_separator: Option<&str>) -> Cow<'a, OsStr> {
// fast-path - no replacement necessary
if path_separator.is_none() {
return Cow::Borrowed(path);
}
let path_separator = path_separator.unwrap();
let mut out = OsString::with_capacity(path.len());
let mut components = Path::new(path).components().peekable();
while let Some(comp) = components.next() {
match comp {
// Absolute paths on Windows are tricky. A Prefix component is usually a drive
// letter or UNC path, and is usually followed by RootDir. There are also
// "verbatim" prefixes beginning with "\\?\" that skip normalization. We choose to
// ignore verbatim path prefixes here because they're very rare, might be
// impossible to reach here, and there's no good way to deal with them. If users
// are doing something advanced involving verbatim windows paths, they can do their
// own output filtering with a tool like sed.
Component::Prefix(prefix) => {
if let Prefix::UNC(server, share) = prefix.kind() {
// Prefix::UNC is a parsed version of '\\server\share'
out.push(path_separator);
out.push(path_separator);
out.push(server);
out.push(path_separator);
out.push(share);
} else {
// All other Windows prefix types are rendered as-is. This results in e.g. "C:" for
// drive letters. DeviceNS and Verbatim* prefixes won't have backslashes converted,
// but they're not returned by directories fd can search anyway so we don't worry
// about them.
out.push(comp.as_os_str());
}
}
// Root directory is always replaced with the custom separator.
Component::RootDir => out.push(path_separator),
// Everything else is joined normally, with a trailing separator if we're not last
_ => {
out.push(comp.as_os_str());
if components.peek().is_some() {
out.push(path_separator);
}
}
}
}
Cow::Owned(out)
}
}
// Convert the id from an aho-corasick match to the
// appropriate token
fn token_from_pattern_id(id: u32) -> Token {
use Token::*;
match id {
2 => Placeholder,
3 => Basename,
4 => Parent,
5 => NoExt,
6 => BasenameNoExt,
_ => unreachable!(),
}
}

View File

@ -7,6 +7,7 @@ mod exit_codes;
mod filesystem;
mod filetypes;
mod filter;
mod fmt;
mod output;
mod regex_helper;
mod walk;
@ -299,6 +300,10 @@ fn construct_config(mut opts: Opts, pattern_regexps: &[String]) -> Result<Config
.build()
})
.transpose()?,
format: opts
.format
.as_deref()
.map(crate::fmt::FormatTemplate::parse),
command: command.map(Arc::new),
batch_size: opts.batch_size,
exclude_patterns: opts.exclude.iter().map(|p| String::from("!") + p).collect(),

View File

@ -7,6 +7,7 @@ use crate::config::Config;
use crate::dir_entry::DirEntry;
use crate::error::print_error;
use crate::exit_codes::ExitCode;
use crate::fmt::FormatTemplate;
fn replace_path_separator(path: &str, new_path_separator: &str) -> String {
path.replace(std::path::MAIN_SEPARATOR, new_path_separator)
@ -14,7 +15,10 @@ fn replace_path_separator(path: &str, new_path_separator: &str) -> String {
// TODO: this function is performance critical and can probably be optimized
pub fn print_entry<W: Write>(stdout: &mut W, entry: &DirEntry, config: &Config) {
let r = if let Some(ref ls_colors) = config.ls_colors {
// TODO: use format if supplied
let r = if let Some(ref format) = config.format {
print_entry_format(stdout, entry, config, format)
} else if let Some(ref ls_colors) = config.ls_colors {
print_entry_colorized(stdout, entry, config, ls_colors)
} else {
print_entry_uncolorized(stdout, entry, config)
@ -54,6 +58,19 @@ fn print_trailing_slash<W: Write>(
Ok(())
}
// TODO: this function is performance critical and can probably be optimized
fn print_entry_format<W: Write>(
stdout: &mut W,
entry: &DirEntry,
config: &Config,
format: &FormatTemplate,
) -> io::Result<()> {
let separator = if config.null_separator { "\0" } else { "\n" };
let output = format.generate(entry.path(), config.path_separator.as_deref());
// TODO: support writing raw bytes on unix?
write!(stdout, "{}{}", output.to_string_lossy(), separator)
}
// TODO: this function is performance critical and can probably be optimized
fn print_entry_colorized<W: Write>(
stdout: &mut W,