Projects STRLCPY deduplicator Commits ab44d1ed
🤬
  • ■ ■ ■ ■
    Cargo.lock
    skipped 323 lines
    324 324   
    325 325  [[package]]
    326 326  name = "deduplicator"
    327  -version = "0.1.4"
     327 +version = "0.1.5"
    328 328  dependencies = [
    329 329   "anyhow",
    330 330   "bytesize",
    skipped 917 lines
  • ■ ■ ■ ■
    Cargo.toml
    1 1  [package]
    2 2  name = "deduplicator"
    3  -version = "0.1.4"
     3 +version = "0.1.5"
    4 4  edition = "2021"
    5 5  description = "find,filter,delete Duplicates"
    6 6  license = "MIT"
    skipped 26 lines
  • ■ ■ ■ ■ ■
    README.md
    skipped 6 lines
    7 7  ## Usage
    8 8   
    9 9  ```bash
    10  -Usage: deduplicator [OPTIONS]
     10 +Usage: deduplicator [OPTIONS] [scan_dir_path]
     11 + 
     12 +Arguments:
     13 + [scan_dir_path] Run Deduplicator on dir different from pwd (e.g., ~/Pictures)
    11 14   
    12 15  Options:
    13  - -t, --types <TYPES> Filetypes to deduplicate (default = all)
    14  - --dir <DIR> Run Deduplicator on dir different from pwd
    15  - -i, --interactive Delete files interactively
    16  - -m, --minsize <MINSIZE> Minimum filesize of duplicates to scan (e.g., 100B/1K/2M/3G/4T). [default = 0]
    17  - -h, --help Print help information
    18  - -V, --version Print version information
     16 + -t, --types <TYPES> Filetypes to deduplicate [default = all]
     17 + -i, --interactive Delete files interactively
     18 + -s, --min-size <MIN_SIZE> Minimum filesize of duplicates to scan (e.g., 100B/1K/2M/3G/4T) [default: 1b]
     19 + -d, --max-depth <MAX_DEPTH> Max Depth to scan while looking for duplicates
     20 + --min-depth <MIN_DEPTH> Min Depth to scan while looking for duplicates
     21 + -f, --follow-links Follow links while scanning directories
     22 + -h, --help Print help information
     23 + -V, --version Print version information
     24 +```
     25 +### Examples
     26 + 
     27 +```bash
     28 +# Scan for duplicates recursively from the current dir, only look for png, jpg & pdf file types & interactively delete files
     29 +deduplicator -t pdf,jpg,png -i
     30 + 
     31 +# Scan for duplicates recursively from the ~/Pictures dir, only look for png, jpeg, jpg & pdf file types & interactively delete files
     32 +deduplicator ~/Pictures/ -t png,jpeg,jpg,pdf -i
     33 + 
     34 +# Scan for duplicates in the ~/Pictures directory without recursing into subdirectories
     35 +deduplicator ~/Pictures --max-depth 0
     36 + 
     37 +# Look for duplicates in the ~/.config directory while also following symbolic links
     38 +deduplicator ~/.config --follow-links
     39 + 
     40 +# Scan for duplicates larger than 100MB in the ~/Media directory
     41 +deduplicator ~/Media --min-size 100mb
    19 42  ```
    20 43   
    21 44  ## Installation
    skipped 53 lines
    75 98  |:---|:---|---:|---:|---:|---:|
    76 99  | `deduplicator --dir ~/Data/tmp` | (~120G) | 27.5 ± 1.0 | 26.0 | 32.1 | 1.70 ± 0.09 |
    77 100  | `deduplicator --dir ~/Data/books` | (~8.6G) | 21.8 ± 0.7 | 20.5 | 24.4 | 1.35 ± 0.07 |
    78  -| `deduplicator --dir ~/Data/books --minsize 10M` | (~8.6G) | 16.1 ± 0.6 | 14.9 | 18.8 | 1.00 |
     101 +| `deduplicator ~/Data/books --min-size 10M` | (~8.6G) | 16.1 ± 0.6 | 14.9 | 18.8 | 1.00 |
    79 102  | `deduplicator --dir ~/Data/ --types pdf,jpg,png,jpeg` | (~290G) | 1857.4 ± 24.5 | 1817.0 | 1895.5 | 115.07 ± 4.64 |
    80 103   
    81 104  * The last entry is lower because of the number of files deduplicator had to go through (~660895 Files). The average size of the files rarely affect the performance of deduplicator.
    skipped 20 lines
  • ■ ■ ■ ■ ■ ■
    src/filters.rs
    1 1  use crate::file_manager::File;
    2 2  use crate::params::Params;
    3 3   
    4  -pub fn is_file_gt_minsize(app_opts: &Params, file: &File) -> bool {
    5  - match app_opts.get_minsize() {
     4 +pub fn is_file_gt_min_size(app_opts: &Params, file: &File) -> bool {
     5 + match app_opts.get_min_size() {
    6 6   Some(msize) => match file.size {
    7 7   Some(fsize) => fsize >= msize,
    8 8   None => true,
    skipped 5 lines
  • ■ ■ ■ ■ ■
    src/params.rs
    skipped 6 lines
    7 7  #[derive(Parser, Debug)]
    8 8  #[command(author, version, about, long_about = None)]
    9 9  pub struct Params {
    10  - /// Filetypes to deduplicate (default = all)
     10 + /// Filetypes to deduplicate [default = all]
    11 11   #[arg(short, long)]
    12 12   pub types: Option<String>,
    13  - /// Run Deduplicator on dir different from pwd
    14  - #[arg(long, value_hint = ValueHint::DirPath)]
     13 + /// Run Deduplicator on dir different from pwd (e.g., ~/Pictures )
     14 + #[arg(value_hint = ValueHint::DirPath, value_name = "scan_dir_path")]
    15 15   pub dir: Option<PathBuf>,
    16 16   /// Delete files interactively
    17 17   #[arg(long, short)]
    18 18   pub interactive: bool,
    19  - /// Minimum filesize of duplicates to scan (e.g., 100B/1K/2M/3G/4T). [default = 0]
     19 + /// Minimum filesize of duplicates to scan (e.g., 100B/1K/2M/3G/4T).
     20 + #[arg(long, short = 's', default_value = "1b")]
     21 + pub min_size: Option<String>,
     22 + /// Max Depth to scan while looking for duplicates
     23 + #[arg(long, short = 'd')]
     24 + pub max_depth: Option<usize>,
     25 + /// Min Depth to scan while looking for duplicates
     26 + #[arg(long)]
     27 + pub min_depth: Option<usize>,
     28 + /// Follow links while scanning directories
    20 29   #[arg(long, short)]
    21  - pub minsize: Option<String>,
     30 + pub follow_links: bool,
    22 31  }
    23 32   
    24 33  impl Params {
    25  - pub fn get_minsize(&self) -> Option<u64> {
    26  - match &self.minsize {
     34 + pub fn get_min_size(&self) -> Option<u64> {
     35 + match &self.min_size {
    27 36   Some(msize) => match msize.parse::<bytesize::ByteSize>() {
    28 37   Ok(units) => Some(units.0),
    29 38   Err(_) => None,
    skipped 9 lines
    39 48   Ok(dir)
    40 49   }
    41 50   
     51 + fn add_glob_min_depth(&self, builder: GlobWalkerBuilder) -> Result<GlobWalkerBuilder> {
     52 + match self.min_depth {
     53 + Some(mindepth) => Ok(builder.min_depth(mindepth)),
     54 + None => Ok(builder),
     55 + }
     56 + }
     57 + 
     58 + fn add_glob_max_depth(&self, builder: GlobWalkerBuilder) -> Result<GlobWalkerBuilder> {
     59 + match self.max_depth {
     60 + Some(maxdepth) => Ok(builder.max_depth(maxdepth)),
     61 + None => Ok(builder),
     62 + }
     63 + }
     64 + 
     65 + fn add_glob_follow_links(&self, builder: GlobWalkerBuilder) -> Result<GlobWalkerBuilder> {
     66 + match self.follow_links {
     67 + true => Ok(builder.follow_links(true)),
     68 + false => Ok(builder.follow_links(false)),
     69 + }
     70 + }
     71 + 
    42 72   pub fn get_glob_walker(&self) -> Result<GlobWalker> {
    43 73   let pattern: String = match self.types.as_ref() {
    44 74   Some(filetypes) => format!("**/*{{{filetypes}}}"),
    45 75   None => "**/*".to_string(),
    46 76   };
    47  - // TODO: add params for maximum depth and following symlinks, then pass them to this builder
    48  - GlobWalkerBuilder::from_patterns(self.get_directory()?, &[pattern])
    49  - .build()
    50  - .map_err(|e| anyhow!(e))
     77 + 
     78 + let glob_walker_builder = self
     79 + .add_glob_min_depth(GlobWalkerBuilder::from_patterns(
     80 + self.get_directory()?,
     81 + &[pattern],
     82 + ))
     83 + .and_then(|builder| self.add_glob_max_depth(builder))
     84 + .and_then(|builder| self.add_glob_follow_links(builder))?;
     85 + 
     86 + glob_walker_builder.build().map_err(|e| anyhow!(e))
    51 87   }
    52 88  }
    53 89   
  • ■ ■ ■ ■
    src/scanner.rs
    skipped 62 lines
    63 63   hash: None,
    64 64   size: Some(fs::metadata(fpath).unwrap().len()),
    65 65   })
    66  - .filter(|file| filters::is_file_gt_minsize(app_opts, file))
     66 + .filter(|file| filters::is_file_gt_min_size(app_opts, file))
    67 67   .collect();
    68 68   
    69 69   Ok(files)
    skipped 65 lines
Please wait...
Page is in error, reload to recover