Projects STRLCPY deduplicator Commits 9f4d9139
🤬
  • added --min-depth --max-depth --follow-links and renamed --minsize to --min-size

  • Loading...
  • sreedev committed 1 year ago
    9f4d9139
    1 parent 7d66aeef
  • ■ ■ ■ ■ ■ ■
    README.md
    skipped 9 lines
    10 10  Usage: deduplicator [OPTIONS]
    11 11   
    12 12  Options:
    13  - -t, --types <TYPES> Filetypes to deduplicate (default = all)
    14  - --dir <DIR> Run Deduplicator on dir different from pwd
    15  - -i, --interactive Delete files interactively
    16  - -m, --minsize <MINSIZE> Minimum filesize of duplicates to scan (e.g., 100B/1K/2M/3G/4T). [default = 0]
    17  - -h, --help Print help information
    18  - -V, --version Print version information
     13 + -t, --types <TYPES> Filetypes to deduplicate [default = all]
     14 + --dir <DIR> Run Deduplicator on dir different from pwd
     15 + -i, --interactive Delete files interactively
     16 + --min-size <MIN_SIZE> Minimum filesize of duplicates to scan (e.g., 100B/1K/2M/3G/4T) [default: 1b]
     17 + -d, --max-depth <MAX_DEPTH> Max Depth to scan while looking for duplicates
     18 + --min-depth <MIN_DEPTH> Min Depth to scan while looking for duplicates
     19 + --follow-links Follow links while scanning directories
     20 + -h, --help Print help information
     21 + -V, --version Print version information
    19 22  ```
    20 23   
    21 24  ## Installation
    skipped 53 lines
    75 78  |:---|:---|---:|---:|---:|---:|
    76 79  | `deduplicator --dir ~/Data/tmp` | (~120G) | 27.5 ± 1.0 | 26.0 | 32.1 | 1.70 ± 0.09 |
    77 80  | `deduplicator --dir ~/Data/books` | (~8.6G) | 21.8 ± 0.7 | 20.5 | 24.4 | 1.35 ± 0.07 |
    78  -| `deduplicator --dir ~/Data/books --minsize 10M` | (~8.6G) | 16.1 ± 0.6 | 14.9 | 18.8 | 1.00 |
     81 +| `deduplicator --dir ~/Data/books --min-size 10M` | (~8.6G) | 16.1 ± 0.6 | 14.9 | 18.8 | 1.00 |
    79 82  | `deduplicator --dir ~/Data/ --types pdf,jpg,png,jpeg` | (~290G) | 1857.4 ± 24.5 | 1817.0 | 1895.5 | 115.07 ± 4.64 |
    80 83   
    81 84  * The last entry is lower because of the number of files deduplicator had to go through (~660895 Files). The average size of the files rarely affects the performance of deduplicator.
    skipped 20 lines
  • ■ ■ ■ ■ ■ ■
    src/filters.rs
    1 1  use crate::file_manager::File;
    2 2  use crate::params::Params;
    3 3   
    4  -pub fn is_file_gt_minsize(app_opts: &Params, file: &File) -> bool {
    5  - match app_opts.get_minsize() {
     4 +pub fn is_file_gt_min_size(app_opts: &Params, file: &File) -> bool {
     5 + match app_opts.get_min_size() {
    6 6   Some(msize) => match file.size {
    7 7   Some(fsize) => fsize >= msize,
    8 8   None => true,
    skipped 5 lines
  • ■ ■ ■ ■ ■
    src/params.rs
    skipped 6 lines
    7 7  #[derive(Parser, Debug)]
    8 8  #[command(author, version, about, long_about = None)]
    9 9  pub struct Params {
    10  - /// Filetypes to deduplicate (default = all)
     10 + /// Filetypes to deduplicate [default = all]
    11 11   #[arg(short, long)]
    12 12   pub types: Option<String>,
    13 13   /// Run Deduplicator on dir different from pwd
    skipped 2 lines
    16 16   /// Delete files interactively
    17 17   #[arg(long, short)]
    18 18   pub interactive: bool,
    19  - /// Minimum filesize of duplicates to scan (e.g., 100B/1K/2M/3G/4T). [default = 0]
    20  - #[arg(long, short)]
    21  - pub minsize: Option<String>,
     19 + /// Minimum filesize of duplicates to scan (e.g., 100B/1K/2M/3G/4T).
     20 + #[arg(long, default_value = "1b")]
     21 + pub min_size: Option<String>,
     22 + /// Max Depth to scan while looking for duplicates
     23 + #[arg(long, short = 'd')]
     24 + pub max_depth: Option<usize>,
     25 + /// Min Depth to scan while looking for duplicates
     26 + #[arg(long)]
     27 + pub min_depth: Option<usize>,
     28 + /// Follow links while scanning directories
     29 + #[arg(long)]
     30 + pub follow_links: bool,
    22 31  }
    23 32   
    24 33  impl Params {
    25  - pub fn get_minsize(&self) -> Option<u64> {
    26  - match &self.minsize {
     34 + pub fn get_min_size(&self) -> Option<u64> {
     35 + match &self.min_size {
    27 36   Some(msize) => match msize.parse::<bytesize::ByteSize>() {
    28 37   Ok(units) => Some(units.0),
    29 38   Err(_) => None,
    skipped 9 lines
    39 48   Ok(dir)
    40 49   }
    41 50   
     51 + fn add_glob_min_depth(&self, builder: GlobWalkerBuilder) -> Result<GlobWalkerBuilder> {
     52 + match self.min_depth {
     53 + Some(mindepth) => Ok(builder.min_depth(mindepth)),
     54 + None => Ok(builder),
     55 + }
     56 + }
     57 + 
     58 + fn add_glob_max_depth(&self, builder: GlobWalkerBuilder) -> Result<GlobWalkerBuilder> {
     59 + match self.max_depth {
     60 + Some(maxdepth) => Ok(builder.max_depth(maxdepth)),
     61 + None => Ok(builder),
     62 + }
     63 + }
     64 + 
     65 + fn add_glob_follow_links(&self, builder: GlobWalkerBuilder) -> Result<GlobWalkerBuilder> {
     66 + match self.follow_links {
     67 + true => Ok(builder.follow_links(true)),
     68 + false => Ok(builder.follow_links(false)),
     69 + }
     70 + }
     71 + 
    42 72   pub fn get_glob_walker(&self) -> Result<GlobWalker> {
    43 73   let pattern: String = match self.types.as_ref() {
    44 74   Some(filetypes) => format!("**/*{{{filetypes}}}"),
    45 75   None => "**/*".to_string(),
    46 76   };
    47  - // TODO: add params for maximum depth and following symlinks, then pass them to this builder
    48  - GlobWalkerBuilder::from_patterns(self.get_directory()?, &[pattern])
    49  - .build()
    50  - .map_err(|e| anyhow!(e))
     77 + 
     78 + let glob_walker_builder = self
     79 + .add_glob_min_depth(GlobWalkerBuilder::from_patterns(
     80 + self.get_directory()?,
     81 + &[pattern],
     82 + ))
     83 + .and_then(|builder| self.add_glob_max_depth(builder))
     84 + .and_then(|builder| self.add_glob_follow_links(builder))?;
     85 + 
     86 + glob_walker_builder.build().map_err(|e| anyhow!(e))
    51 87   }
    52 88  }
    53 89   
  • ■ ■ ■ ■
    src/scanner.rs
    skipped 62 lines
    63 63   hash: None,
    64 64   size: Some(fs::metadata(fpath).unwrap().len()),
    65 65   })
    66  - .filter(|file| filters::is_file_gt_minsize(app_opts, file))
     66 + .filter(|file| filters::is_file_gt_min_size(app_opts, file))
    67 67   .collect();
    68 68   
    69 69   Ok(files)
    skipped 65 lines
Please wait...
Page is in error, reload to recover