deduplicator · commit 5aea0eb6
• Cargo.lock
    skipped 70 lines
    71 71  checksum = "dfb24e866b15a1af2a1b663f10c6b6b8f397a84aadb828f12e5b289ec23a3a3c"
    72 72   
    73 73  [[package]]
     74 +name = "bytesize"
     75 +version = "1.1.0"
     76 +source = "registry+https://github.com/rust-lang/crates.io-index"
     77 +checksum = "6c58ec36aac5066d5ca17df51b3e70279f5670a72102f5752cb7e7c856adfc70"
     78 + 
     79 +[[package]]
    74 80  name = "cc"
    75 81  version = "1.0.78"
    76 82  source = "registry+https://github.com/rust-lang/crates.io-index"
    skipped 225 lines
    302 308  version = "0.1.1"
    303 309  dependencies = [
    304 310   "anyhow",
     311 + "bytesize",
    305 312   "chrono",
    306 313   "clap",
    307 314   "colored",
    308 315   "dashmap",
    309 316   "fxhash",
    310 317   "glob",
    311  - "humansize",
    312 318   "indicatif",
    313 319   "itertools",
    314 320   "memmap2",
    skipped 121 lines
    436 442  ]
    437 443   
    438 444  [[package]]
    439  -name = "humansize"
    440  -version = "2.1.2"
    441  -source = "registry+https://github.com/rust-lang/crates.io-index"
    442  -checksum = "4e682e2bd70ecbcce5209f11a992a4ba001fea8e60acf7860ce007629e6d2756"
    443  -dependencies = [
    444  - "libm",
    445  -]
    446  - 
    447  -[[package]]
    448 445  name = "iana-time-zone"
    449 446  version = "0.1.53"
    450 447  source = "registry+https://github.com/rust-lang/crates.io-index"
    skipped 88 lines
    539 536  version = "0.2.139"
    540 537  source = "registry+https://github.com/rust-lang/crates.io-index"
    541 538  checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79"
    542  - 
    543  -[[package]]
    544  -name = "libm"
    545  -version = "0.2.6"
    546  -source = "registry+https://github.com/rust-lang/crates.io-index"
    547  -checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb"
    548 539   
    549 540  [[package]]
    550 541  name = "link-cplusplus"
    skipped 600 lines
• Cargo.toml
    skipped 9 lines
    10 10   
    11 11  [dependencies]
    12 12  anyhow = "1.0.68"
     13 +bytesize = "1.1.0"
    13 14  chrono = "0.4.23"
    14 15  clap = { version = "4.0.32", features = ["derive"] }
    15 16  colored = "2.0.0"
    16 17  dashmap = { version = "5.4.0", features = ["rayon"] }
    17 18  fxhash = "0.2.1"
    18 19  glob = "0.3.0"
    19  -humansize = "2.1.2"
    20 20  indicatif = { version = "0.17.2", features = ["rayon", "tokio"] }
    21 21  itertools = "0.10.5"
    22 22  memmap2 = "0.5.8"
    skipped 6 lines
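    Net effect of the manifest change: `humansize` is swapped for `bytesize` (Cargo.lock above also drops humansize's transitive `libm` dependency), and the one new crate now covers both formatting sizes for display in `src/output.rs` and parsing the new `--minsize` argument in `src/params.rs`.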
• README.md
    skipped 13 lines
    14 14  Usage: deduplicator [OPTIONS]
    15 15   
    16 16  Options:
    17  -  -t, --types <TYPES>    Filetypes to deduplicate (default = all)
    18  -      --dir <DIR>        Run Deduplicator on dir different from pwd
    19  -  -i, --interactive      Delete files interactively
    20  -  -h, --help             Print help information
    21  -  -V, --version          Print version information
     17 +  -t, --types <TYPES>      Filetypes to deduplicate (default = all)
     18 +      --dir <DIR>          Run Deduplicator on dir different from pwd
     19 +  -i, --interactive        Delete files interactively
     20 +  -m, --minsize <MINSIZE>  Minimum filesize of duplicates to scan (e.g., 100B/1K/2M/3G/4T). [default = 0]
     21 +  -h, --help               Print help information
     22 +  -V, --version            Print version information
    22 23  ```
    23 24   
    24 25  <h2 align="center">Installation</h2>
    skipped 27 lines
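    With the new flag in place, an invocation like `deduplicator --minsize 1M` skips duplicates smaller than one megabyte; sizes use the `B/K/M/G/T` suffixes shown in the help text above.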
• src/filters.rs
     1 +use crate::file_manager::File;
     2 +use crate::params::Params;
     3 + 
     4 +pub fn is_file_gt_minsize(app_opts: &Params, file: &File) -> bool {
     5 +    match app_opts.get_minsize() {
     6 +        Some(msize) => match file.size {
     7 +            Some(fsize) => fsize >= msize,
     8 +            None => true,
     9 +        },
     10 +        None => true,
     11 +    }
     12 +}
     13 + 
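    Note the permissive fallbacks in the new filter: a missing or unparsable `--minsize` disables the check entirely, and a file whose size could not be read is kept rather than dropped. A minimal, self-contained sketch of those semantics (hypothetical `check` helper, not part of the crate):

    ```rust
    // Hypothetical stand-in for is_file_gt_minsize's core logic, with the
    // Params/File lookups replaced by plain Options.
    fn check(minsize: Option<u64>, file_size: Option<u64>) -> bool {
        match minsize {
            None => true, // no --minsize given: everything passes
            Some(msize) => match file_size {
                Some(fsize) => fsize >= msize, // keep files at or above the threshold
                None => true,                  // unknown size: keep rather than drop
            },
        }
    }

    fn main() {
        assert!(check(None, Some(10)));
        assert!(check(Some(1024), Some(2048)));
        assert!(!check(Some(1024), Some(512)));
        assert!(check(Some(1024), None)); // a metadata failure never hides a candidate
    }
    ```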
• src/main.rs
    skipped 2 lines
    3 3  mod output;
    4 4  mod params;
    5 5  mod scanner;
     6 +mod filters;
    6 7   
    7 8  use anyhow::Result;
    8 9  use app::App;
    skipped 7 lines
• src/output.rs
    skipped 4 lines
    5 5  use chrono::DateTime;
    6 6  use colored::Colorize;
    7 7  use dashmap::DashMap;
    8  -use humansize::{format_size, DECIMAL};
    9 8  use itertools::Itertools;
    10 9  use prettytable::{format, row, Table};
    11 10  use std::io::Write;
    skipped 18 lines
    30 29      Ok(format!("...{:<32}", display_range))
    31 30  }
    32 31   
    33  -fn file_size(path: &String) -> Result<String> {
    34  -    let mdata = fs::metadata(path)?;
    35  -    let formatted_size = format!("{:>12}", format_size(mdata.len(), DECIMAL));
    36  -    Ok(formatted_size)
     32 +fn file_size(file: &File) -> Result<String> {
     33 +    Ok(format!("{:>12}", bytesize::ByteSize::b(file.size.unwrap())))
    37 34  }
    38 35   
    39 36  fn modified_time(path: &String) -> Result<String> {
    skipped 79 lines
    119 116   
    120 117  pub fn interactive(duplicates: DashMap<String, Vec<File>>, opts: &Params) {
    121 118      print_meta_info();
     119 + 
     120 +    if duplicates.is_empty() {
     121 +        println!(
     122 +            "\n{}",
     123 +            "No duplicates found matching your search criteria.".green()
     124 +        );
     125 +        return;
     126 +    }
     127 + 
    122 128      duplicates
    123 129          .clone()
    124 130          .into_iter()
     131 +        .sorted_unstable_by_key(|f| {
     132 +            -(f.1.first().and_then(|ff| ff.size).unwrap_or_default() as i64)
     133 +        }) // sort by descending file size in interactive mode
    125 134          .enumerate()
    126 135          .for_each(|(gindex, (_, group))| {
    127 136              let mut itable = Table::new();
    skipped 3 lines
    131 140              itable.add_row(row![
    132 141                  index,
    133 142                  format_path(&file.path, opts).unwrap_or_default().blue(),
    134  -                file_size(&file.path).unwrap_or_default().red(),
     143 +                file_size(&file).unwrap_or_default().red(),
    135 144                  modified_time(&file.path).unwrap_or_default().yellow()
    136 145              ]);
    137 146          });
    skipped 5 lines
    143 152  pub fn print(duplicates: DashMap<String, Vec<File>>, opts: &Params) {
    144 153      print_meta_info();
    145 154   
     155 +    if duplicates.is_empty() {
     156 +        println!(
     157 +            "\n{}",
     158 +            "No duplicates found matching your search criteria.".green()
     159 +        );
     160 +        return;
     161 +    }
     162 + 
    146 163      let mut output_table = Table::new();
    147 164      output_table.set_titles(row!["hash", "duplicates"]);
    148  -    duplicates.into_iter().for_each(|(hash, group)| {
    149  -        let mut inner_table = Table::new();
    150  -        inner_table.set_format(*format::consts::FORMAT_NO_BORDER_LINE_SEPARATOR);
    151  -        group.iter().for_each(|file| {
    152  -            inner_table.add_row(row![
    153  -                format_path(&file.path, opts).unwrap_or_default().blue(),
    154  -                file_size(&file.path).unwrap_or_default().red(),
    155  -                modified_time(&file.path).unwrap_or_default().yellow()
    156  -            ]);
     165 +    duplicates
     166 +        .into_iter()
     167 +        .sorted_unstable_by_key(|f| f.1.first().and_then(|ff| ff.size).unwrap_or_default()) // sort by ascending size
     168 +        .for_each(|(hash, group)| {
     169 +            let mut inner_table = Table::new();
     170 +            inner_table.set_format(*format::consts::FORMAT_NO_BORDER_LINE_SEPARATOR);
     171 +            group.iter().for_each(|file| {
     172 +                inner_table.add_row(row![
     173 +                    format_path(&file.path, opts).unwrap_or_default().blue(),
     174 +                    file_size(&file).unwrap_or_default().red(),
     175 +                    modified_time(&file.path).unwrap_or_default().yellow()
     176 +                ]);
     177 +            });
     178 +            output_table.add_row(row![hash.green(), inner_table]);
    157 179          });
    158  -        output_table.add_row(row![hash.green(), inner_table]);
    159  -    });
    160 180   
    161 181      output_table.printstd();
    162 182  }
    skipped 1 lines
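    Both printers now order groups by the size of the first file in each group: ascending in `print`, descending (via a negated signed key) in `interactive`. Since the map iterator yields `(key, group)` tuples, `f.1` is the `Vec<File>`. A standalone sketch of the same itertools pattern on toy data (hypothetical values, not the crate's types):

    ```rust
    use itertools::Itertools;

    fn main() {
        // (hash, file sizes) pairs standing in for the (String, Vec<File>) entries.
        let groups = vec![
            ("aaa", vec![300u64, 300]),
            ("bbb", vec![100, 100]),
            ("ccc", vec![200, 200]),
        ];

        // Ascending by the first file's size, as in print().
        let ascending: Vec<_> = groups
            .iter()
            .sorted_unstable_by_key(|g| g.1.first().copied().unwrap_or_default())
            .map(|g| g.0)
            .collect();
        assert_eq!(ascending, ["bbb", "ccc", "aaa"]);

        // Descending, as in interactive(): negate the key as an i64.
        let descending: Vec<_> = groups
            .iter()
            .sorted_unstable_by_key(|g| -(g.1.first().copied().unwrap_or_default() as i64))
            .map(|g| g.0)
            .collect();
        assert_eq!(descending, ["aaa", "ccc", "bbb"]);
    }
    ```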
• src/params.rs
    1 1  use anyhow::{anyhow, Result};
    2  -use clap::Parser;
     2 +use clap::{Parser, ValueHint};
    3 3  use std::{fs, path::PathBuf};
    4 4   
    5 5  #[derive(Parser, Debug)]
    skipped 3 lines
    9 9      #[arg(short, long)]
    10 10      pub types: Option<String>,
    11 11      /// Run Deduplicator on dir different from pwd
    12  -    #[arg(long)]
     12 +    #[arg(long, value_hint = ValueHint::DirPath)]
    13 13      pub dir: Option<PathBuf>,
    14 14      /// Delete files interactively
    15 15      #[arg(long, short)]
    16 16      pub interactive: bool,
     17 +    /// Minimum filesize of duplicates to scan (e.g., 100B/1K/2M/3G/4T). [default = 0]
     18 +    #[arg(long, short)]
     19 +    pub minsize: Option<String>,
    17 20  }
    18 21   
    19 22  impl Params {
     23 +    pub fn get_minsize(&self) -> Option<u64> {
     24 +        match &self.minsize {
     25 +            Some(msize) => match msize.parse::<bytesize::ByteSize>() {
     26 +                Ok(units) => Some(units.0),
     27 +                Err(_) => None,
     28 +            },
     29 +            None => None,
     30 +        }
     31 +    }
     32 + 
    20 33      pub fn get_directory(&self) -> Result<String> {
    21 34          let dir_pathbuf: PathBuf = self
    22 35              .dir
    skipped 29 lines
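    `get_minsize` leans on two pieces of the `bytesize` API: `ByteSize` implements `FromStr`, and it is a tuple struct over a raw `u64` byte count, hence `units.0`. A quick sketch of the behavior the flag depends on (my reading of bytesize 1.1; the suffixes are decimal SI units, so `2M` should come out as 2,000,000 bytes):

    ```rust
    use bytesize::ByteSize;

    fn main() {
        // FromStr yields a ByteSize; its .0 field is the byte count get_minsize returns.
        match "2M".parse::<ByteSize>() {
            Ok(units) => println!("2M -> {} bytes", units.0),
            Err(err) => println!("parse failed: {err}"),
        }

        // Invalid input surfaces as Err, which get_minsize maps to None —
        // an unparsable --minsize silently disables the size filter.
        assert!("not-a-size".parse::<ByteSize>().is_err());
    }
    ```

    One consequence worth noting: because parse failures become `None`, a typo like `--minsize 1Q` behaves the same as omitting the flag.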
• src/scanner.rs
     1 +use crate::{file_manager::File, filters, params::Params};
    1 2  use anyhow::Result;
    2 3  use dashmap::DashMap;
    3 4  use fxhash::hash64 as hasher;
    skipped 3 lines
    7 8  use rayon::prelude::*;
    8 9  use std::hash::Hasher;
    9 10  use std::{fs, path::PathBuf};
    10  - 
    11  -use crate::{file_manager::File, params::Params};
    12 11   
    13 12  #[derive(Clone, Copy)]
    14 13  enum IndexCritera {
    skipped 13 lines
    28 27          .collect::<Vec<File>>();
    29 28   
    30 29      if sizewize_duplicate_files.len() > 1 {
    31  -        let size_wise_duplicate_paths = sizewize_duplicate_files
    32  -            .into_par_iter()
    33  -            .map(|file| file.path)
    34  -            .collect::<Vec<String>>();
    35  - 
    36  -        let hash_index_store = index_files(size_wise_duplicate_paths, IndexCritera::Hash)?;
     30 +        let hash_index_store = index_files(sizewize_duplicate_files, IndexCritera::Hash)?;
    37 31          let duplicate_files = hash_index_store
    38 32              .into_par_iter()
    39 33              .filter(|(_, files)| files.len() > 1)
    skipped 5 lines
    45 39      }
    46 40  }
    47 41   
    48  -fn scan(app_opts: &Params) -> Result<Vec<String>> {
     42 +fn scan(app_opts: &Params) -> Result<Vec<File>> {
    49 43      let glob_patterns: Vec<PathBuf> = app_opts.get_glob_patterns();
    50  -    let files: Vec<String> = glob_patterns
     44 +    let files: Vec<File> = glob_patterns
    51 45          .par_iter()
    52 46          .progress_with_style(ProgressStyle::with_template(
    53 47              "{spinner:.green} [scanning files] [{wide_bar:.cyan/blue}] {pos}/{len} files",
    skipped 9 lines
    63 57              })
    64 58              .collect::<Vec<String>>()
    65 59          })
     60 +        .map(|file_path| File {
     61 +            path: file_path.clone(),
     62 +            hash: None,
     63 +            size: Some(fs::metadata(file_path).unwrap().len()),
     64 +        })
     65 +        .filter(|file| filters::is_file_gt_minsize(app_opts, file))
    66 66          .collect();
    67 67   
    68 68      Ok(files)
    69 69  }
    70 70   
    71  -fn process_file_size_index(fpath: String) -> Result<File> {
     71 +fn process_file_hash_index(file: &File) -> Result<File> {
    72 72      Ok(File {
    73  -        path: fpath.clone(),
    74  -        size: Some(fs::metadata(fpath)?.len()),
    75  -        hash: None,
    76  -    })
    77  -}
    78  - 
    79  -fn process_file_hash_index(fpath: String) -> Result<File> {
    80  -    Ok(File {
    81  -        path: fpath.clone(),
    82  -        size: None,
    83  -        hash: Some(hash_file(&fpath).unwrap_or_default()),
     73 +        path: file.path.clone(),
     74 +        size: file.size,
     75 +        hash: Some(hash_file(&file.path).unwrap_or_default()),
    84 76      })
    85 77  }
    86 78   
    87 79  fn process_file_index(
    88  -    fpath: String,
     80 +    file: File,
    89 81      store: &DashMap<String, Vec<File>>,
    90 82      index_criteria: IndexCritera,
    91 83  ) {
    92 84      match index_criteria {
    93 85          IndexCritera::Size => {
    94  -            let processed_file = process_file_size_index(fpath).unwrap();
    95 86              store
    96  -                .entry(processed_file.size.unwrap_or_default().to_string())
    97  -                .and_modify(|fileset| fileset.push(processed_file.clone()))
    98  -                .or_insert_with(|| vec![processed_file]);
     87 +                .entry(file.size.unwrap_or_default().to_string())
     88 +                .and_modify(|fileset| fileset.push(file.clone()))
     89 +                .or_insert_with(|| vec![file]);
    99 90          }
    100 91          IndexCritera::Hash => {
    101  -            let processed_file = process_file_hash_index(fpath).unwrap();
     92 +            let processed_file = process_file_hash_index(&file).unwrap();
    102 93              let indexhash = processed_file.clone().hash.unwrap_or_default();
    103 94   
    104 95              store
    skipped 5 lines
    110 101  }
    111 102   
    112 103  fn index_files(
    113  -    files: Vec<String>,
     104 +    files: Vec<File>,
    114 105      index_criteria: IndexCritera,
    115 106  ) -> Result<DashMap<String, Vec<File>>> {
    116 107      let store: DashMap<String, Vec<File>> = DashMap::new();
    skipped 7 lines
    124 115   Ok(store)
    125 116  }
    126 117   
    127  -pub fn incremental_hashing(filepath: &str) -> Result<String> {
     118 +fn incremental_hashing(filepath: &str) -> Result<String> {
    128 119      let file = fs::File::open(filepath)?;
    129 120      let fmap = unsafe { Mmap::map(&file)? };
    130 121      let mut inchasher = fxhash::FxHasher::default();
    skipped 4 lines
    135 126      Ok(format!("{}", inchasher.finish()))
    136 127  }
    137 128   
    138  -pub fn standard_hashing(filepath: &str) -> Result<String> {
     129 +fn standard_hashing(filepath: &str) -> Result<String> {
    139 130   let file = fs::read(filepath)?;
    140 131   Ok(hasher(&*file).to_string())
    141 132  }
    142 133   
    143  -pub fn hash_file(filepath: &str) -> Result<String> {
     134 +fn hash_file(filepath: &str) -> Result<String> {
    144 135      let filemeta = fs::metadata(filepath)?;
    145 136   
    146 137      // NOTE: USE INCREMENTAL HASHING ONLY FOR FILES > 100MB
    skipped 6 lines
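    Taken together, the scanner still runs its two-stage pipeline (group by size, then hash only the size collisions), but both stages now traffic in `File` values, so the size read once during the scan is carried through to the hash stage and the output tables instead of being re-stat'ed per file. A condensed, single-threaded sketch of that strategy with simplified types (plain `HashMap` instead of `DashMap`, no rayon, toy hash function; not the crate's actual code):

    ```rust
    use std::collections::HashMap;

    #[derive(Clone)]
    struct File {
        path: String,
        size: u64,
        hash: Option<String>,
    }

    fn find_duplicates(files: Vec<File>, hash: impl Fn(&str) -> String) -> Vec<Vec<File>> {
        // Stage 1: bucket by size; a file with a unique size cannot be a duplicate.
        let mut by_size: HashMap<u64, Vec<File>> = HashMap::new();
        for f in files {
            by_size.entry(f.size).or_default().push(f);
        }

        // Stage 2: hash only the size-collision survivors, then bucket by hash.
        let mut by_hash: HashMap<String, Vec<File>> = HashMap::new();
        for f in by_size.into_values().filter(|g| g.len() > 1).flatten() {
            let h = hash(&f.path);
            by_hash.entry(h.clone()).or_default().push(File { hash: Some(h), ..f });
        }

        by_hash.into_values().filter(|g| g.len() > 1).collect()
    }

    fn main() {
        let files = vec![
            File { path: "a".into(), size: 10, hash: None },
            File { path: "b".into(), size: 10, hash: None },
            File { path: "c".into(), size: 20, hash: None },
        ];
        // Toy hash: the size-10 pair collides; the size-20 file is never hashed at all.
        let dups = find_duplicates(files, |_| "same".into());
        assert_eq!(dups.len(), 1);
        assert_eq!(dups[0].len(), 2);
    }
    ```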