Projects STRLCPY deduplicator Commits 163e8d85
🤬
Revision indexing in progress... (symbol navigation in revisions will be accurate after indexed)
  • ■ ■ ■ ■ ■ ■
    Cargo.lock
    skipped 2 lines
    3 3  version = 3
    4 4   
    5 5  [[package]]
     6 +name = "aho-corasick"
     7 +version = "0.7.20"
     8 +source = "registry+https://github.com/rust-lang/crates.io-index"
     9 +checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
     10 +dependencies = [
     11 + "memchr",
     12 +]
     13 + 
     14 +[[package]]
    6 15  name = "android_system_properties"
    7 16  version = "0.1.5"
    8 17  source = "registry+https://github.com/rust-lang/crates.io-index"
    skipped 44 lines
    53 62  ]
    54 63   
    55 64  [[package]]
     65 +name = "bstr"
     66 +version = "1.1.0"
     67 +source = "registry+https://github.com/rust-lang/crates.io-index"
     68 +checksum = "b45ea9b00a7b3f2988e9a65ad3917e62123c38dba709b666506207be96d1790b"
     69 +dependencies = [
     70 + "memchr",
     71 + "serde",
     72 +]
     73 + 
     74 +[[package]]
    56 75  name = "bumpalo"
    57 76  version = "3.11.1"
    58 77  source = "registry+https://github.com/rust-lang/crates.io-index"
    skipped 170 lines
    229 248  source = "registry+https://github.com/rust-lang/crates.io-index"
    230 249  checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1"
    231 250  dependencies = [
    232  - "bstr",
     251 + "bstr 0.2.17",
    233 252   "csv-core",
    234 253   "itoa",
    235 254   "ryu",
    skipped 69 lines
    305 324   
    306 325  [[package]]
    307 326  name = "deduplicator"
    308  -version = "0.1.3"
     327 +version = "0.1.4"
    309 328  dependencies = [
    310 329   "anyhow",
    311 330   "bytesize",
    skipped 2 lines
    314 333   "colored",
    315 334   "dashmap",
    316 335   "fxhash",
    317  - "glob",
     336 + "globwalk",
    318 337   "indicatif",
    319 338   "itertools",
    320 339   "memmap2",
    skipped 65 lines
    386 405  ]
    387 406   
    388 407  [[package]]
     408 +name = "fnv"
     409 +version = "1.0.7"
     410 +source = "registry+https://github.com/rust-lang/crates.io-index"
     411 +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
     412 + 
     413 +[[package]]
    389 414  name = "fxhash"
    390 415  version = "0.2.1"
    391 416  source = "registry+https://github.com/rust-lang/crates.io-index"
    skipped 14 lines
    406 431  ]
    407 432   
    408 433  [[package]]
    409  -name = "glob"
    410  -version = "0.3.0"
     434 +name = "globset"
     435 +version = "0.4.10"
     436 +source = "registry+https://github.com/rust-lang/crates.io-index"
     437 +checksum = "029d74589adefde59de1a0c4f4732695c32805624aec7b68d91503d4dba79afc"
     438 +dependencies = [
     439 + "aho-corasick",
     440 + "bstr 1.1.0",
     441 + "fnv",
     442 + "log",
     443 + "regex",
     444 +]
     445 + 
     446 +[[package]]
     447 +name = "globwalk"
     448 +version = "0.8.1"
    411 449  source = "registry+https://github.com/rust-lang/crates.io-index"
    412  -checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
     450 +checksum = "93e3af942408868f6934a7b85134a3230832b9977cf66125df2f9edcfce4ddcc"
     451 +dependencies = [
     452 + "bitflags",
     453 + "ignore",
     454 + "walkdir",
     455 +]
    413 456   
    414 457  [[package]]
    415 458  name = "hashbrown"
    skipped 50 lines
    466 509  ]
    467 510   
    468 511  [[package]]
     512 +name = "ignore"
     513 +version = "0.4.20"
     514 +source = "registry+https://github.com/rust-lang/crates.io-index"
     515 +checksum = "dbe7873dab538a9a44ad79ede1faf5f30d49f9a5c883ddbab48bce81b64b7492"
     516 +dependencies = [
     517 + "globset",
     518 + "lazy_static",
     519 + "log",
     520 + "memchr",
     521 + "regex",
     522 + "same-file",
     523 + "thread_local",
     524 + "walkdir",
     525 + "winapi-util",
     526 +]
     527 + 
     528 +[[package]]
    469 529  name = "indicatif"
    470 530  version = "0.17.2"
    471 531  source = "registry+https://github.com/rust-lang/crates.io-index"
    skipped 316 lines
    788 848  ]
    789 849   
    790 850  [[package]]
     851 +name = "regex"
     852 +version = "1.7.1"
     853 +source = "registry+https://github.com/rust-lang/crates.io-index"
     854 +checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733"
     855 +dependencies = [
     856 + "aho-corasick",
     857 + "memchr",
     858 + "regex-syntax",
     859 +]
     860 + 
     861 +[[package]]
    791 862  name = "regex-automata"
    792 863  version = "0.1.10"
    793 864  source = "registry+https://github.com/rust-lang/crates.io-index"
    794 865  checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
     866 + 
     867 +[[package]]
     868 +name = "regex-syntax"
     869 +version = "0.6.28"
     870 +source = "registry+https://github.com/rust-lang/crates.io-index"
     871 +checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
    795 872   
    796 873  [[package]]
    797 874  name = "rustix"
    skipped 22 lines
    820 897  checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde"
    821 898   
    822 899  [[package]]
     900 +name = "same-file"
     901 +version = "1.0.6"
     902 +source = "registry+https://github.com/rust-lang/crates.io-index"
     903 +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
     904 +dependencies = [
     905 + "winapi-util",
     906 +]
     907 + 
     908 +[[package]]
    823 909  name = "scopeguard"
    824 910  version = "1.1.0"
    825 911  source = "registry+https://github.com/rust-lang/crates.io-index"
    skipped 94 lines
    920 1006  ]
    921 1007   
    922 1008  [[package]]
     1009 +name = "thread_local"
     1010 +version = "1.1.4"
     1011 +source = "registry+https://github.com/rust-lang/crates.io-index"
     1012 +checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180"
     1013 +dependencies = [
     1014 + "once_cell",
     1015 +]
     1016 + 
     1017 +[[package]]
    923 1018  name = "time"
    924 1019  version = "0.1.45"
    925 1020  source = "registry+https://github.com/rust-lang/crates.io-index"
    skipped 58 lines
    984 1079  version = "0.9.4"
    985 1080  source = "registry+https://github.com/rust-lang/crates.io-index"
    986 1081  checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
     1082 + 
     1083 +[[package]]
     1084 +name = "walkdir"
     1085 +version = "2.3.2"
     1086 +source = "registry+https://github.com/rust-lang/crates.io-index"
     1087 +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
     1088 +dependencies = [
     1089 + "same-file",
     1090 + "winapi",
     1091 + "winapi-util",
     1092 +]
    987 1093   
    988 1094  [[package]]
    989 1095  name = "wasi"
    skipped 152 lines
  • ■ ■ ■ ■ ■ ■
    Cargo.toml
    1 1  [package]
    2 2  name = "deduplicator"
    3  -version = "0.1.3"
     3 +version = "0.1.4"
    4 4  edition = "2021"
    5 5  description = "find,filter,delete Duplicates"
    6 6  license = "MIT"
    7  -authors = ["Sreedev Kodichath <[email protected]>", "Valentin Bersier <[email protected]>", "Dhruva Sagar <[email protected]>"]
     7 +authors = [
     8 + "Sreedev Kodichath <[email protected]>",
     9 + "Valentin Bersier <[email protected]>",
     10 + "Dhruva Sagar <[email protected]>",
     11 +]
    8 12   
    9 13  # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
    10 14   
    skipped 5 lines
    16 20  colored = "2.0.0"
    17 21  dashmap = { version = "5.4.0", features = ["rayon"] }
    18 22  fxhash = "0.2.1"
    19  -glob = "0.3.0"
     23 +globwalk = "0.8.1"
    20 24  indicatif = { version = "0.17.2", features = ["rayon", "tokio"] }
    21 25  itertools = "0.10.5"
    22 26  memmap2 = "0.5.8"
    skipped 6 lines
  • ■ ■ ■ ■ ■
    src/output.rs
    skipped 4 lines
    5 5  use chrono::DateTime;
    6 6  use colored::Colorize;
    7 7  use dashmap::DashMap;
     8 +use indicatif::{ProgressBar, ProgressIterator, ProgressStyle};
    8 9  use itertools::Itertools;
    9 10  use prettytable::{format, row, Table};
    10 11  use std::io::Write;
    skipped 1 lines
    12 13  use unicode_segmentation::UnicodeSegmentation;
    13 14   
    14 15  fn format_path(path: &str, opts: &Params) -> Result<String> {
    15  - let display_path = path.replace(&opts.get_directory()?, "");
     16 + let display_path = path.replace(opts.get_directory()?.to_string_lossy().as_ref(), "");
    16 17   let display_range = if display_path.chars().count() > 32 {
    17 18   display_path
    18 19   .graphemes(true)
    skipped 7 lines
    26 27   display_path
    27 28   };
    28 29   
    29  - Ok(format!("...{:<32}", display_range))
     30 + Ok(format!("...{display_range:<32}"))
    30 31  }
    31 32   
    32 33  fn file_size(file: &File) -> Result<String> {
    skipped 120 lines
    153 154   }
    154 155   
    155 156   let mut output_table = Table::new();
     157 + let progress_bar = ProgressBar::new(duplicates.len() as u64);
     158 + let progress_style = ProgressStyle::default_bar()
     159 + .template("{spinner:.green} [generating output] [{wide_bar:.cyan/blue}] {pos}/{len} files")
     160 + .unwrap();
     161 + 
     162 + progress_bar.set_style(progress_style);
    156 163   output_table.set_titles(row!["hash", "duplicates"]);
     164 + 
    157 165   duplicates
    158 166   .into_iter()
    159 167   .sorted_unstable_by_key(|(_, f)| f.first().and_then(|ff| ff.size).unwrap_or_default())
     168 + .progress_with(progress_bar)
    160 169   .for_each(|(hash, group)| {
    161 170   let mut inner_table = Table::new();
    162 171   inner_table.set_format(*format::consts::FORMAT_NO_BORDER_LINE_SEPARATOR);
    skipped 13 lines
  • ■ ■ ■ ■ ■ ■
    src/params.rs
     1 +use std::{fs, path::PathBuf};
     2 + 
    1 3  use anyhow::{anyhow, Result};
    2 4  use clap::{Parser, ValueHint};
    3  -use std::{fs, path::PathBuf};
     5 +use globwalk::{GlobWalker, GlobWalkerBuilder};
    4 6   
    5 7  #[derive(Parser, Debug)]
    6 8  #[command(author, version, about, long_about = None)]
    skipped 23 lines
    30 32   }
    31 33   }
    32 34   
    33  - pub fn get_directory(&self) -> Result<String> {
    34  - let dir_pathbuf: PathBuf = self
    35  - .dir
    36  - .as_ref()
    37  - .unwrap_or(&std::env::current_dir()?)
    38  - .as_os_str()
    39  - .into();
    40  - 
    41  - let dir = fs::canonicalize(dir_pathbuf)?
    42  - .as_os_str()
    43  - .to_str()
    44  - .ok_or_else(|| anyhow!("Invalid directory"))?
    45  - .to_string();
    46  - 
     35 + pub fn get_directory(&self) -> Result<PathBuf> {
     36 + let current_dir = std::env::current_dir()?;
     37 + let dir_path = self.dir.as_ref().unwrap_or(&current_dir).as_path();
     38 + let dir = fs::canonicalize(dir_path)?;
    47 39   Ok(dir)
    48 40   }
    49 41   
    50  - pub fn get_glob_patterns(&self) -> PathBuf {
    51  - match self.types.as_ref() {
    52  - Some(filetypes) => vec![
    53  - self.get_directory().unwrap(),
    54  - String::from("**"),
    55  - format!("{{{}}}", filetypes),
    56  - ]
    57  - .iter()
    58  - .collect::<PathBuf>(),
    59  - None => vec![self.get_directory().unwrap().as_str(), "**", "*"]
    60  - .iter()
    61  - .collect::<PathBuf>(),
    62  - }
     42 + pub fn get_glob_walker(&self) -> Result<GlobWalker> {
     43 + let pattern: String = match self.types.as_ref() {
     44 + Some(filetypes) => format!("**/*{{{filetypes}}}"),
     45 + None => "**/*".to_string(),
     46 + };
     47 + // TODO: add params for maximum depth and following symlinks, then pass them to this builder
     48 + GlobWalkerBuilder::from_patterns(self.get_directory()?, &[pattern])
     49 + .build()
     50 + .map_err(|e| anyhow!(e))
    63 51   }
    64 52  }
    65 53   
  • ■ ■ ■ ■ ■ ■
    src/scanner.rs
    skipped 1 lines
    2 2  use anyhow::Result;
    3 3  use dashmap::DashMap;
    4 4  use fxhash::hash64 as hasher;
    5  -use glob::glob;
    6  -use indicatif::{ParallelProgressIterator, ProgressStyle};
     5 +use indicatif::{ParallelProgressIterator, ProgressBar, ProgressIterator, ProgressStyle};
    7 6  use memmap2::Mmap;
    8 7  use rayon::prelude::*;
    9 8  use std::hash::Hasher;
     9 +use std::time::Duration;
    10 10  use std::{fs, path::PathBuf};
    11 11   
    12 12  #[derive(Clone, Copy)]
    skipped 27 lines
    40 40  }
    41 41   
    42 42  fn scan(app_opts: &Params) -> Result<Vec<File>> {
    43  - let glob_patterns = app_opts.get_glob_patterns().display().to_string();
    44  - let glob_iter = glob(&glob_patterns)?;
    45  - let files = glob_iter
    46  - .filter(Result::is_ok)
    47  - .map(|file| file.unwrap())
     43 + let walker = app_opts.get_glob_walker()?;
     44 + let progress = ProgressBar::new_spinner();
     45 + let progress_style =
     46 + ProgressStyle::with_template("{spinner:.green} [mapping paths] {pos} paths")?;
     47 + progress.set_style(progress_style);
     48 + progress.enable_steady_tick(Duration::from_millis(100));
     49 + 
     50 + let files = walker
     51 + .progress_with(progress)
     52 + .filter_map(Result::ok)
     53 + .map(|file| file.into_path())
    48 54   .filter(|fpath| fpath.is_file())
    49 55   .collect::<Vec<PathBuf>>()
    50 56   .into_par_iter()
    51 57   .progress_with_style(ProgressStyle::with_template(
    52  - "{spinner:.green} [processing scan results] [{wide_bar:.cyan/blue}] {pos}/{len} files",
     58 + "{spinner:.green} [processing mapped paths] [{wide_bar:.cyan/blue}] {pos}/{len} files",
    53 59   )?)
    54 60   .map(|fpath| fpath.display().to_string())
    55 61   .map(|fpath| File {
    skipped 73 lines
Please wait...
Page is in error, reload to recover