Projects STRLCPY deduplicator Commits 01dd93a0
🤬
Revision indexing in progress... (symbol navigation in revisions will be accurate after indexed)
  • ■ ■ ■ ■ ■
    Cargo.lock
    skipped 70 lines
    71 71  checksum = "dfb24e866b15a1af2a1b663f10c6b6b8f397a84aadb828f12e5b289ec23a3a3c"
    72 72   
    73 73  [[package]]
    74  -name = "cassowary"
    75  -version = "0.3.0"
    76  -source = "registry+https://github.com/rust-lang/crates.io-index"
    77  -checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53"
    78  - 
    79  -[[package]]
    80 74  name = "cc"
    81 75  version = "1.0.78"
    82 76  source = "registry+https://github.com/rust-lang/crates.io-index"
    skipped 141 lines
    224 218  ]
    225 219   
    226 220  [[package]]
    227  -name = "crossterm"
    228  -version = "0.25.0"
    229  -source = "registry+https://github.com/rust-lang/crates.io-index"
    230  -checksum = "e64e6c0fbe2c17357405f7c758c1ef960fce08bdfb2c03d88d2a18d7e09c4b67"
    231  -dependencies = [
    232  - "bitflags",
    233  - "crossterm_winapi",
    234  - "libc",
    235  - "mio",
    236  - "parking_lot",
    237  - "signal-hook",
    238  - "signal-hook-mio",
    239  - "winapi",
    240  -]
    241  - 
    242  -[[package]]
    243  -name = "crossterm_winapi"
    244  -version = "0.9.0"
    245  -source = "registry+https://github.com/rust-lang/crates.io-index"
    246  -checksum = "2ae1b35a484aa10e07fe0638d02301c5ad24de82d310ccbd2f3693da5f09bf1c"
    247  -dependencies = [
    248  - "winapi",
    249  -]
    250  - 
    251  -[[package]]
    252 221  name = "csv"
    253 222  version = "1.1.6"
    254 223  source = "registry+https://github.com/rust-lang/crates.io-index"
    skipped 60 lines
    315 284  ]
    316 285   
    317 286  [[package]]
     287 +name = "dashmap"
     288 +version = "5.4.0"
     289 +source = "registry+https://github.com/rust-lang/crates.io-index"
     290 +checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc"
     291 +dependencies = [
     292 + "cfg-if",
     293 + "hashbrown",
     294 + "lock_api",
     295 + "once_cell",
     296 + "parking_lot_core",
     297 + "rayon",
     298 +]
     299 + 
     300 +[[package]]
    318 301  name = "deduplicator"
    319  -version = "0.0.9"
     302 +version = "0.1.1"
    320 303  dependencies = [
    321 304   "anyhow",
    322 305   "chrono",
    323 306   "clap",
    324 307   "colored",
    325  - "crossterm",
     308 + "dashmap",
    326 309   "fxhash",
    327 310   "glob",
    328 311   "humansize",
    329 312   "indicatif",
    330 313   "itertools",
     314 + "memmap2",
    331 315   "prettytable-rs",
    332 316   "rayon",
    333  - "sqlite",
    334 317   "thiserror",
    335 318   "tokio",
    336  - "tui",
    337 319   "unicode-segmentation",
    338 320  ]
    339 321   
    skipped 84 lines
    424 406  checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
    425 407   
    426 408  [[package]]
     409 +name = "hashbrown"
     410 +version = "0.12.3"
     411 +source = "registry+https://github.com/rust-lang/crates.io-index"
     412 +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
     413 + 
     414 +[[package]]
    427 415  name = "heck"
    428 416  version = "0.4.0"
    429 417  source = "registry+https://github.com/rust-lang/crates.io-index"
    skipped 169 lines
    599 587  checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
    600 588   
    601 589  [[package]]
     590 +name = "memmap2"
     591 +version = "0.5.8"
     592 +source = "registry+https://github.com/rust-lang/crates.io-index"
     593 +checksum = "4b182332558b18d807c4ce1ca8ca983b34c3ee32765e47b3f0f69b90355cc1dc"
     594 +dependencies = [
     595 + "libc",
     596 +]
     597 + 
     598 +[[package]]
    602 599  name = "memoffset"
    603 600  version = "0.7.1"
    604 601  source = "registry+https://github.com/rust-lang/crates.io-index"
    skipped 91 lines
    696 693  checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
    697 694   
    698 695  [[package]]
    699  -name = "pkg-config"
    700  -version = "0.3.26"
    701  -source = "registry+https://github.com/rust-lang/crates.io-index"
    702  -checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
    703  - 
    704  -[[package]]
    705 696  name = "portable-atomic"
    706 697  version = "0.3.19"
    707 698  source = "registry+https://github.com/rust-lang/crates.io-index"
    skipped 148 lines
    856 847  checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb"
    857 848   
    858 849  [[package]]
    859  -name = "signal-hook"
    860  -version = "0.3.14"
    861  -source = "registry+https://github.com/rust-lang/crates.io-index"
    862  -checksum = "a253b5e89e2698464fc26b545c9edceb338e18a89effeeecfea192c3025be29d"
    863  -dependencies = [
    864  - "libc",
    865  - "signal-hook-registry",
    866  -]
    867  - 
    868  -[[package]]
    869  -name = "signal-hook-mio"
    870  -version = "0.2.3"
    871  -source = "registry+https://github.com/rust-lang/crates.io-index"
    872  -checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af"
    873  -dependencies = [
    874  - "libc",
    875  - "mio",
    876  - "signal-hook",
    877  -]
    878  - 
    879  -[[package]]
    880 850  name = "signal-hook-registry"
    881 851  version = "1.4.0"
    882 852  source = "registry+https://github.com/rust-lang/crates.io-index"
    skipped 19 lines
    902 872  ]
    903 873   
    904 874  [[package]]
    905  -name = "sqlite"
    906  -version = "0.30.3"
    907  -source = "registry+https://github.com/rust-lang/crates.io-index"
    908  -checksum = "12e072cb5fb89b3fe5e9c9584676348feb503f9fb3ae829d9868171bc5372d48"
    909  -dependencies = [
    910  - "libc",
    911  - "sqlite3-sys",
    912  -]
    913  - 
    914  -[[package]]
    915  -name = "sqlite3-src"
    916  -version = "0.4.0"
    917  -source = "registry+https://github.com/rust-lang/crates.io-index"
    918  -checksum = "d1815a7a02c996eb8e5c64f61fcb6fd9b12e593ce265c512c5853b2513635691"
    919  -dependencies = [
    920  - "cc",
    921  - "pkg-config",
    922  -]
    923  - 
    924  -[[package]]
    925  -name = "sqlite3-sys"
    926  -version = "0.14.0"
    927  -source = "registry+https://github.com/rust-lang/crates.io-index"
    928  -checksum = "d47c99824fc55360ba00caf28de0b8a0458369b832e016a64c13af0ad9fbb9ee"
    929  -dependencies = [
    930  - "libc",
    931  - "sqlite3-src",
    932  -]
    933  - 
    934  -[[package]]
    935 875  name = "strsim"
    936 876  version = "0.10.0"
    937 877  source = "registry+https://github.com/rust-lang/crates.io-index"
    skipped 90 lines
    1028 968   "proc-macro2",
    1029 969   "quote",
    1030 970   "syn",
    1031  -]
    1032  - 
    1033  -[[package]]
    1034  -name = "tui"
    1035  -version = "0.19.0"
    1036  -source = "registry+https://github.com/rust-lang/crates.io-index"
    1037  -checksum = "ccdd26cbd674007e649a272da4475fb666d3aa0ad0531da7136db6fab0e5bad1"
    1038  -dependencies = [
    1039  - "bitflags",
    1040  - "cassowary",
    1041  - "crossterm",
    1042  - "unicode-segmentation",
    1043  - "unicode-width",
    1044 971  ]
    1045 972   
    1046 973  [[package]]
    skipped 177 lines
  • ■ ■ ■ ■ ■ ■
    Cargo.toml
    1 1  [package]
    2 2  name = "deduplicator"
    3  -version = "0.0.9"
     3 +version = "0.1.1"
    4 4  edition = "2021"
    5 5  description = "find,filter,delete Duplicates"
    6 6  license = "MIT"
    skipped 6 lines
    13 13  chrono = "0.4.23"
    14 14  clap = { version = "4.0.32", features = ["derive"] }
    15 15  colored = "2.0.0"
    16  -crossterm = "0.25.0"
     16 +dashmap = { version = "5.4.0", features = ["rayon"] }
    17 17  fxhash = "0.2.1"
    18 18  glob = "0.3.0"
    19 19  humansize = "2.1.2"
    20 20  indicatif = { version = "0.17.2", features = ["rayon", "tokio"] }
    21 21  itertools = "0.10.5"
     22 +memmap2 = "0.5.8"
    22 23  prettytable-rs = "0.10.0"
    23 24  rayon = "1.6.1"
    24  -sqlite = "0.30.3"
    25 25  thiserror = "1.0.38"
    26 26  tokio = { version = "1.23.0", features = ["full"] }
    27  -tui = "0.19.0"
    28 27  unicode-segmentation = "1.10.0"
    29 28   
  • ■ ■ ■ ■ ■
    README.md
    skipped 15 lines
    16 16  Options:
    17 17   -t, --types <TYPES> Filetypes to deduplicate (default = all)
    18 18   --dir <DIR> Run Deduplicator on dir different from pwd
    19  - -n, --nocache Don't use cache for indexing files (default = false)
    20 19   -i, --interactive Delete files interactively
    21 20   -h, --help Print help information
    22 21   -V, --version Print version information
    skipped 29 lines
  • ■ ■ ■ ■ ■ ■
    src/app/event_handler.rs
    1  -use std::time::Duration;
    2  - 
    3  -use anyhow::Result;
    4  -use crossterm::event::{self, KeyCode, KeyEvent};
    5  - 
    6  -use super::events;
    7  - 
    8  -pub struct EventHandler;
    9  - 
    10  -impl EventHandler {
    11  - pub fn init() -> Result<events::Event> {
    12  - if crossterm::event::poll(Duration::from_millis(10))? {
    13  - match event::read()? {
    14  - event::Event::Key(keycode) => Self::handle_keypress(keycode),
    15  - _ => Ok(events::Event::Noop),
    16  - }
    17  - } else {
    18  - Ok(events::Event::Noop)
    19  - }
    20  - }
    21  - 
    22  - fn handle_keypress(keyevent: KeyEvent) -> Result<events::Event> {
    23  - match keyevent.code {
    24  - KeyCode::Char('q') => Ok(events::Event::Exit),
    25  - _ => Ok(events::Event::Noop),
    26  - }
    27  - }
    28  -}
    29  - 
  • ■ ■ ■ ■ ■
    src/app/events.rs
    1  -pub enum Event {
    2  - Exit,
    3  - Noop,
    4  -}
    5  - 
  • ■ ■ ■ ■ ■
    src/app/formatter.rs
    1  - 
  • ■ ■ ■ ■ ■ ■
    src/app/mod.rs
    1  -#![allow(unused)]
    2  - 
    3  -mod event_handler;
    4  -mod events;
    5  -mod formatter;
    6  -mod ui;
    7  -pub mod file_manager;
    8  - 
    9  -use std::{io, thread, time::Duration};
    10  - 
    11  -use anyhow::{anyhow, Result};
    12  -use crossterm::{event, execute, terminal};
    13  -use event_handler::EventHandler;
    14  -use tui::{
    15  - backend::CrosstermBackend,
    16  - widgets::{Block, Borders, Widget},
    17  - Terminal,
    18  -};
    19  -use ui::Ui;
    20  - 
    21  -use crate::database;
    22  -use crate::output;
    23  -use crate::params::Params;
    24  -use crate::scanner;
    25  - 
    26  -pub struct App;
    27  - 
    28  -impl App {
    29  - pub fn init(app_args: &Params) -> Result<()> {
    30  - // let mut term = Self::init_terminal()?;
    31  - 
    32  - let connection = database::get_connection(app_args)?;
    33  - let duplicates = scanner::duplicates(app_args, &connection)?;
    34  - 
    35  - // Self::init_render_loop(&mut term)?;
    36  - // Self::cleanup(&mut term)?;
    37  - 
    38  - match app_args.interactive {
    39  - true => output::interactive(duplicates, app_args),
    40  - false => output::print(duplicates, app_args) /* TODO: APP TUI INIT FUNCTION */
    41  - }
    42  -
    43  - Ok(())
    44  - }
    45  - 
    46  - fn cleanup(term: &mut Terminal<CrosstermBackend<io::Stdout>>) -> Result<()> {
    47  - terminal::disable_raw_mode()?;
    48  - execute!(
    49  - term.backend_mut(),
    50  - terminal::LeaveAlternateScreen,
    51  - event::DisableMouseCapture
    52  - )?;
    53  - 
    54  - term.show_cursor()?;
    55  - Ok(())
    56  - }
    57  - 
    58  - fn render_cycle(term: &mut Terminal<CrosstermBackend<io::Stdout>>) -> Result<()> {
    59  - match EventHandler::init()? {
    60  - events::Event::Noop => Ui::render_frame(term),
    61  - events::Event::Exit => Err(anyhow!("Exit")),
    62  - }
    63  - }
    64  - 
    65  - fn init_render_loop(term: &mut Terminal<CrosstermBackend<io::Stdout>>) -> Result<()> {
    66  - // this could be simplified with a `while Self::render_cycle(term).is_ok() {}` in the current state, but maybe
    67  - // it's good to keep it to handle errors in the future
    68  - loop {
    69  - match Self::render_cycle(term) {
    70  - Ok(_) => continue,
    71  - Err(_) => break,
    72  - }
    73  - }
    74  - 
    75  - Ok(())
    76  - }
    77  - 
    78  - fn init_terminal() -> Result<Terminal<CrosstermBackend<io::Stdout>>> {
    79  - terminal::enable_raw_mode()?;
    80  - let mut stdout = io::stdout();
    81  - execute!(
    82  - stdout,
    83  - terminal::EnterAlternateScreen,
    84  - event::EnableMouseCapture
    85  - )?;
    86  - let backend = CrosstermBackend::new(stdout);
    87  - Ok(Terminal::new(backend)?)
    88  - }
    89  -}
    90  - 
  • ■ ■ ■ ■ ■ ■
    src/app/ui.rs
    1  -use std::io;
    2  - 
    3  -use anyhow::Result;
    4  -use tui::{
    5  - backend::{Backend, CrosstermBackend},
    6  - layout::{Constraint, Direction, Layout, Rect},
    7  - style::{Modifier, Style},
    8  - text::{Span, Spans},
    9  - widgets::{Block, Borders, List, ListItem, Widget},
    10  - Frame, Terminal,
    11  -};
    12  - 
    13  -pub struct Ui;
    14  - 
    15  -impl Ui {
    16  - fn generate_file_list() -> impl Widget {
    17  - let tasks: Vec<ListItem> = vec!["Sreedev"; 100]
    18  - .into_iter()
    19  - .map(|item| ListItem::new(vec![Spans::from(Span::raw(item))]))
    20  - .collect();
    21  - 
    22  - List::new(tasks)
    23  - .block(Block::default().borders(Borders::ALL).title("List"))
    24  - .highlight_style(Style::default().add_modifier(Modifier::BOLD))
    25  - .highlight_symbol("> ")
    26  - }
    27  - 
    28  - fn generate_info_bar() -> impl Widget {
    29  - Block::default().title("Description").borders(Borders::ALL)
    30  - }
    31  - 
    32  - fn generate_file_desc() -> impl Widget {
    33  - Block::default().title("Description").borders(Borders::ALL)
    34  - }
    35  - 
    36  - pub fn render_frame(term: &mut Terminal<CrosstermBackend<io::Stdout>>) -> Result<()> {
    37  - term.draw(|f| {
    38  - let windows = Layout::default()
    39  - .direction(Direction::Vertical)
    40  - .constraints([Constraint::Ratio(2, 16), Constraint::Ratio(14, 16)].as_ref())
    41  - .split(f.size());
    42  - 
    43  - let subwindows = Layout::default()
    44  - .direction(Direction::Horizontal)
    45  - .constraints([Constraint::Ratio(1, 4), Constraint::Ratio(3, 4)].as_ref())
    46  - .split(windows[1]);
    47  - 
    48  - f.render_widget(Self::generate_info_bar(), windows[0]);
    49  - f.render_widget(Self::generate_file_list(), subwindows[0]);
    50  - f.render_widget(Self::generate_file_desc(), subwindows[1]);
    51  - })?;
    52  - Ok(())
    53  - }
    54  -}
    55  - 
  • ■ ■ ■ ■ ■ ■
    src/app.rs
     1 +use crate::output;
     2 +use crate::params::Params;
     3 +use crate::scanner;
     4 +use anyhow::Result;
     5 + 
     6 +pub struct App;
     7 + 
     8 +impl App {
     9 + pub fn init(app_args: &Params) -> Result<()> {
     10 + let duplicates = scanner::duplicates(app_args)?;
     11 + match app_args.interactive {
     12 + true => output::interactive(duplicates, app_args),
     13 + false => output::print(duplicates, app_args),
     14 + }
     15 + 
     16 + Ok(())
     17 + }
     18 +}
     19 + 
  • ■ ■ ■ ■ ■ ■
    src/database.rs
    1  -use std::env::temp_dir;
    2  - 
    3  -use anyhow::Result;
    4  - 
    5  -use crate::params::Params;
    6  - 
    7  -#[derive(Debug, Clone)]
    8  -pub struct File {
    9  - pub path: String,
    10  - pub hash: String,
    11  -}
    12  - 
    13  -fn db_connection_url(args: &Params) -> String {
    14  - match args.nocache {
    15  - true => String::from(":memory:"),
    16  - false => {
    17  - let temp_dir_path = temp_dir();
    18  - format!("{}/deduplicator.db", temp_dir_path.display())
    19  - }
    20  - }
    21  -}
    22  - 
    23  -pub fn get_connection(args: &Params) -> Result<sqlite::Connection, sqlite::Error> {
    24  - sqlite::open(db_connection_url(args)).and_then(|conn| {
    25  - setup(&conn).ok();
    26  - Ok(conn)
    27  - })
    28  -}
    29  - 
    30  -pub fn setup(connection: &sqlite::Connection) -> Result<()> {
    31  - let query = "CREATE TABLE files (file_identifier STRING, hash STRING)";
    32  - connection.execute(query).ok();
    33  - Ok(())
    34  -}
    35  - 
    36  -pub fn put(file: &File, connection: &sqlite::Connection) -> Result<()> {
    37  - let query = format!(
    38  - "INSERT INTO files (file_identifier, hash) VALUES (\"{}\", \"{}\")",
    39  - file.path, file.hash
    40  - );
    41  - connection.execute(query)?;
    42  - Ok(())
    43  -}
    44  - 
    45  -pub fn indexed_paths(connection: &sqlite::Connection) -> Result<Vec<File>> {
    46  - let query = "SELECT * FROM files";
    47  - 
    48  - let result: Vec<File> = connection
    49  - .prepare(query)?
    50  - .into_iter()
    51  - .filter_map(|row_result| row_result.ok())
    52  - .map(|row| {
    53  - let path = row.read::<&str, _>("file_identifier").to_string();
    54  - let hash = row.read::<i64, _>("hash").to_string();
    55  - File { path, hash }
    56  - })
    57  - .collect();
    58  - 
    59  - Ok(result)
    60  -}
    61  - 
    62  -pub fn duplicate_hashes(connection: &sqlite::Connection, path: &str) -> Result<Vec<File>> {
    63  - let query = format!(
    64  - "
    65  - SELECT a.* FROM files a
    66  - JOIN (SELECT file_identifier, hash, COUNT(*)
    67  - FROM files
    68  - GROUP BY hash
    69  - HAVING count(*) > 1 ) b
    70  - ON a.hash = b.hash
    71  - WHERE a.file_identifier LIKE \"{}%\"
    72  - ORDER BY a.file_identifier
    73  - ",
    74  - path
    75  - );
    76  - 
    77  - let result: Vec<File> = connection
    78  - .prepare(query)?
    79  - .into_iter()
    80  - .filter_map(|row_result| row_result.ok())
    81  - .map(|row| {
    82  - let path = row.read::<&str, _>("file_identifier").to_string();
    83  - let hash = row.read::<i64, _>("hash").to_string();
    84  - File { path, hash }
    85  - })
    86  - .collect();
    87  - 
    88  - Ok(result)
    89  -}
    90  - 
  • ■ ■ ■ ■ ■
    src/app/file_manager.rs src/file_manager.rs
    1  -use crate::database::File;
    2 1  use anyhow::Result;
    3 2  use colored::Colorize;
    4 3   
     4 +#[derive(Debug, Clone)]
     5 +pub struct File {
     6 + pub path: String,
     7 + pub size: Option<u64>,
     8 + pub hash: Option<String>,
     9 +}
     10 + 
    5 11  pub fn delete_files(files: Vec<File>) -> Result<()> {
    6 12   files.into_iter().for_each(|file| {
    7 13   match std::fs::remove_file(file.path.clone()) {
    8 14   Ok(_) => println!("{}: {}", "DELETED".green(), file.path),
    9  - Err(e) => println!("{}: {}", "FAILED".red(), file.path)
     15 + Err(_) => println!("{}: {}", "FAILED".red(), file.path)
    10 16   }
    11 17   });
    12 18   
    skipped 3 lines
  • ■ ■ ■ ■ ■
    src/main.rs
    1  -#![allow(unused)] // TODO: remove this once TUI is implemented
    2 1  mod app;
    3  -mod database;
     2 +mod file_manager;
    4 3  mod output;
    5 4  mod params;
    6 5  mod scanner;
    skipped 10 lines
  • ■ ■ ■ ■ ■ ■
    src/output.rs
    1  -use std::{collections::HashMap, fs, io};
    2  -use std::io::Write;
    3  - 
     1 +use crate::file_manager::{self, File};
     2 +use crate::params::Params;
    4 3  use anyhow::Result;
    5 4  use chrono::offset::Utc;
    6 5  use chrono::DateTime;
    7 6  use colored::Colorize;
     7 +use dashmap::DashMap;
    8 8  use humansize::{format_size, DECIMAL};
    9 9  use itertools::Itertools;
    10  - 
    11  -use crate::app::file_manager;
    12  -use crate::database::File;
    13  -use crate::params::Params;
    14  -use prettytable::{format, row, Cell, Row, Table};
     10 +use prettytable::{format, row, Table};
     11 +use std::io::Write;
     12 +use std::{fs, io};
    15 13  use unicode_segmentation::UnicodeSegmentation;
    16 14   
    17 15  fn format_path(path: &str, opts: &Params) -> Result<String> {
    18 16   let display_path = path.replace(&opts.get_directory()?, "");
    19  - 
    20 17   let display_range = if display_path.chars().count() > 32 {
    21 18   display_path
    22 19   .graphemes(true)
    skipped 23 lines
    46 43   Ok(modified_time.format("%Y-%m-%d %H:%M:%S").to_string())
    47 44  }
    48 45   
    49  -fn group_duplicates(duplicates: Vec<File>) -> HashMap<String, Vec<File>> {
    50  - let mut duplicate_mapper: HashMap<String, Vec<File>> = HashMap::new();
    51  - duplicates.into_iter().for_each(|file| {
    52  - duplicate_mapper
    53  - .entry(file.hash.clone())
    54  - .and_modify(|value| value.push(file.clone()))
    55  - .or_insert_with(|| vec![file]);
    56  - });
    57  - 
    58  - duplicate_mapper
    59  -}
    60  - 
    61  -fn print_meta_info(duplicates: &Vec<File>, opts: &Params) {
     46 +fn print_meta_info() {
    62 47   println!("Deduplicator v{}", std::env!("CARGO_PKG_VERSION"));
    63 48  }
    64 49   
    skipped 10 lines
    75 60  }
    76 61   
    77 62  fn scan_group_confirmation() -> Result<bool> {
    78  - print!("\nconfirm? [Y/n]: ");
     63 + print!("\nconfirm? [y/N]: ");
    79 64   std::io::stdout().flush()?;
    80 65   let mut user_input = String::new();
    81 66   io::stdin().read_line(&mut user_input)?;
    82 67   
    83 68   match user_input.trim() {
    84 69   "Y" | "y" => Ok(true),
    85  - _ => Ok(false)
     70 + _ => Ok(false),
    86 71   }
    87 72  }
    88 73   
    skipped 19 lines
    108 93   
    109 94   print!("{esc}[2J{esc}[1;1H", esc = 27 as char);
    110 95   
    111  - if parsed_file_indices.is_empty() { return }
     96 + if parsed_file_indices.is_empty() {
     97 + return;
     98 + }
    112 99   
    113 100   let files_to_delete = parsed_file_indices
    114 101   .into_iter()
    115 102   .map(|index| duplicates[index].clone());
    116 103   
    117 104   println!("\n{}", "The following files will be deleted:".red());
    118  - files_to_delete.clone().enumerate().for_each(|(index, file)| {
    119  - println!("{}: {}", index.to_string().blue(), file.path);
    120  - });
     105 + files_to_delete
     106 + .clone()
     107 + .enumerate()
     108 + .for_each(|(index, file)| {
     109 + println!("{}: {}", index.to_string().blue(), file.path);
     110 + });
    121 111   
    122 112   match scan_group_confirmation().unwrap() {
    123  - true => { file_manager::delete_files(files_to_delete.collect_vec()); },
    124  - false => println!("{}", "\nCancelled Delete Operation.".red())
     113 + true => {
     114 + file_manager::delete_files(files_to_delete.collect_vec()).ok();
     115 + }
     116 + false => println!("{}", "\nCancelled Delete Operation.".red()),
    125 117   }
    126 118  }
    127 119   
    128  -pub fn interactive(duplicates: Vec<File>, opts: &Params) {
    129  - print_meta_info(&duplicates, opts);
    130  - let grouped_duplicates = group_duplicates(duplicates);
     120 +pub fn interactive(duplicates: DashMap<String, Vec<File>>, opts: &Params) {
     121 + print_meta_info();
     122 + duplicates
     123 + .clone()
     124 + .into_iter()
     125 + .enumerate()
     126 + .for_each(|(gindex, (_, group))| {
     127 + let mut itable = Table::new();
     128 + itable.set_format(*format::consts::FORMAT_NO_BORDER_LINE_SEPARATOR);
     129 + itable.set_titles(row!["index", "filename", "size", "updated_at"]);
     130 + group.iter().enumerate().for_each(|(index, file)| {
     131 + itable.add_row(row![
     132 + index,
     133 + format_path(&file.path, opts).unwrap_or_default().blue(),
     134 + file_size(&file.path).unwrap_or_default().red(),
     135 + modified_time(&file.path).unwrap_or_default().yellow()
     136 + ]);
     137 + });
    131 138   
    132  - grouped_duplicates.iter().enumerate().for_each(|(gindex, (hash, group))| {
    133  - let mut itable = Table::new();
    134  - itable.set_format(*format::consts::FORMAT_NO_BORDER_LINE_SEPARATOR);
    135  - itable.set_titles(row!["index", "filename", "size", "updated_at"]);
    136  - group.iter().enumerate().for_each(|(index, file)| {
    137  - itable.add_row(row![
    138  - index,
    139  - format_path(&file.path, opts).unwrap_or_default().blue(),
    140  - file_size(&file.path).unwrap_or_default().red(),
    141  - modified_time(&file.path).unwrap_or_default().yellow()
    142  - ]);
     139 + process_group_action(&group, gindex, duplicates.len(), itable);
    143 140   });
    144  - 
    145  - process_group_action(group, gindex, grouped_duplicates.len(), itable);
    146  - });
    147 141  }
    148 142   
    149  -pub fn print(duplicates: Vec<File>, opts: &Params) {
    150  - print_meta_info(&duplicates, opts);
     143 +pub fn print(duplicates: DashMap<String, Vec<File>>, opts: &Params) {
     144 + print_meta_info();
    151 145   
    152 146   let mut output_table = Table::new();
    153  - let grouped_duplicates: HashMap<String, Vec<File>> = group_duplicates(duplicates);
    154  - 
    155 147   output_table.set_titles(row!["hash", "duplicates"]);
    156  - grouped_duplicates.iter().for_each(|(hash, group)| {
     148 + duplicates.into_iter().for_each(|(hash, group)| {
    157 149   let mut inner_table = Table::new();
    158 150   inner_table.set_format(*format::consts::FORMAT_NO_BORDER_LINE_SEPARATOR);
    159 151   group.iter().for_each(|file| {
    skipped 12 lines
  • ■ ■ ■ ■ ■ ■
    src/params.rs
    1  -use std::{fs, path::PathBuf};
    2  - 
    3 1  use anyhow::{anyhow, Result};
    4 2  use clap::Parser;
     3 +use std::{fs, path::PathBuf};
    5 4   
    6 5  #[derive(Parser, Debug)]
    7 6  #[command(author, version, about, long_about = None)]
    skipped 4 lines
    12 11   /// Run Deduplicator on dir different from pwd
    13 12   #[arg(long)]
    14 13   pub dir: Option<PathBuf>,
    15  - /// Don't use cache for indexing files (default = false)
    16  - #[arg(long, short)]
    17  - pub nocache: bool,
    18 14   /// Delete files interactively
    19 15   #[arg(long, short)]
    20  - pub interactive: bool
     16 + pub interactive: bool,
    21 17  }
    22 18   
    23 19  impl Params {
    skipped 12 lines
    36 32   .to_string();
    37 33   
    38 34   Ok(dir)
     35 + }
     36 + 
     37 + pub fn get_glob_patterns(&self) -> Vec<PathBuf> {
     38 + self.types
     39 + .clone()
     40 + .unwrap_or_else(|| String::from("*"))
     41 + .split(',')
     42 + .map(|filetype| format!("*.{}", filetype))
     43 + .map(|filetype| {
     44 + vec![self.get_directory().unwrap(), String::from("**"), filetype]
     45 + .iter()
     46 + .collect()
     47 + })
     48 + .collect()
    39 49   }
    40 50  }
    41 51   
  • ■ ■ ■ ■ ■ ■
    src/scanner.rs
    1  -use std::{fs, path::PathBuf};
    2  -use indicatif::{HumanDuration, MultiProgress, ProgressBar, ProgressStyle, ParallelProgressIterator};
    3 1  use anyhow::Result;
    4  -use fxhash::hash32 as hasher;
     2 +use dashmap::DashMap;
     3 +use fxhash::hash64 as hasher;
    5 4  use glob::glob;
    6  -use itertools::Itertools;
     5 +use indicatif::{ParallelProgressIterator, ProgressStyle};
     6 +use memmap2::Mmap;
    7 7  use rayon::prelude::*;
     8 +use std::hash::Hasher;
     9 +use std::{fs, path::PathBuf};
    8 10   
    9  -use crate::{
    10  - database::{self, File},
    11  - params::Params,
    12  -};
     11 +use crate::{file_manager::File, params::Params};
    13 12   
    14  -pub fn duplicates(app_opts: &Params, connection: &sqlite::Connection) -> Result<Vec<File>> {
    15  - let scan_results = scan(app_opts, connection)?;
    16  - let base_path = app_opts.get_directory()?;
    17  - 
    18  - index_files(scan_results, connection)?;
    19  - database::duplicate_hashes(connection, &base_path)
     13 +#[derive(Clone, Copy)]
     14 +enum IndexCritera {
     15 + Size,
     16 + Hash,
    20 17  }
    21 18   
    22  -fn get_glob_patterns(opts: &Params, directory: &str) -> Vec<PathBuf> {
    23  - opts.types
    24  - .clone()
    25  - .unwrap_or_else(|| String::from("*"))
    26  - .split(',')
    27  - .map(|filetype| format!("*.{}", filetype))
    28  - .map(|filetype| {
    29  - vec![directory.to_owned(), String::from("**"), filetype]
    30  - .iter()
    31  - .collect()
    32  - })
    33  - .collect()
    34  -}
     19 +pub fn duplicates(app_opts: &Params) -> Result<DashMap<String, Vec<File>>> {
     20 + let scan_results = scan(app_opts)?;
     21 + let size_index_store = index_files(scan_results, IndexCritera::Size)?;
    35 22   
    36  -fn is_indexed_file(path: impl Into<String>, indexed: &[File]) -> bool {
    37  - indexed
    38  - .iter()
    39  - .map(|file| file.path.clone())
    40  - .contains(&path.into())
     23 + let sizewize_duplicate_files = size_index_store
     24 + .into_par_iter()
     25 + .filter(|(_, files)| files.len() > 1)
     26 + .map(|(_, files)| files)
     27 + .flatten()
     28 + .collect::<Vec<File>>();
     29 + 
     30 + if sizewize_duplicate_files.len() > 1 {
     31 + let size_wise_duplicate_paths = sizewize_duplicate_files
     32 + .into_par_iter()
     33 + .map(|file| file.path)
     34 + .collect::<Vec<String>>();
     35 + 
     36 + let hash_index_store = index_files(size_wise_duplicate_paths, IndexCritera::Hash)?;
     37 + let duplicate_files = hash_index_store
     38 + .into_par_iter()
     39 + .filter(|(_, files)| files.len() > 1)
     40 + .collect();
     41 + 
     42 + Ok(duplicate_files)
     43 + } else {
     44 + Ok(DashMap::new())
     45 + }
    41 46  }
    42 47   
    43  -fn scan(app_opts: &Params, connection: &sqlite::Connection) -> Result<Vec<String>> {
    44  - let directory = app_opts.get_directory()?;
    45  - let glob_patterns: Vec<PathBuf> = get_glob_patterns(app_opts, &directory);
    46  - let indexed_paths = database::indexed_paths(connection)?;
     48 +fn scan(app_opts: &Params) -> Result<Vec<String>> {
     49 + let glob_patterns: Vec<PathBuf> = app_opts.get_glob_patterns();
    47 50   let files: Vec<String> = glob_patterns
    48 51   .par_iter()
    49  - .progress_with_style(ProgressStyle::with_template("{spinner:.green} [scanning files] [{wide_bar:.cyan/blue}] {pos}/{len} files").unwrap())
     52 + .progress_with_style(ProgressStyle::with_template(
     53 + "{spinner:.green} [scanning files] [{wide_bar:.cyan/blue}] {pos}/{len} files",
     54 + )?)
    50 55   .filter_map(|glob_pattern| glob(glob_pattern.as_os_str().to_str()?).ok())
    51 56   .flat_map(|file_vec| {
    52 57   file_vec
    53 58   .filter_map(|x| Some(x.ok()?.as_os_str().to_str()?.to_string()))
    54  - .filter(|fpath| !is_indexed_file(fpath, &indexed_paths))
    55 59   .filter(|glob_result| {
    56 60   fs::metadata(glob_result)
    57 61   .map(|f| f.is_file())
    skipped 6 lines
    64 68   Ok(files)
    65 69  }
    66 70   
    67  -fn index_files(files: Vec<String>, connection: &sqlite::Connection) -> Result<()> {
    68  - let hashed: Vec<File> = files
     71 +fn process_file_size_index(fpath: String) -> Result<File> {
     72 + Ok(File {
     73 + path: fpath.clone(),
     74 + size: Some(fs::metadata(fpath)?.len()),
     75 + hash: None,
     76 + })
     77 +}
     78 + 
     79 +fn process_file_hash_index(fpath: String) -> Result<File> {
     80 + Ok(File {
     81 + path: fpath.clone(),
     82 + size: None,
     83 + hash: Some(hash_file(&fpath).unwrap_or_default()),
     84 + })
     85 +}
     86 + 
     87 +fn process_file_index(
     88 + fpath: String,
     89 + store: &DashMap<String, Vec<File>>,
     90 + index_criteria: IndexCritera,
     91 +) {
     92 + match index_criteria {
     93 + IndexCritera::Size => {
     94 + let processed_file = process_file_size_index(fpath).unwrap();
     95 + store
     96 + .entry(processed_file.size.unwrap_or_default().to_string())
     97 + .and_modify(|fileset| fileset.push(processed_file.clone()))
     98 + .or_insert_with(|| vec![processed_file]);
     99 + }
     100 + IndexCritera::Hash => {
     101 + let processed_file = process_file_hash_index(fpath).unwrap();
     102 + let indexhash = processed_file.clone().hash.unwrap_or_default();
     103 + 
     104 + store
     105 + .entry(indexhash)
     106 + .and_modify(|fileset| fileset.push(processed_file.clone()))
     107 + .or_insert_with(|| vec![processed_file]);
     108 + }
     109 + }
     110 +}
     111 + 
     112 +fn index_files(
     113 + files: Vec<String>,
     114 + index_criteria: IndexCritera,
     115 +) -> Result<DashMap<String, Vec<File>>> {
     116 + let store: DashMap<String, Vec<File>> = DashMap::new();
     117 + files
    69 118   .into_par_iter()
    70  - .progress_with_style(ProgressStyle::with_template("{spinner:.green} [indexing files] [{wide_bar:.cyan/blue}] {pos}/{len} files").unwrap())
    71  - .filter_map(|file| {
    72  - let hash = hash_file(&file).ok()?;
    73  - Some(database::File { path: file, hash })
    74  - })
    75  - .collect();
     119 + .progress_with_style(ProgressStyle::with_template(
     120 + "{spinner:.green} [indexing files] [{wide_bar:.cyan/blue}] {pos}/{len} files",
     121 + )?)
     122 + .for_each(|file| process_file_index(file, &store, index_criteria));
    76 123   
    77  - hashed
    78  - .iter()
    79  - .try_for_each(|file| database::put(file, connection))
     124 + Ok(store)
    80 125  }
    81 126   
    82  -pub fn hash_file(filepath: &str) -> Result<String> {
     127 +pub fn incremental_hashing(filepath: &str) -> Result<String> {
     128 + let file = fs::File::open(filepath)?;
     129 + let fmap = unsafe { Mmap::map(&file)? };
     130 + let mut inchasher = fxhash::FxHasher::default();
     131 + 
     132 + fmap.chunks(1_000_000)
     133 + .for_each(|mega| inchasher.write(mega));
     134 + 
     135 + Ok(format!("{}", inchasher.finish()))
     136 +}
     137 + 
     138 +pub fn standard_hashing(filepath: &str) -> Result<String> {
    83 139   let file = fs::read(filepath)?;
    84  - let hash = hasher(&*file).to_string();
     140 + Ok(hasher(&*file).to_string())
     141 +}
     142 + 
     143 +pub fn hash_file(filepath: &str) -> Result<String> {
     144 + let filemeta = fs::metadata(filepath)?;
    85 145   
    86  - Ok(hash)
     146 + // NOTE: USE INCREMENTAL HASHING ONLY FOR FILES > 100MB
     147 + match filemeta.len() < 100_000_000 {
     148 + true => standard_hashing(filepath),
     149 + false => incremental_hashing(filepath),
     150 + }
    87 151  }
    88 152   
Please wait...
Page is in error, reload to recover