1 | | - | use std::{fs, path::PathBuf}; |
2 | | - | use indicatif::{HumanDuration, MultiProgress, ProgressBar, ProgressStyle, ParallelProgressIterator}; |
3 | 1 | | use anyhow::Result; |
4 | | - | use fxhash::hash32 as hasher; |
| 2 | + | use dashmap::DashMap; |
| 3 | + | use fxhash::hash64 as hasher; |
5 | 4 | | use glob::glob; |
6 | | - | use itertools::Itertools; |
| 5 | + | use indicatif::{ParallelProgressIterator, ProgressStyle}; |
| 6 | + | use memmap2::Mmap; |
7 | 7 | | use rayon::prelude::*; |
| 8 | + | use std::hash::Hasher; |
| 9 | + | use std::{fs, path::PathBuf}; |
8 | 10 | | |
9 | | - | use crate::{ |
10 | | - | database::{self, File}, |
11 | | - | params::Params, |
12 | | - | }; |
| 11 | + | use crate::{file_manager::File, params::Params}; |
13 | 12 | | |
14 | | - | pub fn duplicates(app_opts: &Params, connection: &sqlite::Connection) -> Result<Vec<File>> { |
15 | | - | let scan_results = scan(app_opts, connection)?; |
16 | | - | let base_path = app_opts.get_directory()?; |
17 | | - | |
18 | | - | index_files(scan_results, connection)?; |
19 | | - | database::duplicate_hashes(connection, &base_path) |
/// Which property of a file an indexing pass buckets on.
///
/// `Size` is the cheap first pass (one metadata read per file); `Hash` is
/// the expensive content-hashing pass, run only on size-collision candidates.
// NOTE(review): identifier is missing an 'i' ("IndexCriteria") — renaming
// would touch every call site in this file, so it is left as-is here.
#[derive(Clone, Copy)]
enum IndexCritera {
    Size,
    Hash,
}
21 | 18 | | |
22 | | - | fn get_glob_patterns(opts: &Params, directory: &str) -> Vec<PathBuf> { |
23 | | - | opts.types |
24 | | - | .clone() |
25 | | - | .unwrap_or_else(|| String::from("*")) |
26 | | - | .split(',') |
27 | | - | .map(|filetype| format!("*.{}", filetype)) |
28 | | - | .map(|filetype| { |
29 | | - | vec![directory.to_owned(), String::from("**"), filetype] |
30 | | - | .iter() |
31 | | - | .collect() |
32 | | - | }) |
33 | | - | .collect() |
34 | | - | } |
| 19 | + | pub fn duplicates(app_opts: &Params) -> Result<DashMap<String, Vec<File>>> { |
| 20 | + | let scan_results = scan(app_opts)?; |
| 21 | + | let size_index_store = index_files(scan_results, IndexCritera::Size)?; |
35 | 22 | | |
36 | | - | fn is_indexed_file(path: impl Into<String>, indexed: &[File]) -> bool { |
37 | | - | indexed |
38 | | - | .iter() |
39 | | - | .map(|file| file.path.clone()) |
40 | | - | .contains(&path.into()) |
| 23 | + | let sizewize_duplicate_files = size_index_store |
| 24 | + | .into_par_iter() |
| 25 | + | .filter(|(_, files)| files.len() > 1) |
| 26 | + | .map(|(_, files)| files) |
| 27 | + | .flatten() |
| 28 | + | .collect::<Vec<File>>(); |
| 29 | + | |
| 30 | + | if sizewize_duplicate_files.len() > 1 { |
| 31 | + | let size_wise_duplicate_paths = sizewize_duplicate_files |
| 32 | + | .into_par_iter() |
| 33 | + | .map(|file| file.path) |
| 34 | + | .collect::<Vec<String>>(); |
| 35 | + | |
| 36 | + | let hash_index_store = index_files(size_wise_duplicate_paths, IndexCritera::Hash)?; |
| 37 | + | let duplicate_files = hash_index_store |
| 38 | + | .into_par_iter() |
| 39 | + | .filter(|(_, files)| files.len() > 1) |
| 40 | + | .collect(); |
| 41 | + | |
| 42 | + | Ok(duplicate_files) |
| 43 | + | } else { |
| 44 | + | Ok(DashMap::new()) |
| 45 | + | } |
41 | 46 | | } |
42 | 47 | | |
43 | | - | fn scan(app_opts: &Params, connection: &sqlite::Connection) -> Result<Vec<String>> { |
44 | | - | let directory = app_opts.get_directory()?; |
45 | | - | let glob_patterns: Vec<PathBuf> = get_glob_patterns(app_opts, &directory); |
46 | | - | let indexed_paths = database::indexed_paths(connection)?; |
| 48 | + | fn scan(app_opts: &Params) -> Result<Vec<String>> { |
| 49 | + | let glob_patterns: Vec<PathBuf> = app_opts.get_glob_patterns(); |
47 | 50 | | let files: Vec<String> = glob_patterns |
48 | 51 | | .par_iter() |
49 | | - | .progress_with_style(ProgressStyle::with_template("{spinner:.green} [scanning files] [{wide_bar:.cyan/blue}] {pos}/{len} files").unwrap()) |
| 52 | + | .progress_with_style(ProgressStyle::with_template( |
| 53 | + | "{spinner:.green} [scanning files] [{wide_bar:.cyan/blue}] {pos}/{len} files", |
| 54 | + | )?) |
50 | 55 | | .filter_map(|glob_pattern| glob(glob_pattern.as_os_str().to_str()?).ok()) |
51 | 56 | | .flat_map(|file_vec| { |
52 | 57 | | file_vec |
53 | 58 | | .filter_map(|x| Some(x.ok()?.as_os_str().to_str()?.to_string())) |
54 | | - | .filter(|fpath| !is_indexed_file(fpath, &indexed_paths)) |
55 | 59 | | .filter(|glob_result| { |
56 | 60 | | fs::metadata(glob_result) |
57 | 61 | | .map(|f| f.is_file()) |
| skipped 6 lines |
64 | 68 | | Ok(files) |
65 | 69 | | } |
66 | 70 | | |
67 | | - | fn index_files(files: Vec<String>, connection: &sqlite::Connection) -> Result<()> { |
68 | | - | let hashed: Vec<File> = files |
| 71 | + | fn process_file_size_index(fpath: String) -> Result<File> { |
| 72 | + | Ok(File { |
| 73 | + | path: fpath.clone(), |
| 74 | + | size: Some(fs::metadata(fpath)?.len()), |
| 75 | + | hash: None, |
| 76 | + | }) |
| 77 | + | } |
| 78 | + | |
| 79 | + | fn process_file_hash_index(fpath: String) -> Result<File> { |
| 80 | + | Ok(File { |
| 81 | + | path: fpath.clone(), |
| 82 | + | size: None, |
| 83 | + | hash: Some(hash_file(&fpath).unwrap_or_default()), |
| 84 | + | }) |
| 85 | + | } |
| 86 | + | |
| 87 | + | fn process_file_index( |
| 88 | + | fpath: String, |
| 89 | + | store: &DashMap<String, Vec<File>>, |
| 90 | + | index_criteria: IndexCritera, |
| 91 | + | ) { |
| 92 | + | match index_criteria { |
| 93 | + | IndexCritera::Size => { |
| 94 | + | let processed_file = process_file_size_index(fpath).unwrap(); |
| 95 | + | store |
| 96 | + | .entry(processed_file.size.unwrap_or_default().to_string()) |
| 97 | + | .and_modify(|fileset| fileset.push(processed_file.clone())) |
| 98 | + | .or_insert_with(|| vec![processed_file]); |
| 99 | + | } |
| 100 | + | IndexCritera::Hash => { |
| 101 | + | let processed_file = process_file_hash_index(fpath).unwrap(); |
| 102 | + | let indexhash = processed_file.clone().hash.unwrap_or_default(); |
| 103 | + | |
| 104 | + | store |
| 105 | + | .entry(indexhash) |
| 106 | + | .and_modify(|fileset| fileset.push(processed_file.clone())) |
| 107 | + | .or_insert_with(|| vec![processed_file]); |
| 108 | + | } |
| 109 | + | } |
| 110 | + | } |
| 111 | + | |
| 112 | + | fn index_files( |
| 113 | + | files: Vec<String>, |
| 114 | + | index_criteria: IndexCritera, |
| 115 | + | ) -> Result<DashMap<String, Vec<File>>> { |
| 116 | + | let store: DashMap<String, Vec<File>> = DashMap::new(); |
| 117 | + | files |
69 | 118 | | .into_par_iter() |
70 | | - | .progress_with_style(ProgressStyle::with_template("{spinner:.green} [indexing files] [{wide_bar:.cyan/blue}] {pos}/{len} files").unwrap()) |
71 | | - | .filter_map(|file| { |
72 | | - | let hash = hash_file(&file).ok()?; |
73 | | - | Some(database::File { path: file, hash }) |
74 | | - | }) |
75 | | - | .collect(); |
| 119 | + | .progress_with_style(ProgressStyle::with_template( |
| 120 | + | "{spinner:.green} [indexing files] [{wide_bar:.cyan/blue}] {pos}/{len} files", |
| 121 | + | )?) |
| 122 | + | .for_each(|file| process_file_index(file, &store, index_criteria)); |
76 | 123 | | |
77 | | - | hashed |
78 | | - | .iter() |
79 | | - | .try_for_each(|file| database::put(file, connection)) |
| 124 | + | Ok(store) |
80 | 125 | | } |
81 | 126 | | |
82 | | - | pub fn hash_file(filepath: &str) -> Result<String> { |
| 127 | + | pub fn incremental_hashing(filepath: &str) -> Result<String> { |
| 128 | + | let file = fs::File::open(filepath)?; |
| 129 | + | let fmap = unsafe { Mmap::map(&file)? }; |
| 130 | + | let mut inchasher = fxhash::FxHasher::default(); |
| 131 | + | |
| 132 | + | fmap.chunks(1_000_000) |
| 133 | + | .for_each(|mega| inchasher.write(mega)); |
| 134 | + | |
| 135 | + | Ok(format!("{}", inchasher.finish())) |
| 136 | + | } |
| 137 | + | |
| 138 | + | pub fn standard_hashing(filepath: &str) -> Result<String> { |
83 | 139 | | let file = fs::read(filepath)?; |
84 | | - | let hash = hasher(&*file).to_string(); |
| 140 | + | Ok(hasher(&*file).to_string()) |
| 141 | + | } |
| 142 | + | |
| 143 | + | pub fn hash_file(filepath: &str) -> Result<String> { |
| 144 | + | let filemeta = fs::metadata(filepath)?; |
85 | 145 | | |
86 | | - | Ok(hash) |
| 146 | + | // NOTE: USE INCREMENTAL HASHING ONLY FOR FILES > 100MB |
| 147 | + | match filemeta.len() < 100_000_000 { |
| 148 | + | true => standard_hashing(filepath), |
| 149 | + | false => incremental_hashing(filepath), |
| 150 | + | } |
87 | 151 | | } |
88 | 152 | | |