| 1 | + | use crate::{file_manager::File, filters, params::Params}; |
1 | 2 | | use anyhow::Result; |
2 | 3 | | use dashmap::DashMap; |
3 | 4 | | use fxhash::hash64 as hasher; |
| skipped 3 lines |
7 | 8 | | use rayon::prelude::*; |
8 | 9 | | use std::hash::Hasher; |
9 | 10 | | use std::{fs, path::PathBuf}; |
10 | | - | |
11 | | - | use crate::{file_manager::File, params::Params}; |
12 | 11 | | |
13 | 12 | | #[derive(Clone, Copy)] |
14 | 13 | | enum IndexCritera { |
| skipped 13 lines |
28 | 27 | | .collect::<Vec<File>>(); |
29 | 28 | | |
30 | 29 | | if sizewize_duplicate_files.len() > 1 { |
31 | | - | let size_wise_duplicate_paths = sizewize_duplicate_files |
32 | | - | .into_par_iter() |
33 | | - | .map(|file| file.path) |
34 | | - | .collect::<Vec<String>>(); |
35 | | - | |
36 | | - | let hash_index_store = index_files(size_wise_duplicate_paths, IndexCritera::Hash)?; |
| 30 | + | let hash_index_store = index_files(sizewize_duplicate_files, IndexCritera::Hash)?; |
37 | 31 | | let duplicate_files = hash_index_store |
38 | 32 | | .into_par_iter() |
39 | 33 | | .filter(|(_, files)| files.len() > 1) |
| skipped 5 lines |
45 | 39 | | } |
46 | 40 | | } |
47 | 41 | | |
48 | | - | fn scan(app_opts: &Params) -> Result<Vec<String>> { |
| 42 | + | fn scan(app_opts: &Params) -> Result<Vec<File>> { |
49 | 43 | | let glob_patterns: Vec<PathBuf> = app_opts.get_glob_patterns(); |
50 | | - | let files: Vec<String> = glob_patterns |
| 44 | + | let files: Vec<File> = glob_patterns |
51 | 45 | | .par_iter() |
52 | 46 | | .progress_with_style(ProgressStyle::with_template( |
53 | 47 | | "{spinner:.green} [scanning files] [{wide_bar:.cyan/blue}] {pos}/{len} files", |
| skipped 9 lines |
63 | 57 | | }) |
64 | 58 | | .collect::<Vec<String>>() |
65 | 59 | | }) |
| 60 | + | .map(|file_path| File { |
| 61 | + | path: file_path.clone(), |
| 62 | + | hash: None, |
| 63 | + | size: Some(fs::metadata(file_path).unwrap().len()), |
| 64 | + | }) |
| 65 | + | .filter(|file| filters::is_file_gt_minsize(app_opts, file)) |
66 | 66 | | .collect(); |
67 | 67 | | |
68 | 68 | | Ok(files) |
69 | 69 | | } |
70 | 70 | | |
71 | | - | fn process_file_size_index(fpath: String) -> Result<File> { |
| 71 | + | fn process_file_hash_index(file: &File) -> Result<File> { |
72 | 72 | | Ok(File { |
73 | | - | path: fpath.clone(), |
74 | | - | size: Some(fs::metadata(fpath)?.len()), |
75 | | - | hash: None, |
76 | | - | }) |
77 | | - | } |
78 | | - | |
79 | | - | fn process_file_hash_index(fpath: String) -> Result<File> { |
80 | | - | Ok(File { |
81 | | - | path: fpath.clone(), |
82 | | - | size: None, |
83 | | - | hash: Some(hash_file(&fpath).unwrap_or_default()), |
| 73 | + | path: file.path.clone(), |
| 74 | + | size: file.size, |
| 75 | + | hash: Some(hash_file(&file.path).unwrap_or_default()), |
84 | 76 | | }) |
85 | 77 | | } |
86 | 78 | | |
87 | 79 | | fn process_file_index( |
88 | | - | fpath: String, |
| 80 | + | file: File, |
89 | 81 | | store: &DashMap<String, Vec<File>>, |
90 | 82 | | index_criteria: IndexCritera, |
91 | 83 | | ) { |
92 | 84 | | match index_criteria { |
93 | 85 | | IndexCritera::Size => { |
94 | | - | let processed_file = process_file_size_index(fpath).unwrap(); |
95 | 86 | | store |
96 | | - | .entry(processed_file.size.unwrap_or_default().to_string()) |
97 | | - | .and_modify(|fileset| fileset.push(processed_file.clone())) |
98 | | - | .or_insert_with(|| vec![processed_file]); |
| 87 | + | .entry(file.size.unwrap_or_default().to_string()) |
| 88 | + | .and_modify(|fileset| fileset.push(file.clone())) |
| 89 | + | .or_insert_with(|| vec![file]); |
99 | 90 | | } |
100 | 91 | | IndexCritera::Hash => { |
101 | | - | let processed_file = process_file_hash_index(fpath).unwrap(); |
| 92 | + | let processed_file = process_file_hash_index(&file).unwrap(); |
102 | 93 | | let indexhash = processed_file.clone().hash.unwrap_or_default(); |
103 | 94 | | |
104 | 95 | | store |
| skipped 5 lines |
110 | 101 | | } |
111 | 102 | | |
112 | 103 | | fn index_files( |
113 | | - | files: Vec<String>, |
| 104 | + | files: Vec<File>, |
114 | 105 | | index_criteria: IndexCritera, |
115 | 106 | | ) -> Result<DashMap<String, Vec<File>>> { |
116 | 107 | | let store: DashMap<String, Vec<File>> = DashMap::new(); |
| skipped 7 lines |
124 | 115 | | Ok(store) |
125 | 116 | | } |
126 | 117 | | |
127 | | - | pub fn incremental_hashing(filepath: &str) -> Result<String> { |
| 118 | + | fn incremental_hashing(filepath: &str) -> Result<String> { |
128 | 119 | | let file = fs::File::open(filepath)?; |
129 | 120 | | let fmap = unsafe { Mmap::map(&file)? }; |
130 | 121 | | let mut inchasher = fxhash::FxHasher::default(); |
| skipped 4 lines |
135 | 126 | | Ok(format!("{}", inchasher.finish())) |
136 | 127 | | } |
137 | 128 | | |
138 | | - | pub fn standard_hashing(filepath: &str) -> Result<String> { |
| 129 | + | fn standard_hashing(filepath: &str) -> Result<String> { |
139 | 130 | | let file = fs::read(filepath)?; |
140 | 131 | | Ok(hasher(&*file).to_string()) |
141 | 132 | | } |
142 | 133 | | |
143 | | - | pub fn hash_file(filepath: &str) -> Result<String> { |
| 134 | + | fn hash_file(filepath: &str) -> Result<String> { |
144 | 135 | | let filemeta = fs::metadata(filepath)?; |
145 | 136 | | |
146 | 137 | | // NOTE: USE INCREMENTAL HASHING ONLY FOR FILES > 100MB |
| skipped 6 lines |