1 | | - | use crate::database; |
2 | | - | use crate::{params::Params, database::File}; |
| 1 | + | use std::{fs, path::PathBuf}; |
| 2 | + | |
3 | 3 | | use anyhow::Result; |
| 4 | + | use fxhash::hash32 as hasher; |
4 | 5 | | use glob::glob; |
5 | 6 | | use itertools::Itertools; |
6 | 7 | | use rayon::prelude::*; |
7 | | - | use std::fs; |
8 | | - | use std::path::PathBuf; |
9 | | - | use fxhash::hash32 as hasher; |
| 8 | + | |
| 9 | + | use crate::{ |
| 10 | + | database::{self, File}, |
| 11 | + | params::Params, |
| 12 | + | }; |
10 | 13 | | |
11 | 14 | | pub fn duplicates(app_opts: &Params, connection: &sqlite::Connection) -> Result<Vec<File>> { |
12 | 15 | | let scan_results = scan(app_opts, connection)?; |
13 | 16 | | let base_path = app_opts.get_directory()?; |
14 | 17 | | |
15 | | - | index_files(scan_results, connection); |
| 18 | + | index_files(scan_results, connection)?; |
16 | 19 | | database::duplicate_hashes(connection, &base_path) |
17 | 20 | | } |
18 | 21 | | |
19 | | - | fn get_glob_patterns(opts: &Params, directory: &String) -> Vec<PathBuf> { |
| 22 | + | fn get_glob_patterns(opts: &Params, directory: &str) -> Vec<PathBuf> { |
20 | 23 | | opts.types |
21 | 24 | | .clone() |
22 | | - | .unwrap_or(String::from("*")) |
23 | | - | .split(",") |
| 25 | + | .unwrap_or_else(|| String::from("*")) |
| 26 | + | .split(',') |
24 | 27 | | .map(|filetype| format!("*.{}", filetype)) |
25 | 28 | | .map(|filetype| { |
26 | | - | vec![directory.clone(), String::from("**"), filetype] |
| 29 | + | vec![directory.to_owned(), String::from("**"), filetype] |
27 | 30 | | .iter() |
28 | 31 | | .collect() |
29 | 32 | | }) |
30 | 33 | | .collect() |
31 | 34 | | } |
32 | 35 | | |
33 | | - | fn is_indexed_file(path: &String, indexed: &Vec<File>) -> bool { |
| 36 | + | fn is_indexed_file(path: impl Into<String>, indexed: &[File]) -> bool { |
34 | 37 | | indexed |
35 | | - | .into_iter() |
| 38 | + | .iter() |
36 | 39 | | .map(|file| file.path.clone()) |
37 | | - | .contains(path) |
| 40 | + | .contains(&path.into()) |
38 | 41 | | } |
39 | 42 | | |
40 | 43 | | fn scan(app_opts: &Params, connection: &sqlite::Connection) -> Result<Vec<String>> { |
41 | 44 | | let directory = app_opts.get_directory()?; |
42 | | - | let glob_patterns: Vec<PathBuf> = get_glob_patterns(&app_opts, &directory); |
| 45 | + | let glob_patterns: Vec<PathBuf> = get_glob_patterns(app_opts, &directory); |
43 | 46 | | let indexed_paths = database::indexed_paths(connection)?; |
44 | 47 | | let files: Vec<String> = glob_patterns |
45 | | - | .into_par_iter() |
46 | | - | .map(|glob_pattern| glob(&glob_pattern.as_os_str().to_str().unwrap())) |
47 | | - | .map(|glob_result| glob_result.unwrap()) |
| 48 | + | .par_iter() |
| 49 | + | .filter_map(|glob_pattern| glob(glob_pattern.as_os_str().to_str()?).ok()) |
48 | 50 | | .flat_map(|file_vec| { |
49 | 51 | | file_vec |
50 | | - | .map(|x| x.unwrap().as_os_str().to_str().unwrap().to_string()) |
| 52 | + | .filter_map(|x| Some(x.ok()?.as_os_str().to_str()?.to_string())) |
51 | 53 | | .filter(|fpath| !is_indexed_file(fpath, &indexed_paths)) |
52 | | - | .filter(|glob_result| fs::metadata(glob_result).unwrap().is_file()) |
| 54 | + | .filter(|glob_result| { |
| 55 | + | fs::metadata(glob_result) |
| 56 | + | .map(|f| f.is_file()) |
| 57 | + | .unwrap_or(false) |
| 58 | + | }) |
53 | 59 | | .collect::<Vec<String>>() |
54 | 60 | | }) |
55 | 61 | | .collect(); |
| skipped 1 lines |
57 | 63 | | Ok(files) |
58 | 64 | | } |
59 | 65 | | |
60 | | - | fn index_files(files: Vec<String>, connection: &sqlite::Connection) { |
| 66 | + | fn index_files(files: Vec<String>, connection: &sqlite::Connection) -> Result<()> { |
61 | 67 | | let hashed: Vec<File> = files |
62 | 68 | | .into_par_iter() |
63 | | - | .map(|file| { |
64 | | - | let hash = hash_file(&file).unwrap(); |
65 | | - | database::File { path: file, hash } |
| 69 | + | .filter_map(|file| { |
| 70 | + | let hash = hash_file(&file).ok()?; |
| 71 | + | Some(database::File { path: file, hash }) |
66 | 72 | | }) |
67 | 73 | | .collect(); |
68 | 74 | | |
69 | | - | hashed.into_iter().for_each(|file| { |
70 | | - | database::put(&file, connection).unwrap(); |
71 | | - | }); |
| 75 | + | hashed |
| 76 | + | .iter() |
| 77 | + | .try_for_each(|file| database::put(file, connection)) |
72 | 78 | | } |
73 | 79 | | |
74 | 80 | | pub fn hash_file(filepath: &str) -> Result<String> { |
| skipped 6 lines |