Projects STRLCPY deduplicator Commits e6f93ce3
🤬
Revision indexing in progress... (symbol navigation in revisions will be accurate after indexed)
  • ■ ■ ■ ■ ■ ■
    src/scanner.rs
    1  -use crate::database;
    2  -use crate::{params::Params, database::File};
     1 +use std::{fs, path::PathBuf};
     2 + 
    3 3  use anyhow::Result;
     4 +use fxhash::hash32 as hasher;
    4 5  use glob::glob;
    5 6  use itertools::Itertools;
    6 7  use rayon::prelude::*;
    7  -use std::fs;
    8  -use std::path::PathBuf;
    9  -use fxhash::hash32 as hasher;
     8 + 
     9 +use crate::{
     10 + database::{self, File},
     11 + params::Params,
     12 +};
    10 13   
    11 14  pub fn duplicates(app_opts: &Params, connection: &sqlite::Connection) -> Result<Vec<File>> {
    12 15   let scan_results = scan(app_opts, connection)?;
    13 16   let base_path = app_opts.get_directory()?;
    14 17   
    15  - index_files(scan_results, connection);
     18 + index_files(scan_results, connection)?;
    16 19   database::duplicate_hashes(connection, &base_path)
    17 20  }
    18 21   
    19  -fn get_glob_patterns(opts: &Params, directory: &String) -> Vec<PathBuf> {
     22 +fn get_glob_patterns(opts: &Params, directory: &str) -> Vec<PathBuf> {
    20 23   opts.types
    21 24   .clone()
    22  - .unwrap_or(String::from("*"))
    23  - .split(",")
     25 + .unwrap_or_else(|| String::from("*"))
     26 + .split(',')
    24 27   .map(|filetype| format!("*.{}", filetype))
    25 28   .map(|filetype| {
    26  - vec![directory.clone(), String::from("**"), filetype]
     29 + vec![directory.to_owned(), String::from("**"), filetype]
    27 30   .iter()
    28 31   .collect()
    29 32   })
    30 33   .collect()
    31 34  }
    32 35   
    33  -fn is_indexed_file(path: &String, indexed: &Vec<File>) -> bool {
     36 +fn is_indexed_file(path: impl Into<String>, indexed: &[File]) -> bool {
    34 37   indexed
    35  - .into_iter()
     38 + .iter()
    36 39   .map(|file| file.path.clone())
    37  - .contains(path)
     40 + .contains(&path.into())
    38 41  }
    39 42   
    40 43  fn scan(app_opts: &Params, connection: &sqlite::Connection) -> Result<Vec<String>> {
    41 44   let directory = app_opts.get_directory()?;
    42  - let glob_patterns: Vec<PathBuf> = get_glob_patterns(&app_opts, &directory);
     45 + let glob_patterns: Vec<PathBuf> = get_glob_patterns(app_opts, &directory);
    43 46   let indexed_paths = database::indexed_paths(connection)?;
    44 47   let files: Vec<String> = glob_patterns
    45  - .into_par_iter()
    46  - .map(|glob_pattern| glob(&glob_pattern.as_os_str().to_str().unwrap()))
    47  - .map(|glob_result| glob_result.unwrap())
     48 + .par_iter()
     49 + .filter_map(|glob_pattern| glob(glob_pattern.as_os_str().to_str()?).ok())
    48 50   .flat_map(|file_vec| {
    49 51   file_vec
    50  - .map(|x| x.unwrap().as_os_str().to_str().unwrap().to_string())
     52 + .filter_map(|x| Some(x.ok()?.as_os_str().to_str()?.to_string()))
    51 53   .filter(|fpath| !is_indexed_file(fpath, &indexed_paths))
    52  - .filter(|glob_result| fs::metadata(glob_result).unwrap().is_file())
     54 + .filter(|glob_result| {
     55 + fs::metadata(glob_result)
     56 + .map(|f| f.is_file())
     57 + .unwrap_or(false)
     58 + })
    53 59   .collect::<Vec<String>>()
    54 60   })
    55 61   .collect();
    skipped 1 lines
    57 63   Ok(files)
    58 64  }
    59 65   
    60  -fn index_files(files: Vec<String>, connection: &sqlite::Connection) {
     66 +fn index_files(files: Vec<String>, connection: &sqlite::Connection) -> Result<()> {
    61 67   let hashed: Vec<File> = files
    62 68   .into_par_iter()
    63  - .map(|file| {
    64  - let hash = hash_file(&file).unwrap();
    65  - database::File { path: file, hash }
     69 + .filter_map(|file| {
     70 + let hash = hash_file(&file).ok()?;
     71 + Some(database::File { path: file, hash })
    66 72   })
    67 73   .collect();
    68 74   
    69  - hashed.into_iter().for_each(|file| {
    70  - database::put(&file, connection).unwrap();
    71  - });
     75 + hashed
     76 + .iter()
     77 + .try_for_each(|file| database::put(file, connection))
    72 78  }
    73 79   
    74 80  pub fn hash_file(filepath: &str) -> Result<String> {
    skipped 6 lines
Please wait...
Page is in error, reload to recover