diff options
-rwxr-xr-x | bin/hashl | 226 | ||||
-rw-r--r-- | lib/App/Hashl.pm | 336 |
2 files changed, 303 insertions, 259 deletions
@@ -14,20 +14,20 @@ use Getopt::Long; use IO::Handle; use Time::Progress; -my $base = getcwd(); -my $rel_paths = 1; -my $db_file = '.hashl.db'; -my $total = 0; -my $cur = 0; +my $base = getcwd(); +my $rel_paths = 1; +my $db_file = '.hashl.db'; +my $total = 0; +my $cur = 0; my $show_progress = 1; my $timer; my $incoming_dir; my $read_size; -my ($find_ref, $find_db_write); +my ( $find_ref, $find_db_write ); my $hashl; -my $VERSION = '0.1'; +our $VERSION = '0.1'; STDERR->autoflush(1); @@ -38,7 +38,7 @@ GetOptions( 'V|version' => sub { say "hashl version ${VERSION}"; exit 0 }, ) or usage(); -if (substr($db_file, 0, 1) ne '/') { +if ( substr( $db_file, 0, 1 ) ne q{/} ) { $db_file = "${base}/${db_file}"; } @@ -49,24 +49,21 @@ sub usage { Usage: $0 [options] <update|list|info|...> [args] See 'perldoc -F $0' (or 'man hashl' if it is properly installed) EOF - } -if (not defined $action) { +if ( not defined $action ) { usage(); } -if (-r $db_file) { +if ( -r $db_file ) { $hashl = App::Hashl->new_from_file($db_file); } else { - $hashl = App::Hashl->new( - read_size => $read_size, - ); + $hashl = App::Hashl->new( read_size => $read_size ); } -$SIG{INT} = \&quit_save_db; -$SIG{TERM} = \&quit_save_db; +local $SIG{INT} = \&quit_save_db; +local $SIG{TERM} = \&quit_save_db; sub quit_save_db { $hashl->save($db_file); @@ -75,83 +72,97 @@ sub quit_save_db { sub get_total { my $file = $File::Find::name; - if (-f $file and not -l $file and $file ne $db_file) { + + if ( -f $file and not -l $file and $file ne $db_file ) { $total++; } + + return; } sub drop_deleted { - for my $file ($hashl->files()) { - if (! -e $file) { + for my $file ( $hashl->files() ) { + if ( not -e $file ) { $hashl->delete_file($file); } } + + return; } sub copy_file { - my ($file, $to) = @_; + my ( $file, $to ) = @_; - my $base = substr($file, length($base) + 1); - if ($base =~ s{ / [^/]+ $}{}x) { - mkdirs($incoming_dir, $base); + my $cp_base = substr( $file, length($base) + 1 ); + if ( $base =~ s{ / [^/]+ $}{}x ) { + mkdirs( $incoming_dir, $cp_base ); } - copy($file, "${to}/${base}") - or die("Cannot copy ${file} to ${to}/${base}: $!\n"); + copy( $file, "${to}/${cp_base}" ) + or die("Cannot copy ${file} to ${to}/${cp_base}: $!\n"); + + return; } sub db_find_new { - my ($file, $path) = @_; + my ( $file, $path ) = @_; - if (not $hashl->file_in_db($path)) { + if ( not $hashl->file_in_db($path) ) { say "\r\e[2K${file}"; } + + return; } sub db_find_known { - my ($file, $path) = @_; + my ( $file, $path ) = @_; - if ($hashl->file_in_db($path)) { + if ( $hashl->file_in_db($path) ) { say "\r\e[2K${file}"; } + + return; } sub db_info { printf( "Read size: %d bytes (%s)\n", - $hashl->read_size(), - $hashl->si_size($hashl->read_size), + $hashl->read_size(), $hashl->si_size( $hashl->read_size ), ); + + return; } sub file_info { my ($file) = @_; + my $entry = $hashl->file($file); - if (not $entry) { + if ( not $entry ) { die("No such file in database\n"); } printf( "File: %s\nSize: %d bytes (%s)\nHash: %s\n", - $file, - $entry->{size}, - $hashl->si_size($entry->{size}), + $file, $entry->{size}, $hashl->si_size( $entry->{size} ), $entry->{hash}, ); + + return; } sub process_file { - my ($code, $write) = ($find_ref, $find_db_write); + my ( $code, $write ) = ( $find_ref, $find_db_write ); + my $file = $File::Find::name; my $path = $file; - if (not -f $file or -l $file or $file eq $db_file) { + if ( not -f $file or -l $file or $file eq $db_file ) { return; } if ($rel_paths) { - $file = substr($file, length($base) + 1); + $file = substr( $file, length($base) + 1 ); } $cur++; @@ -159,86 +170,100 @@ sub process_file { if ($show_progress) { print STDERR $timer->report( "\r\e[2KScanning directory: %p done, %L elapsed, %E remaining", - $cur, - ); + $cur, ); } - &{$code}($file, $path); + &{$code}( $file, $path ); - if ($write and (($cur % 5000) == 0)) { + if ( $write and ( ( $cur % 5000 ) == 0 ) ) { $hashl->save($db_file); } + + return; } sub db_update { - my ($file, $path) = @_; + my ( $file, $path ) = @_; $hashl->add_file( file => $file, path => $path, ); + + return; } sub db_ignore { - my ($file, $path) = @_; + my ( $file, $path ) = @_; + + $hashl->ignore( $file, $path ); - $hashl->ignore($file, $path); + return; } sub db_copy { - my ($file, $path) = @_; + my ( $file, $path ) = @_; - if (not $hashl->file_in_db($path)) { - copy_file($path, $incoming_dir); + if ( not $hashl->file_in_db($path) ) { + copy_file( $path, $incoming_dir ); } + + return; } sub mkdirs { - my ($base, $new) = @_; + my ( $dir_base, $new ) = @_; - for my $dir (split(qr{/}, $new)) { - $base .= "/$dir"; - if (! -d $base) { - mkdir($base) - or die("Cannot create ${base}: $!\n"); + for my $dir ( split( qr{/}, $new ) ) { + $dir_base .= "/$dir"; + if ( !-d $dir_base ) { + mkdir($dir_base) + or die("Cannot create ${dir_base}: $!\n"); } } + + return; } sub prepare_db_run { my ($dir) = @_; + $dir //= $base; - if (not $show_progress) { + if ( not $show_progress ) { return; } - find(\&get_total, $dir); + find( \&get_total, $dir ); $timer = Time::Progress->new(); $timer->attr( min => 1, max => $total, ); + return; } sub cmd_copy { - prepare_db_run(); ($incoming_dir) = @_; - if (not $incoming_dir) { + prepare_db_run(); + + if ( not $incoming_dir ) { usage(); } - if (substr($incoming_dir, 0, 1) ne '/') { - $incoming_dir = $base . '/' . $incoming_dir; + if ( substr( $incoming_dir, 0, 1 ) ne q{/} ) { + $incoming_dir = $base . q{/} . $incoming_dir; } - $find_ref = \&db_copy; + $find_ref = \&db_copy; $find_db_write = 0; - find(\&process_file, $base); + find( \&process_file, $base ); print "\n"; + + return; } sub cmd_find_known { @@ -246,16 +271,18 @@ sub cmd_find_known { $dir //= $base; - if (substr($dir, 0, 1) ne '/') { - $dir = $base . '/' . $dir; + if ( substr( $dir, 0, 1 ) ne q{/} ) { + $dir = $base . q{/} . $dir; } prepare_db_run($dir); - $find_ref = \&db_find_known; + $find_ref = \&db_find_known; $find_db_write = 0; - find(\&process_file, $dir); + find( \&process_file, $dir ); print "\n"; + + return; } sub cmd_find_new { @@ -263,16 +290,18 @@ sub cmd_find_new { $new_dir //= $base; - if (substr($new_dir, 0, 1) ne '/') { - $new_dir = $base . '/' . $new_dir; + if ( substr( $new_dir, 0, 1 ) ne q{/} ) { + $new_dir = $base . q{/} . $new_dir; } prepare_db_run($new_dir); - $find_ref = \&db_find_new; + $find_ref = \&db_find_new; $find_db_write = 0; - find(\&process_file, $new_dir); + find( \&process_file, $new_dir ); print "\n"; + + return; } sub cmd_ignore { @@ -282,15 +311,17 @@ sub cmd_ignore { prepare_db_run(); - if (substr($ign_dir, 0, 1) ne '/') { - $ign_dir = $base . '/' . $ign_dir; + if ( substr( $ign_dir, 0, 1 ) ne q{/} ) { + $ign_dir = $base . q{/} . $ign_dir; } - $find_ref = \&db_ignore; + $find_ref = \&db_ignore; $find_db_write = 1; - find(\&process_file, $ign_dir); + find( \&process_file, $ign_dir ); $hashl->save($db_file); print "\n"; + + return; } sub cmd_info { @@ -302,44 +333,53 @@ sub cmd_info { else { db_info(); } + + return; } sub cmd_list { printf( "# hashl v%s Read Size %d bytes (%s)\n", - $VERSION, - $hashl->read_size(), - $hashl->si_size($hashl->read_size()), + $VERSION, $hashl->read_size(), $hashl->si_size( $hashl->read_size() ), ); - for my $name (sort $hashl->files()) { + + for my $name ( sort $hashl->files() ) { my $file = $hashl->file($name); - printf( - "%s %-7s %s\n", - $file->{hash}, - $hashl->si_size($file->{size}), - $name - ); + printf( "%s %-7s %s\n", + $file->{hash}, $hashl->si_size( $file->{size} ), $name ); } + + return; } sub cmd_list_files { - say join("\n", sort $hashl->files()); + say join( "\n", sort $hashl->files() ); + + return; } sub cmd_list_ignored { - for my $hash ($hashl->ignored()) { + for my $hash ( $hashl->ignored() ) { say $hash; } + + return; } sub cmd_update { drop_deleted(); prepare_db_run(); - $find_ref = \&db_update; + + $find_ref = \&db_update; $find_db_write = 1; - find(\&process_file, $base); + + find( \&process_file, $base ); + print "\n"; + $hashl->save($db_file); + + return; } given ($action) { @@ -352,7 +392,7 @@ given ($action) { when ('list-files') { cmd_list_files(@ARGV) } when ('list-ignored') { cmd_list_ignored(@ARGV) } when ('update') { cmd_update(@ARGV) } - default { usage() } + default { usage() } } __END__ @@ -365,6 +405,10 @@ B<hashl> - Create database with partial file hashes, check if other files are in B<hashl> [B<-d> I<dbfile>] [B<-s> I<read-size>] I<action> [I<args>] +=head1 VERSION + +This manual documents hashl version 0.1 + =head1 DESCRIPTION Actions: @@ -439,6 +483,10 @@ Print version information. =back +=head1 EXIT STATUS + +Unless an error occured, B<hashl> will always return zero. + =head1 CONFIGURATION None, so far diff --git a/lib/App/Hashl.pm b/lib/App/Hashl.pm index e8015b2..6ca02bc 100644 --- a/lib/App/Hashl.pm +++ b/lib/App/Hashl.pm @@ -7,7 +7,164 @@ use 5.010; use Digest::SHA qw(sha1_hex); use Storable qw(nstore retrieve); -my $VERSION = '0.1'; +our $VERSION = '0.1'; + +sub new { + my ( $obj, %conf ) = @_; + + my $ref = { + files => {}, + ignored => {}, + }; + + $ref->{config} = \%conf; + $ref->{config}->{read_size} //= ( 2**20 ) * 4; # 4 MiB + + return bless( $ref, $obj ); +} + +sub new_from_file { + my ( $obj, $file ) = @_; + my $ref = retrieve($file); + return bless( $ref, $obj ); +} + +sub si_size { + my ( $self, $bytes ) = @_; + my @post = ( q{ }, qw(k M G T) ); + + while ( $bytes >= 1024 ) { + $bytes /= 1024; + shift @post; + } + + return sprintf( '%6.1f%s', $bytes, $post[0] ); +} + +sub hash_file { + my ( $self, $file ) = @_; + my $data; + + #<<< perltidy has problems indenting 'or die' with tabs + + open( my $fh, '<', $file ) + or die("Can't open ${file} for reading: $!\n"); + binmode($fh) + or die("Can't set binmode on ${file}: $!\n"); + read( $fh, $data, $self->{config}->{read_size} ) + or die("Can't read ${file}: $!\n"); + close($fh) + or die("Can't close ${file}: $!\n"); + + #>>> + return sha1_hex($data); +} + +sub hash_in_db { + my ( $self, $hash ) = @_; + + if ( $self->{ignored}->{$hash} ) { + return '// ignored'; + } + + for my $name ( $self->files() ) { + my $file = $self->file($name); + + if ( $file->{hash} eq $hash ) { + return $name; + } + } + return; +} + +sub file_in_db { + my ( $self, $file ) = @_; + + return $self->hash_in_db( $self->hash_file($file) ); +} + +sub read_size { + my ($self) = @_; + + return $self->{config}->{read_size}; +} + +sub file { + my ( $self, $name ) = @_; + + return $self->{files}->{$name}; +} + +sub delete_file { + my ( $self, $name ) = @_; + + delete $self->{files}->{$name}; + + return; +} + +sub files { + my ($self) = @_; + + return keys %{ $self->{files} }; +} + +sub add_file { + my ( $self, %data ) = @_; + my $file = $data{file}; + my $path = $data{path}; + my ( $size, $mtime ) = ( stat($path) )[ 7, 9 ]; + + if ( $self->file($file) + and $self->file($file)->{mtime} == $mtime + and $self->file($file)->{size} == $size ) + { + return; + } + + my $hash = $self->hash_file($path); + + if ( $self->{ignored}->{$hash} ) { + return; + } + + $self->{files}->{$file} = { + hash => $hash, + mtime => $mtime, + size => $size, + }; + + return 1; +} + +sub ignored { + my ($self) = @_; + + if ( exists $self->{ignored} ) { + return keys %{ $self->{ignored} }; + } + + return (); +} + +sub ignore { + my ( $self, $file, $path ) = @_; + + $self->delete_file($file); + $self->{ignored}->{ $self->hash_file($path) } = 1; + + return 1; +} + +sub save { + my ( $self, $file ) = @_; + + return nstore( $self, $file ); +} + +1; + +__END__ =head1 NAME @@ -47,126 +204,37 @@ How many bytes of a file to consider for the hash. Defaults to 4 MiB (4 * =back -=cut - -sub new { - my ($obj, %conf) = @_; - my $ref = { - files => {}, - ignored => {}, - }; - - $ref->{config} = \%conf; - $ref->{config}->{read_size} //= (2 ** 20) * 4, # 4 MiB - - return bless($ref, $obj); -} - =item $hashl = App::Hashl->new_from_file(I<$file>) Returns the B<App::Hashl> object saved to I<file> by a prior $hashl->save call. -=cut - -sub new_from_file { - my ($obj, $file) = @_; - my $ref = retrieve($file); - return bless($ref, $obj); -} - =item $hashl->si_size(I<$bytes>) Returns I<bytes> as a human-readable SI-size, such as "1.0k", "50.7M", "2.1G". The returned string is always six characters long. -=cut - -sub si_size { - my ($self, $bytes) = @_; - my @post = (' ', qw(k M G T)); - - while ($bytes >= 1024) { - $bytes /= 1024; - shift @post; - } - - return sprintf("%6.1f%s", $bytes, $post[0]); -} - =item $hashl->hash_file(I<$file>) Returns the SHA1 hash of the first n bytes (as configured via B<read_size>) of I<file> -=cut - -sub hash_file { - my ($self, $file) = @_; - my ($fh, $data); - - open($fh, '<', $file) - or die("Can't open ${file} for reading: $!\n");; - binmode($fh) - or die("Can't set binmode on ${file}: $!\n"); - read($fh, $data, $self->{config}->{read_size}) - or die("Can't read ${file}: $!\n"); - close($fh) - or die("Can't close ${file}: $!\n"); - - return sha1_hex($data); -} - =item $hashl->hash_in_db(I<$hash>) Checks if I<hash> is in the database. If it is, returns the filename it is associated with. If it is ignored, returns "// ignored" (subject to change). -Otherwise, returns undef. - -=cut - -sub hash_in_db { - my ($self, $hash) = @_; - - if ($self->{ignored}->{$hash}) { - return '// ignored'; - } - - for my $name ($self->files()) { - my $file = $self->file($name); - - if ($file->{hash} eq $hash) { - return $name; - } - } - return undef; -} +Otherwise, returns false. =item $hashl->file_in_db(I<$file>) Checks if I<file>'s hash is in the database. For the return value, see B<hash_in_db>. -=cut - -sub file_in_db { - my ($self, $file) = @_; - - return $self->hash_in_db($self->hash_file($file)); -} - =item $hashl->read_size() Returns the current read size. Note that once an B<App::Hashl> object has been created, it is not possible to change the read size. -=cut - -sub read_size { - my ($self) = @_; - return $self->{config}->{read_size}; -} - =item $hashl->file(I<$name>) Returns a hashref describing the file. The layout is as follows: @@ -175,35 +243,14 @@ Returns a hashref describing the file. The layout is as follows: mtime => mtime as UNIX timestamp, size => file size in bytes, -=cut - -sub file { - my ($self, $name) = @_; - return $self->{files}->{$name}; -} - =item $hashl->delete_file(I<$name>) Remove the file from the database -=cut - -sub delete_file { - my ($self, $name) = @_; - delete $self->{files}->{$name}; -} - =item $hashl->files() Returns a list of all file names in the database -=cut - -sub files { - my ($self) = @_; - return keys %{ $self->{files} }; -} - =item $hashl->add_file(I<%data>) Add a file to the database. Required keys in I<%data> are: @@ -223,84 +270,33 @@ Full path to the file If the file already is in the database, it is only updated if both the file size and the mtime have changed. -=cut - -sub add_file { - my ($self, %data) = @_; - my $file = $data{file}; - my $path = $data{path}; - my ($size, $mtime) = (stat($path))[7,9]; - - if ($self->file($file) and - $self->file($file)->{mtime} == $mtime and - $self->file($file)->{size} == $size ) { - return; - } - - my $hash = $self->hash_file($path); - - if ($self->{ignored}->{$hash}) { - return; - } - - $self->{files}->{$file} = { - hash => $hash, - mtime => $mtime, - size => $size, - }; -} - =item $hashl->ignored() Returns a list of all ignored file hashes -=cut - -sub ignored { - my ($self) = @_; - if (exists $self->{ignored}) { - return keys %{ $self->{ignored} }; - } - else { - return (); - } -} - =item $hashl->ignore(I<$file>, I<$path>) Removes I<$file> from the database and adds I<$path> to the list of ignored file hashes. -=cut - -sub ignore { - my ($self, $file, $path) = @_; - - $self->delete_file($file); - $self->{ignored}->{ $self->hash_file($path) } = 1; -} - =item $hashl->save(I<$file>) Save the B<App::Hashl> object with all data to I<$file>. It can later be retrieved via B<new_from_file>. -=cut +=back -sub save { - my ($self, $file) = @_; - nstore($self, $file); -} +=head1 DIAGNOSTICS -1; +FIXME -__END__ +=head1 DEPENDENCIES -=back +Digest::SHA(3pm); -=head1 DEPENDENCIES +=head1 BUGS AND LIMITATIONS -B<Digest::SHA>. +FIXME =head1 AUTHOR |