summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xbin/hashl226
-rw-r--r--lib/App/Hashl.pm336
2 files changed, 303 insertions, 259 deletions
diff --git a/bin/hashl b/bin/hashl
index aaf17b9..ec93f24 100755
--- a/bin/hashl
+++ b/bin/hashl
@@ -14,20 +14,20 @@ use Getopt::Long;
use IO::Handle;
use Time::Progress;
-my $base = getcwd();
-my $rel_paths = 1;
-my $db_file = '.hashl.db';
-my $total = 0;
-my $cur = 0;
+my $base = getcwd();
+my $rel_paths = 1;
+my $db_file = '.hashl.db';
+my $total = 0;
+my $cur = 0;
my $show_progress = 1;
my $timer;
my $incoming_dir;
my $read_size;
-my ($find_ref, $find_db_write);
+my ( $find_ref, $find_db_write );
my $hashl;
-my $VERSION = '0.1';
+our $VERSION = '0.1';
STDERR->autoflush(1);
@@ -38,7 +38,7 @@ GetOptions(
'V|version' => sub { say "hashl version ${VERSION}"; exit 0 },
) or usage();
-if (substr($db_file, 0, 1) ne '/') {
+if ( substr( $db_file, 0, 1 ) ne q{/} ) {
$db_file = "${base}/${db_file}";
}
@@ -49,24 +49,21 @@ sub usage {
Usage: $0 [options] <update|list|info|...> [args]
See 'perldoc -F $0' (or 'man hashl' if it is properly installed)
EOF
-
}
-if (not defined $action) {
+if ( not defined $action ) {
usage();
}
-if (-r $db_file) {
+if ( -r $db_file ) {
$hashl = App::Hashl->new_from_file($db_file);
}
else {
- $hashl = App::Hashl->new(
- read_size => $read_size,
- );
+ $hashl = App::Hashl->new( read_size => $read_size );
}
-$SIG{INT} = \&quit_save_db;
-$SIG{TERM} = \&quit_save_db;
+local $SIG{INT} = \&quit_save_db;
+local $SIG{TERM} = \&quit_save_db;
sub quit_save_db {
$hashl->save($db_file);
@@ -75,83 +72,97 @@ sub quit_save_db {
sub get_total {
my $file = $File::Find::name;
- if (-f $file and not -l $file and $file ne $db_file) {
+
+ if ( -f $file and not -l $file and $file ne $db_file ) {
$total++;
}
+
+ return;
}
sub drop_deleted {
- for my $file ($hashl->files()) {
- if (! -e $file) {
+ for my $file ( $hashl->files() ) {
+ if ( not -e $file ) {
$hashl->delete_file($file);
}
}
+
+ return;
}
sub copy_file {
- my ($file, $to) = @_;
+ my ( $file, $to ) = @_;
- my $base = substr($file, length($base) + 1);
- if ($base =~ s{ / [^/]+ $}{}x) {
- mkdirs($incoming_dir, $base);
+ my $cp_base = substr( $file, length($base) + 1 );
+ if ( $base =~ s{ / [^/]+ $}{}x ) {
+ mkdirs( $incoming_dir, $cp_base );
}
- copy($file, "${to}/${base}")
- or die("Cannot copy ${file} to ${to}/${base}: $!\n");
+ copy( $file, "${to}/${cp_base}" )
+ or die("Cannot copy ${file} to ${to}/${cp_base}: $!\n");
+
+ return;
}
sub db_find_new {
- my ($file, $path) = @_;
+ my ( $file, $path ) = @_;
- if (not $hashl->file_in_db($path)) {
+ if ( not $hashl->file_in_db($path) ) {
say "\r\e[2K${file}";
}
+
+ return;
}
sub db_find_known {
- my ($file, $path) = @_;
+ my ( $file, $path ) = @_;
- if ($hashl->file_in_db($path)) {
+ if ( $hashl->file_in_db($path) ) {
say "\r\e[2K${file}";
}
+
+ return;
}
sub db_info {
printf(
"Read size: %d bytes (%s)\n",
- $hashl->read_size(),
- $hashl->si_size($hashl->read_size),
+ $hashl->read_size(), $hashl->si_size( $hashl->read_size ),
);
+
+ return;
}
sub file_info {
my ($file) = @_;
+
my $entry = $hashl->file($file);
- if (not $entry) {
+ if ( not $entry ) {
die("No such file in database\n");
}
printf(
"File: %s\nSize: %d bytes (%s)\nHash: %s\n",
- $file,
- $entry->{size},
- $hashl->si_size($entry->{size}),
+ $file, $entry->{size}, $hashl->si_size( $entry->{size} ),
$entry->{hash},
);
+
+ return;
}
sub process_file {
- my ($code, $write) = ($find_ref, $find_db_write);
+ my ( $code, $write ) = ( $find_ref, $find_db_write );
+
my $file = $File::Find::name;
my $path = $file;
- if (not -f $file or -l $file or $file eq $db_file) {
+ if ( not -f $file or -l $file or $file eq $db_file ) {
return;
}
if ($rel_paths) {
- $file = substr($file, length($base) + 1);
+ $file = substr( $file, length($base) + 1 );
}
$cur++;
@@ -159,86 +170,100 @@ sub process_file {
if ($show_progress) {
print STDERR $timer->report(
"\r\e[2KScanning directory: %p done, %L elapsed, %E remaining",
- $cur,
- );
+ $cur, );
}
- &{$code}($file, $path);
+ &{$code}( $file, $path );
- if ($write and (($cur % 5000) == 0)) {
+ if ( $write and ( ( $cur % 5000 ) == 0 ) ) {
$hashl->save($db_file);
}
+
+ return;
}
sub db_update {
- my ($file, $path) = @_;
+ my ( $file, $path ) = @_;
$hashl->add_file(
file => $file,
path => $path,
);
+
+ return;
}
sub db_ignore {
- my ($file, $path) = @_;
+ my ( $file, $path ) = @_;
+
+ $hashl->ignore( $file, $path );
- $hashl->ignore($file, $path);
+ return;
}
sub db_copy {
- my ($file, $path) = @_;
+ my ( $file, $path ) = @_;
- if (not $hashl->file_in_db($path)) {
- copy_file($path, $incoming_dir);
+ if ( not $hashl->file_in_db($path) ) {
+ copy_file( $path, $incoming_dir );
}
+
+ return;
}
sub mkdirs {
- my ($base, $new) = @_;
+ my ( $dir_base, $new ) = @_;
- for my $dir (split(qr{/}, $new)) {
- $base .= "/$dir";
- if (! -d $base) {
- mkdir($base)
- or die("Cannot create ${base}: $!\n");
+ for my $dir ( split( qr{/}, $new ) ) {
+ $dir_base .= "/$dir";
+ if ( !-d $dir_base ) {
+ mkdir($dir_base)
+ or die("Cannot create ${dir_base}: $!\n");
}
}
+
+ return;
}
sub prepare_db_run {
my ($dir) = @_;
+
$dir //= $base;
- if (not $show_progress) {
+ if ( not $show_progress ) {
return;
}
- find(\&get_total, $dir);
+ find( \&get_total, $dir );
$timer = Time::Progress->new();
$timer->attr(
min => 1,
max => $total,
);
+
return;
}
sub cmd_copy {
- prepare_db_run();
($incoming_dir) = @_;
- if (not $incoming_dir) {
+ prepare_db_run();
+
+ if ( not $incoming_dir ) {
usage();
}
- if (substr($incoming_dir, 0, 1) ne '/') {
- $incoming_dir = $base . '/' . $incoming_dir;
+ if ( substr( $incoming_dir, 0, 1 ) ne q{/} ) {
+ $incoming_dir = $base . q{/} . $incoming_dir;
}
- $find_ref = \&db_copy;
+ $find_ref = \&db_copy;
$find_db_write = 0;
- find(\&process_file, $base);
+ find( \&process_file, $base );
print "\n";
+
+ return;
}
sub cmd_find_known {
@@ -246,16 +271,18 @@ sub cmd_find_known {
$dir //= $base;
- if (substr($dir, 0, 1) ne '/') {
- $dir = $base . '/' . $dir;
+ if ( substr( $dir, 0, 1 ) ne q{/} ) {
+ $dir = $base . q{/} . $dir;
}
prepare_db_run($dir);
- $find_ref = \&db_find_known;
+ $find_ref = \&db_find_known;
$find_db_write = 0;
- find(\&process_file, $dir);
+ find( \&process_file, $dir );
print "\n";
+
+ return;
}
sub cmd_find_new {
@@ -263,16 +290,18 @@ sub cmd_find_new {
$new_dir //= $base;
- if (substr($new_dir, 0, 1) ne '/') {
- $new_dir = $base . '/' . $new_dir;
+ if ( substr( $new_dir, 0, 1 ) ne q{/} ) {
+ $new_dir = $base . q{/} . $new_dir;
}
prepare_db_run($new_dir);
- $find_ref = \&db_find_new;
+ $find_ref = \&db_find_new;
$find_db_write = 0;
- find(\&process_file, $new_dir);
+ find( \&process_file, $new_dir );
print "\n";
+
+ return;
}
sub cmd_ignore {
@@ -282,15 +311,17 @@ sub cmd_ignore {
prepare_db_run();
- if (substr($ign_dir, 0, 1) ne '/') {
- $ign_dir = $base . '/' . $ign_dir;
+ if ( substr( $ign_dir, 0, 1 ) ne q{/} ) {
+ $ign_dir = $base . q{/} . $ign_dir;
}
- $find_ref = \&db_ignore;
+ $find_ref = \&db_ignore;
$find_db_write = 1;
- find(\&process_file, $ign_dir);
+ find( \&process_file, $ign_dir );
$hashl->save($db_file);
print "\n";
+
+ return;
}
sub cmd_info {
@@ -302,44 +333,53 @@ sub cmd_info {
else {
db_info();
}
+
+ return;
}
sub cmd_list {
printf(
"# hashl v%s Read Size %d bytes (%s)\n",
- $VERSION,
- $hashl->read_size(),
- $hashl->si_size($hashl->read_size()),
+ $VERSION, $hashl->read_size(), $hashl->si_size( $hashl->read_size() ),
);
- for my $name (sort $hashl->files()) {
+
+ for my $name ( sort $hashl->files() ) {
my $file = $hashl->file($name);
- printf(
- "%s %-7s %s\n",
- $file->{hash},
- $hashl->si_size($file->{size}),
- $name
- );
+ printf( "%s %-7s %s\n",
+ $file->{hash}, $hashl->si_size( $file->{size} ), $name );
}
+
+ return;
}
sub cmd_list_files {
- say join("\n", sort $hashl->files());
+ say join( "\n", sort $hashl->files() );
+
+ return;
}
sub cmd_list_ignored {
- for my $hash ($hashl->ignored()) {
+ for my $hash ( $hashl->ignored() ) {
say $hash;
}
+
+ return;
}
sub cmd_update {
drop_deleted();
prepare_db_run();
- $find_ref = \&db_update;
+
+ $find_ref = \&db_update;
$find_db_write = 1;
- find(\&process_file, $base);
+
+ find( \&process_file, $base );
+
print "\n";
+
$hashl->save($db_file);
+
+ return;
}
given ($action) {
@@ -352,7 +392,7 @@ given ($action) {
when ('list-files') { cmd_list_files(@ARGV) }
when ('list-ignored') { cmd_list_ignored(@ARGV) }
when ('update') { cmd_update(@ARGV) }
- default { usage() }
+ default { usage() }
}
__END__
@@ -365,6 +405,10 @@ B<hashl> - Create database with partial file hashes, check if other files are in
B<hashl> [B<-d> I<dbfile>] [B<-s> I<read-size>] I<action> [I<args>]
+=head1 VERSION
+
+This manual documents hashl version 0.1
+
=head1 DESCRIPTION
Actions:
@@ -439,6 +483,10 @@ Print version information.
=back
+=head1 EXIT STATUS
+
+Unless an error occured, B<hashl> will always return zero.
+
=head1 CONFIGURATION
None, so far
diff --git a/lib/App/Hashl.pm b/lib/App/Hashl.pm
index e8015b2..6ca02bc 100644
--- a/lib/App/Hashl.pm
+++ b/lib/App/Hashl.pm
@@ -7,7 +7,164 @@ use 5.010;
use Digest::SHA qw(sha1_hex);
use Storable qw(nstore retrieve);
-my $VERSION = '0.1';
+our $VERSION = '0.1';
+
+sub new {
+ my ( $obj, %conf ) = @_;
+
+ my $ref = {
+ files => {},
+ ignored => {},
+ };
+
+ $ref->{config} = \%conf;
+ $ref->{config}->{read_size} //= ( 2**20 ) * 4; # 4 MiB
+
+ return bless( $ref, $obj );
+}
+
+sub new_from_file {
+ my ( $obj, $file ) = @_;
+ my $ref = retrieve($file);
+ return bless( $ref, $obj );
+}
+
+sub si_size {
+ my ( $self, $bytes ) = @_;
+ my @post = ( q{ }, qw(k M G T) );
+
+ while ( $bytes >= 1024 ) {
+ $bytes /= 1024;
+ shift @post;
+ }
+
+ return sprintf( '%6.1f%s', $bytes, $post[0] );
+}
+
+sub hash_file {
+ my ( $self, $file ) = @_;
+ my $data;
+
+ #<<< perltidy has problems indenting 'or die' with tabs
+
+ open( my $fh, '<', $file )
+ or die("Can't open ${file} for reading: $!\n");
+ binmode($fh)
+ or die("Can't set binmode on ${file}: $!\n");
+ read( $fh, $data, $self->{config}->{read_size} )
+ or die("Can't read ${file}: $!\n");
+ close($fh)
+ or die("Can't close ${file}: $!\n");
+
+ #>>>
+ return sha1_hex($data);
+}
+
+sub hash_in_db {
+ my ( $self, $hash ) = @_;
+
+ if ( $self->{ignored}->{$hash} ) {
+ return '// ignored';
+ }
+
+ for my $name ( $self->files() ) {
+ my $file = $self->file($name);
+
+ if ( $file->{hash} eq $hash ) {
+ return $name;
+ }
+ }
+ return;
+}
+
+sub file_in_db {
+ my ( $self, $file ) = @_;
+
+ return $self->hash_in_db( $self->hash_file($file) );
+}
+
+sub read_size {
+ my ($self) = @_;
+
+ return $self->{config}->{read_size};
+}
+
+sub file {
+ my ( $self, $name ) = @_;
+
+ return $self->{files}->{$name};
+}
+
+sub delete_file {
+ my ( $self, $name ) = @_;
+
+ delete $self->{files}->{$name};
+
+ return;
+}
+
+sub files {
+ my ($self) = @_;
+
+ return keys %{ $self->{files} };
+}
+
+sub add_file {
+ my ( $self, %data ) = @_;
+ my $file = $data{file};
+ my $path = $data{path};
+ my ( $size, $mtime ) = ( stat($path) )[ 7, 9 ];
+
+ if ( $self->file($file)
+ and $self->file($file)->{mtime} == $mtime
+ and $self->file($file)->{size} == $size )
+ {
+ return;
+ }
+
+ my $hash = $self->hash_file($path);
+
+ if ( $self->{ignored}->{$hash} ) {
+ return;
+ }
+
+ $self->{files}->{$file} = {
+ hash => $hash,
+ mtime => $mtime,
+ size => $size,
+ };
+
+ return 1;
+}
+
+sub ignored {
+ my ($self) = @_;
+
+ if ( exists $self->{ignored} ) {
+ return keys %{ $self->{ignored} };
+ }
+
+ return ();
+}
+
+sub ignore {
+ my ( $self, $file, $path ) = @_;
+
+ $self->delete_file($file);
+ $self->{ignored}->{ $self->hash_file($path) } = 1;
+
+ return 1;
+}
+
+sub save {
+ my ( $self, $file ) = @_;
+
+ return nstore( $self, $file );
+}
+
+1;
+
+__END__
=head1 NAME
@@ -47,126 +204,37 @@ How many bytes of a file to consider for the hash. Defaults to 4 MiB (4 *
=back
-=cut
-
-sub new {
- my ($obj, %conf) = @_;
- my $ref = {
- files => {},
- ignored => {},
- };
-
- $ref->{config} = \%conf;
- $ref->{config}->{read_size} //= (2 ** 20) * 4, # 4 MiB
-
- return bless($ref, $obj);
-}
-
=item $hashl = App::Hashl->new_from_file(I<$file>)
Returns the B<App::Hashl> object saved to I<file> by a prior $hashl->save
call.
-=cut
-
-sub new_from_file {
- my ($obj, $file) = @_;
- my $ref = retrieve($file);
- return bless($ref, $obj);
-}
-
=item $hashl->si_size(I<$bytes>)
Returns I<bytes> as a human-readable SI-size, such as "1.0k", "50.7M", "2.1G".
The returned string is always six characters long.
-=cut
-
-sub si_size {
- my ($self, $bytes) = @_;
- my @post = (' ', qw(k M G T));
-
- while ($bytes >= 1024) {
- $bytes /= 1024;
- shift @post;
- }
-
- return sprintf("%6.1f%s", $bytes, $post[0]);
-}
-
=item $hashl->hash_file(I<$file>)
Returns the SHA1 hash of the first n bytes (as configured via B<read_size>) of
I<file>
-=cut
-
-sub hash_file {
- my ($self, $file) = @_;
- my ($fh, $data);
-
- open($fh, '<', $file)
- or die("Can't open ${file} for reading: $!\n");;
- binmode($fh)
- or die("Can't set binmode on ${file}: $!\n");
- read($fh, $data, $self->{config}->{read_size})
- or die("Can't read ${file}: $!\n");
- close($fh)
- or die("Can't close ${file}: $!\n");
-
- return sha1_hex($data);
-}
-
=item $hashl->hash_in_db(I<$hash>)
Checks if I<hash> is in the database. If it is, returns the filename it is
associated with. If it is ignored, returns "// ignored" (subject to change).
-Otherwise, returns undef.
-
-=cut
-
-sub hash_in_db {
- my ($self, $hash) = @_;
-
- if ($self->{ignored}->{$hash}) {
- return '// ignored';
- }
-
- for my $name ($self->files()) {
- my $file = $self->file($name);
-
- if ($file->{hash} eq $hash) {
- return $name;
- }
- }
- return undef;
-}
+Otherwise, returns false.
=item $hashl->file_in_db(I<$file>)
Checks if I<file>'s hash is in the database. For the return value, see
B<hash_in_db>.
-=cut
-
-sub file_in_db {
- my ($self, $file) = @_;
-
- return $self->hash_in_db($self->hash_file($file));
-}
-
=item $hashl->read_size()
Returns the current read size. Note that once an B<App::Hashl> object has
been created, it is not possible to change the read size.
-=cut
-
-sub read_size {
- my ($self) = @_;
- return $self->{config}->{read_size};
-}
-
=item $hashl->file(I<$name>)
Returns a hashref describing the file. The layout is as follows:
@@ -175,35 +243,14 @@ Returns a hashref describing the file. The layout is as follows:
mtime => mtime as UNIX timestamp,
size => file size in bytes,
-=cut
-
-sub file {
- my ($self, $name) = @_;
- return $self->{files}->{$name};
-}
-
=item $hashl->delete_file(I<$name>)
Remove the file from the database
-=cut
-
-sub delete_file {
- my ($self, $name) = @_;
- delete $self->{files}->{$name};
-}
-
=item $hashl->files()
Returns a list of all file names in the database
-=cut
-
-sub files {
- my ($self) = @_;
- return keys %{ $self->{files} };
-}
-
=item $hashl->add_file(I<%data>)
Add a file to the database. Required keys in I<%data> are:
@@ -223,84 +270,33 @@ Full path to the file
If the file already is in the database, it is only updated if both the file
size and the mtime have changed.
-=cut
-
-sub add_file {
- my ($self, %data) = @_;
- my $file = $data{file};
- my $path = $data{path};
- my ($size, $mtime) = (stat($path))[7,9];
-
- if ($self->file($file) and
- $self->file($file)->{mtime} == $mtime and
- $self->file($file)->{size} == $size ) {
- return;
- }
-
- my $hash = $self->hash_file($path);
-
- if ($self->{ignored}->{$hash}) {
- return;
- }
-
- $self->{files}->{$file} = {
- hash => $hash,
- mtime => $mtime,
- size => $size,
- };
-}
-
=item $hashl->ignored()
Returns a list of all ignored file hashes
-=cut
-
-sub ignored {
- my ($self) = @_;
- if (exists $self->{ignored}) {
- return keys %{ $self->{ignored} };
- }
- else {
- return ();
- }
-}
-
=item $hashl->ignore(I<$file>, I<$path>)
Removes I<$file> from the database and adds I<$path> to the list of ignored
file hashes.
-=cut
-
-sub ignore {
- my ($self, $file, $path) = @_;
-
- $self->delete_file($file);
- $self->{ignored}->{ $self->hash_file($path) } = 1;
-}
-
=item $hashl->save(I<$file>)
Save the B<App::Hashl> object with all data to I<$file>. It can later be
retrieved via B<new_from_file>.
-=cut
+=back
-sub save {
- my ($self, $file) = @_;
- nstore($self, $file);
-}
+=head1 DIAGNOSTICS
-1;
+FIXME
-__END__
+=head1 DEPENDENCIES
-=back
+Digest::SHA(3pm);
-=head1 DEPENDENCIES
+=head1 BUGS AND LIMITATIONS
-B<Digest::SHA>.
+FIXME
=head1 AUTHOR