From c0c056c9e09ecd2f95a561d277dabef8ebb67b9d Mon Sep 17 00:00:00 2001
From: Daniel Friesel <derf@finalrewind.org>
Date: Fri, 20 May 2011 15:50:14 +0200
Subject: Code cleanup

---
 bin/hashl        | 226 ++++++++++++++++++++++---------------
 lib/App/Hashl.pm | 336 +++++++++++++++++++++++++++----------------------------
 2 files changed, 303 insertions(+), 259 deletions(-)

diff --git a/bin/hashl b/bin/hashl
index aaf17b9..ec93f24 100755
--- a/bin/hashl
+++ b/bin/hashl
@@ -14,20 +14,20 @@ use Getopt::Long;
 use IO::Handle;
 use Time::Progress;
 
-my $base = getcwd();
-my $rel_paths = 1;
-my $db_file = '.hashl.db';
-my $total = 0;
-my $cur = 0;
+my $base          = getcwd();
+my $rel_paths     = 1;
+my $db_file       = '.hashl.db';
+my $total         = 0;
+my $cur           = 0;
 my $show_progress = 1;
 my $timer;
 my $incoming_dir;
 my $read_size;
-my ($find_ref, $find_db_write);
+my ( $find_ref, $find_db_write );
 
 my $hashl;
 
-my $VERSION = '0.1';
+our $VERSION = '0.1';
 
 STDERR->autoflush(1);
 
@@ -38,7 +38,7 @@ GetOptions(
 	'V|version'     => sub { say "hashl version ${VERSION}"; exit 0 },
 ) or usage();
 
-if (substr($db_file, 0, 1) ne '/') {
+if ( substr( $db_file, 0, 1 ) ne q{/} ) {
 	$db_file = "${base}/${db_file}";
 }
 
@@ -49,24 +49,21 @@ sub usage {
 Usage: $0 [options] <update|list|info|...> [args]
 See 'perldoc -F $0' (or 'man hashl' if it is properly installed)
 EOF
-
 }
 
-if (not defined $action) {
+if ( not defined $action ) {
 	usage();
 }
 
-if (-r $db_file) {
+if ( -r $db_file ) {
 	$hashl = App::Hashl->new_from_file($db_file);
 }
 else {
-	$hashl = App::Hashl->new(
-		read_size => $read_size,
-	);
+	$hashl = App::Hashl->new( read_size => $read_size );
 }
 
-$SIG{INT} = \&quit_save_db;
-$SIG{TERM} = \&quit_save_db;
+local $SIG{INT}  = \&quit_save_db;
+local $SIG{TERM} = \&quit_save_db;
 
 sub quit_save_db {
 	$hashl->save($db_file);
@@ -75,83 +72,97 @@ sub quit_save_db {
 
 sub get_total {
 	my $file = $File::Find::name;
-	if (-f $file and not -l $file and $file ne $db_file) {
+
+	if ( -f $file and not -l $file and $file ne $db_file ) {
 		$total++;
 	}
+
+	return;
 }
 
 sub drop_deleted {
-	for my $file ($hashl->files()) {
-		if (! -e $file) {
+	for my $file ( $hashl->files() ) {
+		if ( not -e $file ) {
 			$hashl->delete_file($file);
 		}
 	}
+
+	return;
 }
 
 sub copy_file {
-	my ($file, $to) = @_;
+	my ( $file, $to ) = @_;
 
-	my $base = substr($file, length($base) + 1);
-	if ($base =~ s{ / [^/]+ $}{}x) {
-		mkdirs($incoming_dir, $base);
+	my $cp_base = substr( $file, length($base) + 1 );
+	if ( $base =~ s{ / [^/]+ $}{}x ) {
+		mkdirs( $incoming_dir, $cp_base );
 	}
 
-	copy($file, "${to}/${base}")
-		or die("Cannot copy ${file} to ${to}/${base}: $!\n");
+	copy( $file, "${to}/${cp_base}" )
+	  or die("Cannot copy ${file} to ${to}/${cp_base}: $!\n");
+
+	return;
 }
 
 sub db_find_new {
-	my ($file, $path) = @_;
+	my ( $file, $path ) = @_;
 
-	if (not $hashl->file_in_db($path)) {
+	if ( not $hashl->file_in_db($path) ) {
 		say "\r\e[2K${file}";
 	}
+
+	return;
 }
 
 sub db_find_known {
-	my ($file, $path) = @_;
+	my ( $file, $path ) = @_;
 
-	if ($hashl->file_in_db($path)) {
+	if ( $hashl->file_in_db($path) ) {
 		say "\r\e[2K${file}";
 	}
+
+	return;
 }
 
 sub db_info {
 	printf(
 		"Read size: %d bytes (%s)\n",
-		$hashl->read_size(),
-		$hashl->si_size($hashl->read_size),
+		$hashl->read_size(), $hashl->si_size( $hashl->read_size ),
 	);
+
+	return;
 }
 
 sub file_info {
 	my ($file) = @_;
+
 	my $entry = $hashl->file($file);
 
-	if (not $entry) {
+	if ( not $entry ) {
 		die("No such file in database\n");
 	}
 
 	printf(
 		"File: %s\nSize: %d bytes (%s)\nHash: %s\n",
-		$file,
-		$entry->{size},
-		$hashl->si_size($entry->{size}),
+		$file, $entry->{size}, $hashl->si_size( $entry->{size} ),
 		$entry->{hash},
 	);
+
+	return;
 }
 
 sub process_file {
-	my ($code, $write) = ($find_ref, $find_db_write);
+	my ( $code, $write ) = ( $find_ref, $find_db_write );
+
 	my $file = $File::Find::name;
 	my $path = $file;
 
-	if (not -f $file or -l $file or $file eq $db_file) {
+	if ( not -f $file or -l $file or $file eq $db_file ) {
 		return;
 	}
 
 	if ($rel_paths) {
-		$file = substr($file, length($base) + 1);
+		$file = substr( $file, length($base) + 1 );
 	}
 
 	$cur++;
@@ -159,86 +170,100 @@ sub process_file {
 	if ($show_progress) {
 		print STDERR $timer->report(
 			"\r\e[2KScanning directory: %p done, %L elapsed, %E remaining",
-			$cur,
-		);
+			$cur, );
 	}
 
-	&{$code}($file, $path);
+	&{$code}( $file, $path );
 
-	if ($write and (($cur % 5000) == 0)) {
+	if ( $write and ( ( $cur % 5000 ) == 0 ) ) {
 		$hashl->save($db_file);
 	}
+
+	return;
 }
 
 sub db_update {
-	my ($file, $path) = @_;
+	my ( $file, $path ) = @_;
 
 	$hashl->add_file(
 		file => $file,
 		path => $path,
 	);
+
+	return;
 }
 
 sub db_ignore {
-	my ($file, $path) = @_;
+	my ( $file, $path ) = @_;
+
+	$hashl->ignore( $file, $path );
 
-	$hashl->ignore($file, $path);
+	return;
 }
 
 sub db_copy {
-	my ($file, $path) = @_;
+	my ( $file, $path ) = @_;
 
-	if (not $hashl->file_in_db($path)) {
-		copy_file($path, $incoming_dir);
+	if ( not $hashl->file_in_db($path) ) {
+		copy_file( $path, $incoming_dir );
 	}
+
+	return;
 }
 
 sub mkdirs {
-	my ($base, $new) = @_;
+	my ( $dir_base, $new ) = @_;
 
-	for my $dir (split(qr{/}, $new)) {
-		$base .= "/$dir";
-		if (! -d $base) {
-			mkdir($base)
-				or die("Cannot create ${base}: $!\n");
+	for my $dir ( split( qr{/}, $new ) ) {
+		$dir_base .= "/$dir";
+		if ( !-d $dir_base ) {
+			mkdir($dir_base)
+			  or die("Cannot create ${dir_base}: $!\n");
 		}
 	}
+
+	return;
 }
 
 sub prepare_db_run {
 	my ($dir) = @_;
+
 	$dir //= $base;
 
-	if (not $show_progress) {
+	if ( not $show_progress ) {
 		return;
 	}
 
-	find(\&get_total, $dir);
+	find( \&get_total, $dir );
 
 	$timer = Time::Progress->new();
 	$timer->attr(
 		min => 1,
 		max => $total,
 	);
+
 	return;
 }
 
 sub cmd_copy {
-	prepare_db_run();
 	($incoming_dir) = @_;
 
-	if (not $incoming_dir) {
+	prepare_db_run();
+
+	if ( not $incoming_dir ) {
 		usage();
 	}
 
-	if (substr($incoming_dir, 0, 1) ne '/') {
-		$incoming_dir = $base . '/' . $incoming_dir;
+	if ( substr( $incoming_dir, 0, 1 ) ne q{/} ) {
+		$incoming_dir = $base . q{/} . $incoming_dir;
 	}
 
-	$find_ref = \&db_copy;
+	$find_ref      = \&db_copy;
 	$find_db_write = 0;
-	find(\&process_file, $base);
+	find( \&process_file, $base );
 	print "\n";
+
+	return;
 }
 
 sub cmd_find_known {
@@ -246,16 +271,18 @@ sub cmd_find_known {
 
 	$dir //= $base;
 
-	if (substr($dir, 0, 1) ne '/') {
-		$dir = $base . '/' . $dir;
+	if ( substr( $dir, 0, 1 ) ne q{/} ) {
+		$dir = $base . q{/} . $dir;
 	}
 
 	prepare_db_run($dir);
 
-	$find_ref = \&db_find_known;
+	$find_ref      = \&db_find_known;
 	$find_db_write = 0;
-	find(\&process_file, $dir);
+	find( \&process_file, $dir );
 	print "\n";
+
+	return;
 }
 
 sub cmd_find_new {
@@ -263,16 +290,18 @@ sub cmd_find_new {
 
 	$new_dir //= $base;
 
-	if (substr($new_dir, 0, 1) ne '/') {
-		$new_dir = $base . '/' . $new_dir;
+	if ( substr( $new_dir, 0, 1 ) ne q{/} ) {
+		$new_dir = $base . q{/} . $new_dir;
 	}
 
 	prepare_db_run($new_dir);
 
-	$find_ref = \&db_find_new;
+	$find_ref      = \&db_find_new;
 	$find_db_write = 0;
-	find(\&process_file, $new_dir);
+	find( \&process_file, $new_dir );
 	print "\n";
+
+	return;
 }
 
 sub cmd_ignore {
@@ -282,15 +311,17 @@ sub cmd_ignore {
 
 	prepare_db_run();
 
-	if (substr($ign_dir, 0, 1) ne '/') {
-		$ign_dir = $base . '/' . $ign_dir;
+	if ( substr( $ign_dir, 0, 1 ) ne q{/} ) {
+		$ign_dir = $base . q{/} . $ign_dir;
 	}
 
-	$find_ref = \&db_ignore;
+	$find_ref      = \&db_ignore;
 	$find_db_write = 1;
-	find(\&process_file, $ign_dir);
+	find( \&process_file, $ign_dir );
 	$hashl->save($db_file);
 	print "\n";
+
+	return;
 }
 
 sub cmd_info {
@@ -302,44 +333,53 @@ sub cmd_info {
 	else {
 		db_info();
 	}
+
+	return;
 }
 
 sub cmd_list {
 	printf(
 		"# hashl v%s   Read Size %d bytes (%s)\n",
-		$VERSION,
-		$hashl->read_size(),
-		$hashl->si_size($hashl->read_size()),
+		$VERSION, $hashl->read_size(), $hashl->si_size( $hashl->read_size() ),
 	);
-	for my $name (sort $hashl->files()) {
+
+	for my $name ( sort $hashl->files() ) {
 		my $file = $hashl->file($name);
-		printf(
-			"%s %-7s %s\n",
-			$file->{hash},
-			$hashl->si_size($file->{size}),
-			$name
-		);
+		printf( "%s %-7s %s\n",
+			$file->{hash}, $hashl->si_size( $file->{size} ), $name );
 	}
+
+	return;
 }
 
 sub cmd_list_files {
-	say join("\n", sort $hashl->files());
+	say join( "\n", sort $hashl->files() );
+
+	return;
 }
 
 sub cmd_list_ignored {
-	for my $hash ($hashl->ignored()) {
+	for my $hash ( $hashl->ignored() ) {
 		say $hash;
 	}
+
+	return;
 }
 
 sub cmd_update {
 	drop_deleted();
 	prepare_db_run();
-	$find_ref = \&db_update;
+
+	$find_ref      = \&db_update;
 	$find_db_write = 1;
-	find(\&process_file, $base);
+
+	find( \&process_file, $base );
+
 	print "\n";
+
 	$hashl->save($db_file);
+
+	return;
 }
 
 given ($action) {
@@ -352,7 +392,7 @@ given ($action) {
 	when ('list-files')   { cmd_list_files(@ARGV) }
 	when ('list-ignored') { cmd_list_ignored(@ARGV) }
 	when ('update')       { cmd_update(@ARGV) }
-	default { usage() }
+	default               { usage() }
 }
 
 __END__
@@ -365,6 +405,10 @@ B<hashl> - Create database with partial file hashes, check if other files are in
 
 B<hashl> [B<-d> I<dbfile>] [B<-s> I<read-size>] I<action> [I<args>]
 
+=head1 VERSION
+
+This manual documents hashl version 0.1
+
 =head1 DESCRIPTION
 
 Actions:
@@ -439,6 +483,10 @@ Print version information.
 
 =back
 
+=head1 EXIT STATUS
+
+Unless an error occured, B<hashl> will always return zero.
+
 =head1 CONFIGURATION
 
 None, so far
diff --git a/lib/App/Hashl.pm b/lib/App/Hashl.pm
index e8015b2..6ca02bc 100644
--- a/lib/App/Hashl.pm
+++ b/lib/App/Hashl.pm
@@ -7,7 +7,164 @@ use 5.010;
 use Digest::SHA qw(sha1_hex);
 use Storable qw(nstore retrieve);
 
-my $VERSION = '0.1';
+our $VERSION = '0.1';
+
+sub new {
+	my ( $obj, %conf ) = @_;
+
+	my $ref = {
+		files   => {},
+		ignored => {},
+	};
+
+	$ref->{config} = \%conf;
+	$ref->{config}->{read_size} //= ( 2**20 ) * 4;    # 4 MiB
+
+	return bless( $ref, $obj );
+}
+
+sub new_from_file {
+	my ( $obj, $file ) = @_;
+	my $ref = retrieve($file);
+	return bless( $ref, $obj );
+}
+
+sub si_size {
+	my ( $self, $bytes ) = @_;
+	my @post = ( q{ }, qw(k M G T) );
+
+	while ( $bytes >= 1024 ) {
+		$bytes /= 1024;
+		shift @post;
+	}
+
+	return sprintf( '%6.1f%s', $bytes, $post[0] );
+}
+
+sub hash_file {
+	my ( $self, $file ) = @_;
+	my $data;
+
+	#<<< perltidy has problems indenting 'or die' with tabs
+
+	open( my $fh, '<', $file )
+		or die("Can't open ${file} for reading: $!\n");
+	binmode($fh)
+		or die("Can't set binmode on ${file}: $!\n");
+	read( $fh, $data, $self->{config}->{read_size} )
+		or die("Can't read ${file}: $!\n");
+	close($fh)
+		or die("Can't close ${file}: $!\n");
+
+	#>>>
+	return sha1_hex($data);
+}
+
+sub hash_in_db {
+	my ( $self, $hash ) = @_;
+
+	if ( $self->{ignored}->{$hash} ) {
+		return '// ignored';
+	}
+
+	for my $name ( $self->files() ) {
+		my $file = $self->file($name);
+
+		if ( $file->{hash} eq $hash ) {
+			return $name;
+		}
+	}
+	return;
+}
+
+sub file_in_db {
+	my ( $self, $file ) = @_;
+
+	return $self->hash_in_db( $self->hash_file($file) );
+}
+
+sub read_size {
+	my ($self) = @_;
+
+	return $self->{config}->{read_size};
+}
+
+sub file {
+	my ( $self, $name ) = @_;
+
+	return $self->{files}->{$name};
+}
+
+sub delete_file {
+	my ( $self, $name ) = @_;
+
+	delete $self->{files}->{$name};
+
+	return;
+}
+
+sub files {
+	my ($self) = @_;
+
+	return keys %{ $self->{files} };
+}
+
+sub add_file {
+	my ( $self, %data ) = @_;
+	my $file = $data{file};
+	my $path = $data{path};
+	my ( $size, $mtime ) = ( stat($path) )[ 7, 9 ];
+
+	if (    $self->file($file)
+		and $self->file($file)->{mtime} == $mtime
+		and $self->file($file)->{size} == $size )
+	{
+		return;
+	}
+
+	my $hash = $self->hash_file($path);
+
+	if ( $self->{ignored}->{$hash} ) {
+		return;
+	}
+
+	$self->{files}->{$file} = {
+		hash  => $hash,
+		mtime => $mtime,
+		size  => $size,
+	};
+
+	return 1;
+}
+
+sub ignored {
+	my ($self) = @_;
+
+	if ( exists $self->{ignored} ) {
+		return keys %{ $self->{ignored} };
+	}
+
+	return ();
+}
+
+sub ignore {
+	my ( $self, $file, $path ) = @_;
+
+	$self->delete_file($file);
+	$self->{ignored}->{ $self->hash_file($path) } = 1;
+
+	return 1;
+}
+
+sub save {
+	my ( $self, $file ) = @_;
+
+	return nstore( $self, $file );
+}
+
+1;
+
+__END__
 
 =head1 NAME
 
@@ -47,126 +204,37 @@ How many bytes of a file to consider for the hash.  Defaults to 4 MiB (4 *
 
 =back
 
-=cut
-
-sub new {
-	my ($obj, %conf) = @_;
-	my $ref = {
-		files => {},
-		ignored => {},
-	};
-
-	$ref->{config} = \%conf;
-	$ref->{config}->{read_size} //=  (2 ** 20) * 4, # 4 MiB
-
-	return bless($ref, $obj);
-}
-
 =item $hashl = App::Hashl->new_from_file(I<$file>)
 
 Returns the B<App::Hashl> object saved to I<file> by a prior $hashl->save
 call.
 
-=cut
-
-sub new_from_file {
-	my ($obj, $file) = @_;
-	my $ref = retrieve($file);
-	return bless($ref, $obj);
-}
-
 =item $hashl->si_size(I<$bytes>)
 
 Returns I<bytes> as a human-readable SI-size, such as "1.0k", "50.7M", "2.1G".
 The returned string is always six characters long.
 
-=cut
-
-sub si_size {
-	my ($self, $bytes) = @_;
-	my @post = (' ', qw(k M G T));
-
-	while ($bytes >= 1024) {
-		$bytes /= 1024;
-		shift @post;
-	}
-
-	return sprintf("%6.1f%s", $bytes, $post[0]);
-}
-
 =item $hashl->hash_file(I<$file>)
 
 Returns the SHA1 hash of the first n bytes (as configured via B<read_size>) of
 I<file>
 
-=cut
-
-sub hash_file {
-	my ($self, $file) = @_;
-	my ($fh, $data);
-
-	open($fh, '<', $file)
-		or die("Can't open ${file} for reading: $!\n");;
-	binmode($fh)
-		or die("Can't set binmode on ${file}: $!\n");
-	read($fh, $data, $self->{config}->{read_size})
-		or die("Can't read ${file}: $!\n");
-	close($fh)
-		or die("Can't close ${file}: $!\n");
-
-	return sha1_hex($data);
-}
-
 =item $hashl->hash_in_db(I<$hash>)
 
 Checks if I<hash> is in the database.  If it is, returns the filename it is
 associated with.  If it is ignored, returns "// ignored" (subject to change).
-Otherwise, returns undef.
-
-=cut
-
-sub hash_in_db {
-	my ($self, $hash) = @_;
-
-	if ($self->{ignored}->{$hash}) {
-		return '// ignored';
-	}
-
-	for my $name ($self->files()) {
-		my $file = $self->file($name);
-
-		if ($file->{hash} eq $hash) {
-			return $name;
-		}
-	}
-	return undef;
-}
+Otherwise, returns false.
 
 =item $hashl->file_in_db(I<$file>)
 
 Checks if I<file>'s hash is in the database.  For the return value, see
 B<hash_in_db>.
 
-=cut
-
-sub file_in_db {
-	my ($self, $file) = @_;
-
-	return $self->hash_in_db($self->hash_file($file));
-}
-
 =item $hashl->read_size()
 
 Returns the current read size.  Note that once an B<App::Hashl> object has
 been created, it is not possible to change the read size.
 
-=cut
-
-sub read_size {
-	my ($self) = @_;
-	return $self->{config}->{read_size};
-}
-
 =item $hashl->file(I<$name>)
 
 Returns a hashref describing the file. The layout is as follows:
@@ -175,35 +243,14 @@ Returns a hashref describing the file. The layout is as follows:
     mtime => mtime as UNIX timestamp,
     size => file size in bytes,
 
-=cut
-
-sub file {
-	my ($self, $name) = @_;
-	return $self->{files}->{$name};
-}
-
 =item $hashl->delete_file(I<$name>)
 
 Remove the file from the database
 
-=cut
-
-sub delete_file {
-	my ($self, $name) = @_;
-	delete $self->{files}->{$name};
-}
-
 =item $hashl->files()
 
 Returns a list of all file names in the database
 
-=cut
-
-sub files {
-	my ($self) = @_;
-	return keys %{ $self->{files} };
-}
-
 =item $hashl->add_file(I<%data>)
 
 Add a file to the database. Required keys in I<%data> are:
@@ -223,84 +270,33 @@ Full path to the file
 If the file already is in the database, it is only updated if both the file
 size and the mtime have changed.
 
-=cut
-
-sub add_file {
-	my ($self, %data) = @_;
-	my $file = $data{file};
-	my $path = $data{path};
-	my ($size, $mtime) = (stat($path))[7,9];
-
-	if ($self->file($file) and
-			$self->file($file)->{mtime} == $mtime and
-			$self->file($file)->{size} == $size ) {
-		return;
-	}
-
-	my $hash = $self->hash_file($path);
-
-	if ($self->{ignored}->{$hash}) {
-		return;
-	}
-
-	$self->{files}->{$file} = {
-		hash  => $hash,
-		mtime => $mtime,
-		size  => $size,
-	};
-}
-
 =item $hashl->ignored()
 
 Returns a list of all ignored file hashes
 
-=cut
-
-sub ignored {
-	my ($self) = @_;
-	if (exists $self->{ignored}) {
-		return keys %{ $self->{ignored} };
-	}
-	else {
-		return ();
-	}
-}
-
 =item $hashl->ignore(I<$file>, I<$path>)
 
 Removes I<$file> from the database and adds I<$path> to the list of ignored
 file hashes.
 
-=cut
-
-sub ignore {
-	my ($self, $file, $path) = @_;
-
-	$self->delete_file($file);
-	$self->{ignored}->{ $self->hash_file($path) } = 1;
-}
-
 =item $hashl->save(I<$file>)
 
 Save the B<App::Hashl> object with all data to I<$file>.  It can later be
 retrieved via B<new_from_file>.
 
-=cut
+=back
 
-sub save {
-	my ($self, $file) = @_;
-	nstore($self, $file);
-}
+=head1 DIAGNOSTICS
 
-1;
+FIXME
 
-__END__
+=head1 DEPENDENCIES
 
-=back
+Digest::SHA(3pm);
 
-=head1 DEPENDENCIES
+=head1 BUGS AND LIMITATIONS
 
-B<Digest::SHA>.
+FIXME
 
 =head1 AUTHOR
 
-- 
cgit v1.2.3