App::Hashl documentation

author: Daniel Friesel <derf@finalrewind.org> 2011-05-18 03:11:30 +0200
committer: Daniel Friesel <derf@finalrewind.org> 2011-05-18 03:11:30 +0200
commit: 21df787c85539e149fdcf98155e37d0237105b64 (patch)
tree: c44c46551c87cfa195546b274775d8449cd70666
parent: 8b12cdc391d6cfedf45b780e37263711aa448a87 (diff)
3 files changed, 141 insertions, 26 deletions
diff --git a/bin/hashl b/bin/hashl
index 89c0219..3948821 100755
--- a/bin/hashl
+++ b/bin/hashl
@@ -165,13 +165,10 @@ sub process_file {
 
 sub db_update {
 	my ($file, $path) = @_;
-	my ($size, $mtime) = (stat($path))[7,9];
 
 	$hashl->add_file(
 		file => $file,
-		mtime => $mtime,
 		path => $path,
-		size => $size,
 	);
 }
 
diff --git a/lib/App/Hashl.pm b/lib/App/Hashl.pm
index 9ce17a5..56c925b 100644
--- a/lib/App/Hashl.pm
+++ b/lib/App/Hashl.pm
@@ -21,12 +21,34 @@ App::Hashl - Partially hash files, check if files are equal etc.
     my $hashl = App::Hashl->new();
     # or: App::Hashl->new_from_file($database_file);
 
-    for my $file (@files) {
-        $hashl->add_file($file, {
-            hash
+=head1 VERSION
 
-=cut
+This manual documents App::Hashl version 0.2
+
+=head1 DESCRIPTION
+
+App::Hashl contains utilities to hash the first n bytes of a file, store and
+recall this, check if another file is already in the database and optionally
+ignore file hashes.
+
+=head1 METHODS
+
+=over
+
+=item $hashl = App::Hashl->new(I<%conf>)
+
+Returns a new B<App::Hashl> object. Accepted parameters are:
+
+=over
 
+=item B<read_size> => I<bytes>
+
+How many bytes of a file to consider for the hash.  Defaults to 4 MiB (4 *
+2**20 bytes).
+
+=back
+
+=cut
 
 sub new {
 	my ($obj, %conf) = @_;
@@ -41,12 +63,26 @@ sub new {
 	return bless($ref, $obj);
 }
 
+=item $hashl = App::Hashl->new_from_file(I<$file>)
+
+Returns the B<App::Hashl> object saved to I<file> by a prior $hashl->save
+call.
+
+=cut
+
 sub new_from_file {
 	my ($obj, $file) = @_;
 	my $ref = retrieve($file);
 	return bless($ref, $obj);
 }
 
+=item $hashl->si_size(I<$bytes>)
+
+Returns I<bytes> as a human-readable SI-size, such as "1.0k", "50.7M", "2.1G".
+The returned string is always six characters long.
+
+=cut
+
 sub si_size {
 	my ($self, $bytes) = @_;
 	my @post = (' ', qw(k M G T));
@@ -59,6 +95,13 @@ sub si_size {
 	return sprintf("%6.1f%s", $bytes, $post[0]);
 }
 
+=item $hashl->hash_file(I<$file>)
+
+Returns the SHA1 hash of the first n bytes (as configured via B<read_size>) of
+I<file>.
+
+=cut
+
 sub hash_file {
 	my ($self, $file) = @_;
 	my ($fh, $data);
@@ -71,6 +114,14 @@ sub hash_file {
 	return sha1_hex($data);
 }
 
+=item $hashl->hash_in_db(I<$hash>)
+
+Checks if I<hash> is in the database.  If it is, returns the filename it is
+associated with.  If it is ignored, returns "// ignored" (subject to change).
+Otherwise, returns undef.
+
+=cut
+
 sub hash_in_db {
 	my ($self, $hash) = @_;
 
@@ -92,50 +143,114 @@ sub hash_in_db {
 	return undef;
 }
 
+=item $hashl->file_in_db(I<$file>)
+
+Checks if I<file>'s hash is in the database.  For the return value, see
+B<hash_in_db>.
+
+=cut
+
 sub file_in_db {
 	my ($self, $file) = @_;
 
 	return $self->hash_in_db($self->hash_file($file));
 }
 
+=item $hashl->read_size()
+
+Returns the current read size.  Note that once an B<App::Hashl> object has
+been created, it is not possible to change the read size.
+
+=cut
+
 sub read_size {
 	my ($self) = @_;
 	return $self->{config}->{read_size};
 }
 
+=item $hashl->file(I<$name>)
+
+Returns a hashref describing the file. The layout is as follows:
+
+    hash => file's hash,
+    mtime => mtime as UNIX timestamp,
+    size => file size in bytes,
+
+=cut
+
 sub file {
 	my ($self, $name) = @_;
 	return $self->{files}->{$name};
 }
 
+=item $hashl->delete_file(I<$name>)
+
+Remove the file from the database
+
+=cut
+
 sub delete_file {
 	my ($self, $name) = @_;
 	delete $self->{files}->{$name};
 }
 
+=item $hashl->files()
+
+Returns a list of all file names in the database
+
+=cut
+
 sub files {
 	my ($self) = @_;
 	return sort keys %{ $self->{files} };
 }
 
+=item $hashl->add_file(I<%data>)
+
+Add a file to the database. Required keys in I<%data> are:
+
+=over
+
+=item B<file> => I<name>
+
+Relative file name to store in the database
+
+=item B<path> => I<path>
+
+Full path to the file
+
+=back
+
+If the file already is in the database, it is only updated if the file size
+or the mtime has changed.
+
+=cut
+
 sub add_file {
 	my ($self, %data) = @_;
 	my $file = $data{file};
 	my $path = $data{path};
+	my ($size, $mtime) = (stat($path))[7,9];
 
 	if ($self->file($file) and
-			$self->file($file)->{mtime} == $data{mtime} and
-			$self->file($file)->{size} == $data{size} ) {
+			$self->file($file)->{mtime} == $mtime and
+			$self->file($file)->{size} == $size ) {
 		return;
 	}
 
 	$self->{files}->{$file} = {
 		hash  => $self->hash_file($file),
-		mtime => $data{mtime},
-		size  => $data{size},
+		mtime => $mtime,
+		size  => $size,
 	};
 }
 
+=item $hashl->ignored()
+
+Returns a list of all ignored file hashes
+
+=cut
+
 sub ignored {
 	my ($self) = @_;
 	if (exists $self->{ignored}->{hashes}) {
@@ -146,6 +261,13 @@ sub ignored {
 	}
 }
 
+=item $hashl->ignore(I<$file>, I<$path>)
+
+Removes I<$file> from the database and adds the hash of I<$path> to the list
+of ignored file hashes.
+
+=cut
+
 sub ignore {
 	my ($self, $file, $path) = @_;
 
@@ -153,6 +275,13 @@ sub ignore {
 	push(@{ $self->{ignored}->{hashes} }, $self->hash_file($path));
 }
 
+=item $hashl->save(I<$file>)
+
+Save the B<App::Hashl> object with all data to I<$file>.  It can later be
+retrieved via B<new_from_file>.
+
+=cut
+
 sub save {
 	my ($self, $file) = @_;
 	nstore($self, $file);
@@ -162,21 +291,11 @@ sub save {
 
 __END__
 
-=head1 NAME
-
-=head1 SYNOPSIS
-
-=head1 DESCRIPTION
-
-=head1 METHODS
-
-=over
-
 =back
 
 =head1 DEPENDENCIES
 
-=head1 SEE ALSO
+B<Digest::SHA>.
 
 =head1 AUTHOR
 
diff --git a/t/29-app-hashl.t b/t/29-app-hashl.t
index 608a3a2..20d0734 100644
--- a/t/29-app-hashl.t
+++ b/t/29-app-hashl.t
@@ -29,19 +29,18 @@ is_deeply([$hashl->files()], [], 'no files in empty db');
 is_deeply([$hashl->ignored()], [], 'no ignored files in empty db');
 
 my $test_hash = $hashl->hash_file('t/in/4');
+my ($test_size, $test_mtime) = (stat('t/in/4'))[7,9];
 ok($hashl->add_file(
 		file => 't/in/4',
 		path => 't/in/4',
-		mtime => 123,
-		size => 4,
 	),
 	'Add new file'
 );
 is_deeply($hashl->file('t/in/4'),
 	{
 		hash => $test_hash,
-		size => 4,
-		mtime => 123,
+		size => $test_size,
+		mtime => $test_mtime,
 	},
 	'hashl->file okay'
 );
author	Daniel Friesel <derf@finalrewind.org>	2011-05-18 03:11:30 +0200
committer	Daniel Friesel <derf@finalrewind.org>	2011-05-18 03:11:30 +0200
commit	21df787c85539e149fdcf98155e37d0237105b64 (patch)
tree	c44c46551c87cfa195546b274775d8449cd70666
parent	8b12cdc391d6cfedf45b780e37263711aa448a87 (diff)