diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/App/Hashl.pm | 157 |
1 files changed, 138 insertions, 19 deletions
diff --git a/lib/App/Hashl.pm b/lib/App/Hashl.pm index 9ce17a5..56c925b 100644 --- a/lib/App/Hashl.pm +++ b/lib/App/Hashl.pm @@ -21,12 +21,34 @@ App::Hashl - Partially hash files, check if files are equal etc. my $hashl = App::Hashl->new(); # or: App::Hashl->new_from_file($database_file); - for my $file (@files) { - $hashl->add_file($file, { - hash +=head1 VERSION -=cut +This manual documents App::Hashl version 0.2 + +=head1 DESCRIPTION + +App::Hashl contains utilities to hash the first n bytes of a file, store and +recall this, check if another file is already in the database and optionally +ignore file hashes. + +=head1 METHODS + +=over + +=item $hashl = App::Hashl->new(I<%conf>) + +Returns a new B<App::Hashl> object. Accepted parameters are: + +=over +=item B<read_size> => I<bytes> + +How many bytes of a file to consider for the hash. Defaults to 4 MiB (4 * +2**20 bytes). + +=back + +=cut sub new { my ($obj, %conf) = @_; @@ -41,12 +63,26 @@ sub new { return bless($ref, $obj); } +=item $hashl = App::Hashl->new_from_file(I<$file>) + +Returns the B<App::Hashl> object saved to I<file> by a prior $hashl->save +call. + +=cut + sub new_from_file { my ($obj, $file) = @_; my $ref = retrieve($file); return bless($ref, $obj); } +=item $hashl->si_size(I<$bytes>) + +Returns I<bytes> as a human-readable SI-size, such as "1.0k", "50.7M", "2.1G". +The returned string is always six characters long. + +=cut + sub si_size { my ($self, $bytes) = @_; my @post = (' ', qw(k M G T)); @@ -59,6 +95,13 @@ sub si_size { return sprintf("%6.1f%s", $bytes, $post[0]); } +=item $hashl->hash_file(I<$file>) + +Returns the SHA1 hash of the first n bytes (as configured via B<read_size>) of +I<file> + +=cut + sub hash_file { my ($self, $file) = @_; my ($fh, $data); @@ -71,6 +114,14 @@ sub hash_file { return sha1_hex($data); } +=item $hashl->hash_in_db(I<$hash>) + +Checks if I<hash> is in the database. If it is, returns the filename it is +associated with. 
If it is ignored, returns "// ignored" (subject to change). +Otherwise, returns undef. + +=cut + sub hash_in_db { my ($self, $hash) = @_; @@ -92,50 +143,114 @@ sub hash_in_db { return undef; } +=item $hashl->file_in_db(I<$file>) + +Checks if I<file>'s hash is in the database. For the return value, see +B<hash_in_db>. + +=cut + sub file_in_db { my ($self, $file) = @_; return $self->hash_in_db($self->hash_file($file)); } +=item $hashl->read_size() + +Returns the current read size. Note that once an B<App::Hashl> object has +been created, it is not possible to change the read size. + +=cut + sub read_size { my ($self) = @_; return $self->{config}->{read_size}; } +=item $hashl->file(I<$name>) + +Returns a hashref describing the file. The layout is as follows: + + hash => file's hash, + mtime => mtime as UNIX timestamp, + size => file size in bytes, + +=cut + sub file { my ($self, $name) = @_; return $self->{files}->{$name}; } +=item $hashl->delete_file(I<$name>) + +Remove the file from the database + +=cut + sub delete_file { my ($self, $name) = @_; delete $self->{files}->{$name}; } +=item $hashl->files() + +Returns a list of all file names in the database + +=cut + sub files { my ($self) = @_; return sort keys %{ $self->{files} }; } +=item $hashl->add_file(I<%data>) + +Add a file to the database. Required keys in I<%data> are: + +=over + +=item B<file> => I<name> + +Relative file name to store in the database + +=item B<path> => I<path> + +Full path to the file + +=back + +If the file already is in the database, it is only updated if the file size +or the mtime has changed. 
+ +=cut + sub add_file { my ($self, %data) = @_; my $file = $data{file}; my $path = $data{path}; + my ($size, $mtime) = (stat($path))[7,9]; if ($self->file($file) and - $self->file($file)->{mtime} == $data{mtime} and - $self->file($file)->{size} == $data{size} ) { + $self->file($file)->{mtime} == $mtime and + $self->file($file)->{size} == $size ) { return; } $self->{files}->{$file} = { hash => $self->hash_file($file), - mtime => $data{mtime}, - size => $data{size}, + mtime => $mtime, + size => $size, }; } +=item $hashl->ignored() + +Returns a list of all ignored file hashes + +=cut + sub ignored { my ($self) = @_; if (exists $self->{ignored}->{hashes}) { @@ -146,6 +261,13 @@ sub ignored { } } +=item $hashl->ignore(I<$file>, I<$path>) + +Removes I<$file> from the database and adds I<$path> to the list of ignored +file hashes. + +=cut + sub ignore { my ($self, $file, $path) = @_; @@ -153,6 +275,13 @@ sub ignore { push(@{ $self->{ignored}->{hashes} }, $self->hash_file($path)); } +=item $hashl->save(I<$file>) + +Save the B<App::Hashl> object with all data to I<$file>. It can later be +retrieved via B<new_from_file>. + +=cut + sub save { my ($self, $file) = @_; nstore($self, $file); @@ -162,21 +291,11 @@ sub save { __END__ -=head1 NAME - -=head1 SYNOPSIS - -=head1 DESCRIPTION - -=head1 METHODS - -=over - =back =head1 DEPENDENCIES -=head1 SEE ALSO +B<Digest::SHA>. =head1 AUTHOR |