summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xbin/hashl3
-rw-r--r--lib/App/Hashl.pm157
-rw-r--r--t/29-app-hashl.t7
3 files changed, 141 insertions, 26 deletions
diff --git a/bin/hashl b/bin/hashl
index 89c0219..3948821 100755
--- a/bin/hashl
+++ b/bin/hashl
@@ -165,13 +165,10 @@ sub process_file {
sub db_update {
my ($file, $path) = @_;
- my ($size, $mtime) = (stat($path))[7,9];
$hashl->add_file(
file => $file,
- mtime => $mtime,
path => $path,
- size => $size,
);
}
diff --git a/lib/App/Hashl.pm b/lib/App/Hashl.pm
index 9ce17a5..56c925b 100644
--- a/lib/App/Hashl.pm
+++ b/lib/App/Hashl.pm
@@ -21,12 +21,34 @@ App::Hashl - Partially hash files, check if files are equal etc.
my $hashl = App::Hashl->new();
# or: App::Hashl->new_from_file($database_file);
- for my $file (@files) {
- $hashl->add_file($file, {
- hash
+=head1 VERSION
-=cut
+This manual documents App::Hashl version 0.2
+
+=head1 DESCRIPTION
+
+App::Hashl contains utilities to hash the first n bytes of a file, store and
+recall this, check if another file is already in the database and optionally
+ignore file hashes.
+
+=head1 METHODS
+
+=over
+
+=item $hashl = App::Hashl->new(I<%conf>)
+
+Returns a new B<App::Hashl> object. Accepted parameters are:
+
+=over
+=item B<read_size> => I<bytes>
+
+How many bytes of a file to consider for the hash. Defaults to 4 MiB (4 *
+2**20 bytes).
+
+=back
+
+=cut
sub new {
my ($obj, %conf) = @_;
@@ -41,12 +63,26 @@ sub new {
return bless($ref, $obj);
}
+=item $hashl = App::Hashl->new_from_file(I<$file>)
+
+Returns the B<App::Hashl> object saved to I<file> by a prior $hashl->save
+call.
+
+=cut
+
sub new_from_file {
my ($obj, $file) = @_;
my $ref = retrieve($file);
return bless($ref, $obj);
}
+=item $hashl->si_size(I<$bytes>)
+
+Returns I<bytes> as a human-readable SI-size, such as "1.0k", "50.7M", "2.1G".
+The number is padded to a width of six characters and followed by the unit.
+
+=cut
+
sub si_size {
my ($self, $bytes) = @_;
my @post = (' ', qw(k M G T));
@@ -59,6 +95,13 @@ sub si_size {
return sprintf("%6.1f%s", $bytes, $post[0]);
}
+=item $hashl->hash_file(I<$file>)
+
+Returns the SHA1 hash of the first n bytes (as configured via B<read_size>) of
+I<file>
+
+=cut
+
sub hash_file {
my ($self, $file) = @_;
my ($fh, $data);
@@ -71,6 +114,14 @@ sub hash_file {
return sha1_hex($data);
}
+=item $hashl->hash_in_db(I<$hash>)
+
+Checks if I<hash> is in the database. If it is, returns the filename it is
+associated with. If it is ignored, returns "// ignored" (subject to change).
+Otherwise, returns undef.
+
+=cut
+
sub hash_in_db {
my ($self, $hash) = @_;
@@ -92,50 +143,114 @@ sub hash_in_db {
return undef;
}
+=item $hashl->file_in_db(I<$file>)
+
+Checks if I<file>'s hash is in the database. For the return value, see
+B<hash_in_db>.
+
+=cut
+
sub file_in_db {
my ($self, $file) = @_;
return $self->hash_in_db($self->hash_file($file));
}
+=item $hashl->read_size()
+
+Returns the current read size. Note that once an B<App::Hashl> object has
+been created, it is not possible to change the read size.
+
+=cut
+
sub read_size {
my ($self) = @_;
return $self->{config}->{read_size};
}
+=item $hashl->file(I<$name>)
+
+Returns a hashref describing the file. The layout is as follows:
+
+ hash => file's hash,
+ mtime => mtime as UNIX timestamp,
+ size => file size in bytes,
+
+=cut
+
sub file {
my ($self, $name) = @_;
return $self->{files}->{$name};
}
+=item $hashl->delete_file(I<$name>)
+
+Remove the file from the database
+
+=cut
+
sub delete_file {
my ($self, $name) = @_;
delete $self->{files}->{$name};
}
+=item $hashl->files()
+
+Returns a list of all file names in the database
+
+=cut
+
sub files {
my ($self) = @_;
return sort keys %{ $self->{files} };
}
+=item $hashl->add_file(I<%data>)
+
+Add a file to the database. Required keys in I<%data> are:
+
+=over
+
+=item B<file> => I<name>
+
+Relative file name to store in the database
+
+=item B<path> => I<path>
+
+Full path to the file
+
+=back
+
+If the file already is in the database, it is only updated if the file size
+or the mtime has changed.
+
+=cut
+
sub add_file {
my ($self, %data) = @_;
my $file = $data{file};
my $path = $data{path};
+ my ($size, $mtime) = (stat($path))[7,9];
if ($self->file($file) and
- $self->file($file)->{mtime} == $data{mtime} and
- $self->file($file)->{size} == $data{size} ) {
+ $self->file($file)->{mtime} == $mtime and
+ $self->file($file)->{size} == $size ) {
return;
}
$self->{files}->{$file} = {
hash => $self->hash_file($file),
- mtime => $data{mtime},
- size => $data{size},
+ mtime => $mtime,
+ size => $size,
};
}
+=item $hashl->ignored()
+
+Returns a list of all ignored file hashes
+
+=cut
+
sub ignored {
my ($self) = @_;
if (exists $self->{ignored}->{hashes}) {
@@ -146,6 +261,13 @@ sub ignored {
}
}
+=item $hashl->ignore(I<$file>, I<$path>)
+
+Removes I<$file> from the database and adds the hash of I<$path> to the list
+of ignored file hashes.
+
+=cut
+
sub ignore {
my ($self, $file, $path) = @_;
@@ -153,6 +275,13 @@ sub ignore {
push(@{ $self->{ignored}->{hashes} }, $self->hash_file($path));
}
+=item $hashl->save(I<$file>)
+
+Save the B<App::Hashl> object with all data to I<$file>. It can later be
+retrieved via B<new_from_file>.
+
+=cut
+
sub save {
my ($self, $file) = @_;
nstore($self, $file);
@@ -162,21 +291,11 @@ sub save {
__END__
-=head1 NAME
-
-=head1 SYNOPSIS
-
-=head1 DESCRIPTION
-
-=head1 METHODS
-
-=over
-
=back
=head1 DEPENDENCIES
-=head1 SEE ALSO
+B<Digest::SHA>.
=head1 AUTHOR
diff --git a/t/29-app-hashl.t b/t/29-app-hashl.t
index 608a3a2..20d0734 100644
--- a/t/29-app-hashl.t
+++ b/t/29-app-hashl.t
@@ -29,19 +29,18 @@ is_deeply([$hashl->files()], [], 'no files in empty db');
is_deeply([$hashl->ignored()], [], 'no ignored files in empty db');
my $test_hash = $hashl->hash_file('t/in/4');
+my ($test_size, $test_mtime) = (stat('t/in/4'))[7,9];
ok($hashl->add_file(
file => 't/in/4',
path => 't/in/4',
- mtime => 123,
- size => 4,
),
'Add new file'
);
is_deeply($hashl->file('t/in/4'),
{
hash => $test_hash,
- size => 4,
- mtime => 123,
+ size => $test_size,
+ mtime => $test_mtime,
},
'hashl->file okay'
);