summary | refs | log | tree | commit | diff
path: root/bin
diff options
context:
space:
mode:
author Daniel Friesel <derf@derf.homelinux.org> 2010-05-22 21:01:18 +0200
committer Daniel Friesel <derf@derf.homelinux.org> 2010-05-22 21:01:18 +0200
commit c272dcceb53c8894a2d421390f506dc6c948af6c (patch)
tree ca872df0a8827295e1b66d695226db578cb76677 /bin
parent 38723e017d0984d5d9a1ac2a8cd78a363f450ab4 (diff)
Use comirror.{conf,state} instead of various one-line files
Diffstat (limited to 'bin')
-rwxr-xr-x bin/comirror 96
-rwxr-xr-x bin/comirror-setup 50
2 files changed, 88 insertions, 58 deletions
diff --git a/bin/comirror b/bin/comirror
index b5e20ab..0de6d22 100755
--- a/bin/comirror
+++ b/bin/comirror
@@ -9,10 +9,11 @@ my $mech = WWW::Mechanize->new(
stack_depth => 2,
);
-my $uri = shift || first_line('last_uri');
-my $image_re = first_line('image_re');
+my %conf = file_to_hash('comirror.conf');
+my %state = file_to_hash('comirror.state');
+my $uri = shift || $state{'uri'};
+my $image_re = $conf{'image_re'};
my $exit = 1;
-my $next_link_text;
if (not defined $uri or not defined $image_re) {
die("last_uri or image_re not found / specified\n");
@@ -20,14 +21,10 @@ if (not defined $uri or not defined $image_re) {
$image_re = qr{$image_re};
-if (-e 'next_link') {
- $next_link_text = first_line('next_link');
-}
-
sub find_next_link {
- if (defined $next_link_text) {
- my $link = $mech->find_link(text => $next_link_text);
+ if (defined $conf{'next_link'}) {
+ my $link = $mech->find_link(text => $conf{'next_link'});
if ($link) {
return $link;
}
@@ -45,7 +42,7 @@ sub find_next_link {
}
}
}
- save_lasturi();
+ save_state();
say "Cannot find next link. We might have reached the end of the comic.";
exit $exit;
}
@@ -79,36 +76,40 @@ sub get_image {
return;
}
-sub first_line {
- my ($filename) = @_;
- my ($line, $fh);
+sub file_to_hash {
+ my ($file) = @_;
+ my %return;
- if (not open($fh, '<', $filename)) {
- warn("Cannot open $filename: $!\n");
+ if (not -e $file) {
return;
}
- $line = <$fh>;
- close($fh) or warn("Cannot close $filename: $!\n");
-
- chomp $line;
- return $line;
+ open(my $fh, '<', $file) or die("Cannot read $file: $!\n");
+ while(my $line = <$fh>) {
+ $line =~ / ^ (?<key> \S+ ) [[:space:]]+ (?<value> .*) $ /x or next;
+ $return{$+{key}} = $+{value};
+ }
+ close($fh);
+ return %return;
}
-sub save_lasturi {
-
+sub save_state {
# Some webcomics have a non-regular page for the last (as in, latest)
# image. Work around this.
$mech->back();
- open(my $fh, '>', 'last_uri') or die("Cannot open last_uri: $!");
- print {$fh} $mech->uri->as_string;
+ $state{'uri'} = $mech->uri->as_string;
+
+ open(my $fh, '>', 'comirror.state') or die("Cannot open comirror.state: $!");
+ while (my ($key, $value) = each(%state)) {
+ print {$fh} "$key\t$value\n";
+ }
close($fh) or die("Cannot close last_uri: $!");
return;
}
local $SIG{INT} = sub {
- save_lasturi();
+ save_state();
exit $exit;
};
@@ -125,7 +126,7 @@ while (
$uri = find_next_link->URI->abs->as_string;
if ($uri eq $mech->uri->as_string) {
- save_lasturi();
+ save_state();
say "The 'next' link lead us to a loop.";
say "This is probably because we reached the end of the comic.";
exit $exit;
@@ -162,28 +163,49 @@ B<comirror> takes no options.
Zero if at least one new comic image was downloaded, one if either no images
were found or all found images already existed in the current directory. Any
-other non-zero means indicates grave errors.
+other non-zero return value indicates grave errors.
=head1 CONFIGURATION
B<comirror> is designed to operate in the current working directory. Images
-are saved to it and a few files ard read by B<comirror> as configuration
-parameters.
+are saved to it; the configuration is read from F<comirror.conf> and the last
+state (if any) is read from F<comirror.state>.
+
+Both files are formatted in the form key <tab> value with one key-value pair
+per line. Comments or empty lines are not supported. F<comirror.state> is
+automatically written when B<comirror> terminates.
+
+=head2 COMIRROR.CONF
+
+A little explanation of the F<comirror.conf> keys.
+Note that comirror-setup(1) will automatically create this file for you; you
+only need to edit it if comirror-setup(1) didn't work properly or you don't
+want to use it at all.
=over
-=item F<image_re>
+=item image_re
A regular expression matching the URL of the webcomic image to be saved.
-You can either create the file manually or let it be created by
-comirror-setup(1).
-=item F<last_uri>
+=item next_link
+
+The text on the link to the next image. Can be left out if it contains
+"next".
+
+=back
+
+=head2 COMIRROR.STATE
+
+You should not need to edit this.
+
+=over
+
+=item uri
-The URI to the last but one comic site before B<comirror> exited is
-automatically written to this file. If this file exists and B<comirror> is
-called without arguments, it will automatically resume crawling the webcomic
-from that point on.
+Absolute URI to the last but one comic page B<comirror> was inspecting. Can
+be overridden by the commandline argument. Exists so that B<comirror> will
+resume its comic crawl from the right point when it's started again.
=back
diff --git a/bin/comirror-setup b/bin/comirror-setup
index f5d4c76..7da0704 100755
--- a/bin/comirror-setup
+++ b/bin/comirror-setup
@@ -5,23 +5,25 @@ use 5.010;
use WWW::Mechanize;
-sub line_to_file {
- my ($line, $file) = @_;
- open(my $fh, '>', $file) or die("Can't open $file for writing: $!\n");
- say {$fh} $line;
- close($fh) or die("Can't close $file: $!\n");
- return;
-}
-
my @mechs;
my @images;
my @unique_images;
my ($image_re, $cache) = (q{}) x 2;
my $length;
-my $next_link;
+my ($conf, $state);
local $| = 1;
+sub hash_to_file {
+ my ($hash, $file) = @_;
+ open(my $fh, '>', $file) or die("Can't open $file for writing: $!\n");
+ while (my ($key, $value) = each(%{$hash})) {
+ print {$fh} "$key\t$value\n";
+ }
+ close($fh);
+ return;
+}
+
if (@ARGV != 3 ) {
die("Need three URLs to compare (first, second, last but one)\n");
}
@@ -39,7 +41,6 @@ print "\nComparing images";
for my $i ( 0 .. $#mechs ) {
for my $image ($mechs[$i]->find_all_images()) {
push(@{$images[$i]}, $image->url_abs());
- say "$i $images[$i]->[-1]";
}
print q{.};
}
@@ -48,7 +49,7 @@ print "\n";
for my $link ($mechs[0]->find_all_links()) {
if ($link->url_abs eq $ARGV[1]) {
- $next_link = $link->text;
+ $conf->{'next_link'} = $link->text;
}
}
@@ -93,13 +94,17 @@ for my $offset ( 0 .. $length ) {
}
}
-line_to_file($ARGV[0], 'last_uri');
-line_to_file($image_re, 'image_re');
-line_to_file($next_link, 'next_link');
+$state->{'uri'} = $ARGV[0];
+
+$conf->{'image_re'} = $image_re;
-print "\nimage_re: ${image_re}\n\n";
-say "\"next\" link text: ${next_link}";
-say "If this is correct, type 'comirror' to start mirroring";
+hash_to_file($conf , 'comirror.conf' );
+hash_to_file($state, 'comirror.state');
+
+say "\nimage_re: $conf->{image_re}";
+say "\"next\" link text: $conf->{next_link}";
+
+say "\nIf this is correct, type 'comirror' to start mirroring";
__END__
@@ -113,14 +118,17 @@ B<comirror-setup> I<comis urls...>
=head1 DESCRIPTION
-B<comirror-setup> takes three URL argumets: The very first page of the web
+B<comirror-setup> takes three URL arguments: The very first page of the web
comic, the second page, and the last but one page.
Based on these arguments, it tries to set up the current working directory so
that you only need to call B<comirror> to mirror the webcomic you were
pointing to. It does this by comparing the last two URLs to determine a
-correct image_re and then creating last_uri with the first URL so that
-B<comirror> will start at the right point and download the right images.
+correct regular expression for the comic images and comparing the first two to
+figure out the text on the "next" link.
+
+It then creates F<comirror.conf> and F<comirror.state> so that you only need
+to call comirror(1) to start downloading.
=head1 OPTIONS
@@ -147,7 +155,7 @@ get what you want.
=head1 AUTHOR
-Copyright (C) @@year@@ by Daniel Friesel E<lt>derf@chaosdorf.deE<gt>
+Copyright (C) 2010 by Daniel Friesel E<lt>derf@chaosdorf.deE<gt>
=head1 LICENSE