#!/usr/bin/env perl
#
# Walk a chain of web pages by following their "next" links and save the
# first image on each page whose absolute URL matches a configured regex.
#
# The start URI is taken from the first command line argument or, failing
# that, from the first line of the file 'last_uri'. The image regex is read
# from the first line of the file 'image_re'. Images are written to the
# current directory; on SIGINT or when no further page can be found, the
# resume point is written back to 'last_uri'.

use strict;
use warnings;
use 5.010;

use WWW::Mechanize;

# Shared browser object used for page navigation. A short history is kept
# because save_lasturi() steps back one page.
my $mech = WWW::Mechanize->new(
    stack_depth => 2,
);

my $uri      = shift || read_file('last_uri');
my $image_re = read_file('image_re');

if (not defined $uri or not defined $image_re) {
    die("last_uri or image_re not found / specified\n");
}

$image_re = qr{$image_re};

# Return the link on the current page that leads to the next page.
# A link whose text is exactly "next" (case-insensitive) is preferred over
# one that merely contains "next". If neither exists, the resume point is
# saved and the script dies.
sub find_next_link {
    my @candidates = (
        qr{ ^ next $ }ix,
        qr{ next }ix,
    );

    for my $re (@candidates) {
        my $link = $mech->find_link(text_regex => $re);
        return $link if $link;
    }

    save_lasturi();
    die("Cannot find next link\n");
}

# Fetch the first image on the current page whose absolute URL matches
# $image_re. Returns a separate WWW::Mechanize object holding the fetched
# image, or nothing if the page has no matching image. A separate object is
# used so that the navigation state of $mech is not disturbed.
sub find_image {
    my $image = $mech->find_image(url_abs_regex => $image_re)
        or return;

    my $tmpmech = WWW::Mechanize->new();
    $tmpmech->get($image->url_abs);
    return $tmpmech;
}

# Download the current page's image into the current directory, named after
# the last path component of the image's URI. Files that already exist are
# left alone. Dies if the file cannot be written.
sub get_image {
    my $tmpmech = find_image() or return;

    my $filename = (split(qr{/}, $tmpmech->uri->as_string))[-1];

    if (-e $filename) {
        say "img: $filename (skipped)";
        return;
    }

    say "img: $filename";

    # Image data is binary: write it without any newline translation.
    open(my $fh, '>:raw', $filename)
        or die("Cannot open $filename: $!\n");
    print {$fh} $tmpmech->content();
    close($fh) or die("Cannot close $filename: $!\n");

    return;
}

# Return the first line of $filename without its trailing newline.
# Returns nothing (undef in scalar context) and warns if the file cannot be
# opened; also returns nothing if the file is empty.
sub read_file {
    my ($filename) = @_;

    open(my $fh, '<', $filename) or do {
        warn("Cannot open $filename: $!\n");
        return;
    };

    my $line = <$fh>;
    close($fh) or warn("Cannot close $filename: $!\n");

    # An empty file yields undef; bail out before chomp would warn about it.
    return if not defined $line;

    chomp $line;
    return $line;
}

# Write the resume point to the file 'last_uri'.
# Dies if the file cannot be written.
sub save_lasturi {
    # Some webcomics have a non-regular page for the last (as in, latest)
    # image. Work around this.
    $mech->back();

    # Resolve the URI before touching the file: if no page has been loaded
    # yet (e.g. interrupted during the very first request) there is nothing
    # to save, and the existing resume point must not be truncated.
    my $current = $mech->uri;
    if (not defined $current) {
        warn("No page loaded yet, leaving last_uri untouched\n");
        return;
    }

    open(my $fh, '>', 'last_uri') or die("Cannot open last_uri: $!\n");
    print {$fh} $current->as_string;
    close($fh) or die("Cannot close last_uri: $!\n");

    return;
}

# Remember where we were when the user interrupts the run.
$SIG{INT} = sub { save_lasturi(); exit(0); };

while ( $mech->get($uri) and $mech->success() and $mech->status() == 200 ) {
    say "URI: $uri";

    get_image();

    $uri = find_next_link()->URI->abs->as_string;

    # A "next" link pointing at the page we are already on would make us
    # fetch the same page forever.
    if ($uri eq $mech->uri->as_string) {
        save_lasturi();
        die("Looks like we're in a loop, bailing out\n");
    }

    print "\n";

    # Pause for a second between page requests.
    sleep(1);
}

__END__

=head1 NAME

=head1 SYNOPSIS

=head1 DESCRIPTION

=head1 OPTIONS

=head1 EXIT STATUS

=head1 CONFIGURATION

=head1 DEPENDENCIES

=head1 BUGS AND LIMITATIONS

=head1 AUTHOR

Copyright (C) 2010 by Daniel Friesel E<lt>derf@chaosdorf.deE<gt>

=head1 LICENSE

  0. You just DO WHAT THE FUCK YOU WANT TO.