blob: 7f6574519bdb85debdf87508bfde7b8b49b26ef6 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
#!/usr/bin/env perl
use strict;
use warnings;
use 5.010;
use WWW::Mechanize;
# Single browser object shared by every sub in this script.
# stack_depth => 0: per the WWW::Mechanize constructor documentation this
# keeps no page history, which suits a crawler that never calls back().
my $mech = WWW::Mechanize->new(
stack_depth => 0,
);
# Locate the "next" link on the page currently loaded in $mech.
#
# Link texts are tried against two case-insensitive patterns, strictest
# first: a text consisting solely of "next", then any text that merely
# contains "next".
#
# Returns the first match reported by $mech->find_link (the caller uses it
# as a link object).
# Dies with "Cannot find next link" when neither pattern matches.
sub find_next_link {
    my @patterns = (
        qr{ ^ next $ }ix,
        qr{ next }ix,
    );

    for my $pattern (@patterns) {
        my $candidate = $mech->find_link(text_regex => $pattern);
        return $candidate if $candidate;
    }

    die("Cannot find next link\n");
}
# Read the first line of a file.
#
# Parameters:
#   $filename - path of the file to read.
#
# Returns the first line with its trailing newline removed. Returns nothing
# (undef in scalar context) when the file cannot be opened or contains no
# data; both situations are reported with warn() so that the caller decides
# whether they are fatal.
sub read_file {
    my ($filename) = @_;

    my $fh;
    if (not open($fh, '<', $filename)) {
        warn("Cannot open $filename: $!\n");
        return;
    }

    my $line = <$fh>;
    close($fh) or warn("Cannot close $filename: $!\n");

    # An empty file makes readline return undef. Passing that to chomp would
    # only produce an obscure "uninitialized value" warning, so report the
    # real problem and return early instead.
    if (not defined $line) {
        warn("$filename is empty\n");
        return;
    }

    chomp $line;
    return $line;
}
# Persist the URI of the page currently loaded in $mech to the file
# 'last_uri' in the working directory, so a later run can resume from it.
#
# Takes no arguments and returns nothing.
# Dies when 'last_uri' cannot be opened or closed.
sub save_lasturi {
    # Resolve the URI before touching the file: $mech->uri is undefined
    # until a page has been fetched, and opening with '>' truncates. Doing
    # it the other way round would wipe a previously saved URI (and then
    # die on the undefined value) when interrupted during the first request.
    my $current = $mech->uri;
    if (not defined $current) {
        return;
    }

    open(my $fh, '>', 'last_uri') or die("Cannot open last_uri: $!\n");
    print {$fh} $current->as_string;

    # Buffered write errors surface at close, hence the fatal check here.
    close($fh) or die("Cannot close last_uri: $!\n");
    return;
}
# Starting point: the URI given on the command line, otherwise the one
# stored in 'last_uri'.
my $uri = shift;
if (not $uri) {
    $uri = read_file('last_uri');
}
my $image_re = read_file('image_re');

# Both values are mandatory before crawling starts.
if (!(defined $uri && defined $image_re)) {
    die("last_uri or image_re not found / specified\n");
}

# On Ctrl-C, record the current position and leave with a success status.
$SIG{INT} = sub {
    save_lasturi();
    exit(0);
};

# Follow "next" links page by page for as long as every request succeeds
# with HTTP status 200.
PAGE:
while (1) {
    my $response = $mech->get($uri);
    last PAGE if not $response;
    last PAGE if not $mech->success();
    last PAGE if $mech->status() != 200;

    # Progress output: the URI of the loaded page, then the URI requested.
    my $current = $mech->uri->as_string;
    say $current;
    say $uri;

    my $next_link = find_next_link();
    $uri = $next_link->URI->abs->as_string;

    # A "next" link pointing back at the current page would never advance.
    if ($mech->uri->as_string eq $uri) {
        save_lasturi();
        die("Looks like we're in a loop, bailing out\n");
    }

    # Pause for one second between requests.
    sleep(1);
}
__END__
=head1 NAME
=head1 SYNOPSIS
=head1 DESCRIPTION
=head1 OPTIONS
=head1 EXIT STATUS
=head1 CONFIGURATION
=head1 DEPENDENCIES
=head1 BUGS AND LIMITATIONS
=head1 AUTHOR
Copyright (C) 2010 by Daniel Friesel E<lt>derf@chaosdorf.deE<gt>
=head1 LICENSE
0. You just DO WHAT THE FUCK YOU WANT TO.
|