blob: d03e3af36f01f189b41cecff108524a6492941bb (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
|
#!/usr/bin/env perl
use strict;
use warnings;
use 5.010;
use WWW::Mechanize;
# Single user agent shared by the whole script. A stack depth of zero tells
# WWW::Mechanize to keep no page history, so memory stays flat however many
# pages are visited.
my $mech = WWW::Mechanize->new(stack_depth => 0);
# Abort the program with a one-line usage hint.
# The trailing newline keeps Perl from appending "at FILE line N" to the text.
# Never returns.
sub usage {
    my $hint = "Usage: comirror <start url>\n";
    die $hint;
}
# Locate the link on the current page that leads to the following page.
#
# Patterns are tried in order of strictness: a link whose text is exactly
# "next" wins over one that merely contains the word. Matching is
# case-insensitive.
#
# Returns the link object reported by $mech->find_link().
# Dies with "Cannot find next link" when no pattern matches.
sub find_next_link {
    my @patterns = (
        qr{ ^ next $ }ix,
        qr{ next }ix,
    );

    for my $pattern (@patterns) {
        my $candidate = $mech->find_link(text_regex => $pattern);
        return $candidate if $candidate;
    }

    die("Cannot find next link\n");
}
# Main crawl: start at the URL given on the command line, print the URL of
# every page fetched, and keep following its "next" link for as long as the
# requests succeed with HTTP 200.
my $uri = shift or usage();

while (
    $mech->get($uri)
    and $mech->success()
    and $mech->status() == 200
    )
{
    my $current = $mech->uri()->as_string();
    say $current;

    # Use the absolute form of the link: the raw href may be relative, and a
    # relative string could never equal the absolute URI of the current page.
    $uri = find_next_link()->url_abs()->as_string();

    # URLs are text, so they must be compared with eq; the numeric == operator
    # does not compare the URL strings and made this guard ineffective.
    if ($uri eq $current) {
        die("Looks like we're in a loop, bailing out\n");
    }

    # Be polite to the server: at most one request per second.
    sleep(1);
}
__END__
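=head1 NAME

comirror - follow "next" links from a start URL and print each page's URL

=head1 SYNOPSIS

comirror I<start-url>

=head1 DESCRIPTION

Fetches the start URL, prints the URL of each page retrieved, and then follows
the link whose text matches "next" (case-insensitively), pausing one second
between requests. Stops with an error when no such link is found.
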
=head1 OPTIONS
=head1 EXIT STATUS
=head1 CONFIGURATION
=head1 DEPENDENCIES
=head1 BUGS AND LIMITATIONS
=head1 AUTHOR
Copyright (C) 2010 by Daniel Friesel E<lt>derf@chaosdorf.deE<gt>
=head1 LICENSE
0. You just DO WHAT THE FUCK YOU WANT TO.
|