summaryrefslogtreecommitdiff
path: root/bin/comirror
blob: a0992124d094c2a6c08d9625a75e037ffdaf9a15 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/usr/bin/env perl
use strict;
use warnings;
use 5.010;

use WWW::Mechanize;

# The single browser object used by the whole script. stack_depth => 0 asks
# Mechanize to keep no page history, so following a long chain of links does
# not accumulate previously visited pages.
my $mech = WWW::Mechanize->new(stack_depth => 0);

# Abort the program with the command-line usage message.
# Never returns. The message ends in a newline, so Perl does not append
# "at FILE line N" to it.
sub usage {
	my $message = "Usage: comirror <start url>\n";
	die $message;
}

# Locate the link on the page currently loaded in $mech that leads to the
# next page.
#
# The patterns are tried in order, so a link whose text is exactly "next"
# (case-insensitive) is preferred over one whose text merely contains "next".
# Returns whatever $mech->find_link hands back for the first pattern that
# yields a true value; dies with "Cannot find next link" if none does.
sub find_next_link {
	my @patterns = (
		qr{ ^ next $ }ix,
		qr{   next   }ix,
	);

	for my $pattern (@patterns) {
		my $candidate = $mech->find_link(text_regex => $pattern);
		return $candidate if $candidate;
	}

	die("Cannot find next link\n");
}

# Main program: starting at the URL given as the first command-line argument,
# fetch a page, print where we are, and move on to the page behind its
# "next" link until a fetch fails or no further progress is possible.
my $uri = shift;
usage() if not $uri;

PAGE:
while (1) {
	# Leave the loop as soon as the fetch, the success check or the
	# status-200 check fails (evaluated in this order, each only if the
	# previous one passed).
	last PAGE if not $mech->get($uri);
	last PAGE if not $mech->success();
	last PAGE if not $mech->status() == 200;

	# First line: the URI Mechanize reports for the loaded page;
	# second line: the URI that was requested.
	my $current = $mech->uri->as_string;
	say $current;
	say $uri;

	# Explicit parentheses make it unambiguous that this is a call to the
	# find_next_link sub followed by method calls on the returned link.
	$uri = find_next_link()->URI->abs->as_string;

	# A "next" link pointing at the page we are already on would make us
	# fetch the same page forever.
	die("Looks like we're in a loop, bailing out\n")
		if $uri eq $current;

	# Pause between requests.
	sleep(1);
}




__END__

=head1 NAME

comirror - follow a chain of "next" links starting from a given URL

=head1 SYNOPSIS

  comirror <start url>

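=head1 DESCRIPTION

B<comirror> fetches the start URL and prints the URL of the loaded page
followed by the URL that was requested.  It then looks for a link whose text
is exactly "next" (case-insensitive), falling back to a link whose text merely
contains "next", and repeats the process with that link's absolute URL,
sleeping one second between requests.

The program aborts with an error message if no such link can be found, or if
the link found points at the page that was just loaded.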

=head1 OPTIONS

=head1 EXIT STATUS

=head1 CONFIGURATION

=head1 DEPENDENCIES

=head1 BUGS AND LIMITATIONS

=head1 AUTHOR

Copyright (C) 2010 by Daniel Friesel E<lt>derf@chaosdorf.deE<gt>

=head1 LICENSE

  0. You just DO WHAT THE FUCK YOU WANT TO.