1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
|
#!/usr/bin/env perl
use strict;
use warnings;
use 5.010;
use WWW::Mechanize;
sub line_to_file {
my ($line, $file) = @_;
open(my $fh, '>', $file) or die("Can't open $file for writing: $!\n");
say {$fh} $line;
close($fh) or die("Can't close $file: $!\n");
return;
}
my @mechs;
my @images;
my @unique_images;
my ($image_re, $cache) = (q{}) x 2;
my $length;
local $| = 1;
if (@ARGV != 3 ) {
die("Need three URLs to compare (first, second, last but one)\n");
}
print 'Fetching pages';
for my $url (@ARGV) {
push(@mechs, WWW::Mechanize->new( stackdepth => 0 ));
$mechs[-1]->get($url);
print q{.};
}
print "\nComparing images";
for my $i ( 0 .. $#mechs ) {
for my $image ($mechs[$i]->find_all_images()) {
push(@{$images[$i]}, $image->url_abs());
}
print q{.};
}
print "\n";
# A bit fragile so far. We assume that every site is exactly the same, except
# for the actual comic image. For this to work, we need to be sure that we are
# not comparing with a first or last site, because those may be missing a
# next/prev icon and therefore confuse us.
for my $i ( 0 .. $#{$images[1]} ) {
if ($images[1]->[$i] ne $images[2]->[$i]) {
push(@unique_images, [$images[1]->[$i], $images[2]->[$i]]);
}
}
# XKCD has a weird robot detection image. So we just take the first
# @unique_images element for now. Again, this could use more elegance some
# time.
if (length($unique_images[0]->[0]) <= length($unique_images[0]->[1])) {
$length = length($unique_images[0]->[0]);
}
else {
$length = length($unique_images[0]->[1]);
}
for my $offset ( 0 .. $length ) {
my $char1 = substr($unique_images[0]->[0], $offset, 1);
my $char2 = substr($unique_images[0]->[1], $offset, 1);
if ($char1 ne $char2) {
$image_re .= q{.+};
last;
}
$cache .= $char1;
# Prevent using .../something.+ if we happen to have two images whose
# names start with the same letter(s). Again, fragile.
if ($char1 =~ / [^a-zA-Z0-9] /x) {
$image_re .= $cache;
$cache = q{};
}
}
line_to_file($ARGV[0], 'last_uri');
line_to_file($image_re, 'image_re');
print "\nimage_re: ${image_re}\n\n";
print "If this is correct, type 'comirror' to start mirroring\n";
|