summaryrefslogtreecommitdiff
path: root/bin/comirror-setup
blob: 7f728208ff596626772d0dba4c8d817ab4841498 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/env perl
use strict;
use warnings;
use 5.010;

use WWW::Mechanize;

sub line_to_file {
	my ($line, $file) = @_;
	open(my $fh, '>', $file) or die("Can't open $file for writing: $!\n");
	say {$fh} $line;
	close($fh) or die("Can't close $file: $!\n");
	return;
}

my @mechs;
my @images;
my @unique_images;
my ($image_re, $cache) = (q{}) x 2;
my $length;

local $| = 1;

if (@ARGV != 3 ) {
	die("Need three URLs to compare (first, second, last but one)\n");
}

print 'Fetching pages';

for my $url (@ARGV) {
	push(@mechs, WWW::Mechanize->new( stackdepth => 0 ));
	$mechs[-1]->get($url);
	print q{.};
}

print "\nComparing images";

for my $i ( 0 .. $#mechs ) {
	for my $image ($mechs[$i]->find_all_images()) {
		push(@{$images[$i]}, $image->url_abs());
	}
	print q{.};
}

print "\n";

# A bit fragile so far. We assume that every site is exactly the same, except
# for the actual comic image. For this to work, we need to be sure that we are
# not comparing with a first or last site, because those may be missing a
# next/prev icon and therefore confuse us.

for my $i ( 0 .. $#{$images[1]} ) {
	if ($images[1]->[$i] ne $images[2]->[$i]) {
		push(@unique_images, [$images[1]->[$i], $images[2]->[$i]]);
	}
}

# XKCD has a weird robot detection image. So we just take the first
# @unique_images element for now. Again, this could use more elegance some
# time.

if (length($unique_images[0]->[0]) <= length($unique_images[0]->[1])) {
	$length = length($unique_images[0]->[0]);
}
else {
	$length = length($unique_images[0]->[1]);
}

for my $offset ( 0 .. $length ) {
	my $char1 = substr($unique_images[0]->[0], $offset, 1);
	my $char2 = substr($unique_images[0]->[1], $offset, 1);

	if ($char1 ne $char2) {
		$image_re .= q{.+};
		last;
	}

	$cache .= $char1;

	# Prevent using .../something.+ if we happen to have two images whose
	# names start with the same letter(s). Again, fragile.
	if ($char1 =~ / [^a-zA-Z0-9] /x) {
		$image_re .= $cache;
		$cache = q{};
	}
}

line_to_file($ARGV[0], 'last_uri');
line_to_file($image_re, 'image_re');

print "\nimage_re: ${image_re}\n\n";
print "If this is correct, type 'comirror' to start mirroring\n";