summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Friesel <derf@derf.homelinux.org>2010-05-22 11:23:10 +0200
committerDaniel Friesel <derf@derf.homelinux.org>2010-05-22 11:23:10 +0200
commita95f3dd8887fa3f4f340b621688ec42ae795e50a (patch)
tree008b29e3fec6aec73cb3dc05868685b6db0a6d2d
parent753cd2636e2fbc211405342dcf879a6a29572eee (diff)
Add extremely primitive but working[tm] comirror-setup script
-rwxr-xr-xbin/comirror-setup96
1 files changed, 96 insertions, 0 deletions
diff --git a/bin/comirror-setup b/bin/comirror-setup
new file mode 100755
index 0000000..0a7acc4
--- /dev/null
+++ b/bin/comirror-setup
@@ -0,0 +1,96 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use 5.010;
+
+use constant {
+ MIN_COMIC_DIM => 12
+};
+
+use WWW::Mechanize;
+
+sub line_to_file {
+ my ($line, $file) = @_;
+ open(my $fh, '>', $file) or die("Can't open $file for writing: $!\n");
+ say {$fh} $line;
+ close($fh) or die("Can't close $file: $!\n");
+ return;
+}
+
+my @mechs;
+my @images;
+my @unique_images;
+my ($image_re, $cache) = (q{}) x 2;
+my $length;
+
+local $| = 1;
+
+if (@ARGV != 3 ) {
+ die("Need three URLs to compare (first, second, last but one)\n");
+}
+
+print 'Fetching pages';
+
+for my $url (@ARGV) {
+ push(@mechs, WWW::Mechanize->new( stackdepth => 0 ));
+ $mechs[-1]->get($url);
+ print q{.};
+}
+
+print "\nComparing images";
+
+for my $i ( 0 .. $#mechs ) {
+ for my $image ($mechs[$i]->find_all_images()) {
+ push(@{$images[$i]}, $image->url_abs());
+ }
+ print q{.};
+}
+
+print "\n";
+
+# A bit fragile so far. We assume that every site is exactly the same, except
+# for the actual comic image. For this to work, we need to be sure that we are
+# not comparing with a first or last site, because those may be missing a
+# next/prev icon and therefore confuse us.
+
+for my $i ( 0 .. $#{$images[1]} ) {
+ if ($images[1]->[$i] ne $images[2]->[$i]) {
+ push(@unique_images, [$images[1]->[$i], $images[2]->[$i]]);
+ }
+}
+
+# XKCD has a weird robot detection image. So we just take the first
+# @unique_images element for now. Again, this could use more elegance some
+# time.
+
+if (length($unique_images[0]->[0]) <= length($unique_images[0]->[1])) {
+ $length = length($unique_images[0]->[0]);
+}
+else {
+ $length = length($unique_images[0]->[1]);
+}
+
+for my $offset ( 0 .. $length ) {
+ my $char1 = substr($unique_images[0]->[0], $offset, 1);
+ my $char2 = substr($unique_images[0]->[1], $offset, 1);
+
+ if ($char1 ne $char2) {
+ $image_re .= q{.+};
+ last;
+ }
+
+ $cache .= $char1;
+
+ # Prevent using .../something.+ if we happen to have two images whose
+ # names start with the same letter(s). Again, fragile.
+ if ($char1 =~ / [^a-zA-Z0-9] /x) {
+ $image_re .= $cache;
+ $cache = q{};
+ }
+}
+
+line_to_file($ARGV[0], 'last_uri');
+line_to_file($image_re, 'image_re');
+
+print "\nimage_re: ${image_re}\n\n";
+print "If this is correct, type 'comirror' to start mirroring\n";