#!/usr/bin/env perl
#
# comirror-setup - prepare the state files needed before running 'comirror'.
#
# Usage: comirror-setup FIRST_URL SECOND_URL LAST_BUT_ONE_URL
#
# Fetches the three given comic pages, compares the image URLs found on the
# second and third page given, and derives a regular expression meant to match
# the comic image. Two files are written to the current directory:
#
#   last_uri  - the first URL given on the command line
#   image_re  - the derived image regular expression
#
# Origin: added as bin/comirror-setup by commit a95f3dd8887f ("Add extremely
# primitive but working[tm] comirror-setup script", Daniel Friesel,
# 2010-05-22).

use strict;
use warnings;
use 5.010;

use constant {
    # NOTE(review): not referenced anywhere in this script.
    MIN_COMIC_DIM => 12
};

use List::Util qw(min);
use WWW::Mechanize;

# Write $line, followed by a newline, to $file (truncating any previous
# content). Dies if the file cannot be opened or closed.
sub line_to_file {
    my ($line, $file) = @_;
    open(my $fh, '>', $file) or die("Can't open $file for writing: $!\n");
    say {$fh} $line;
    close($fh) or die("Can't close $file: $!\n");
    return;
}

# Return a reference to the list of absolute image URLs (as plain strings)
# found on the page currently loaded in $mech. The list is empty, never
# undefined, when the page has no images.
sub _image_urls {
    my ($mech) = @_;
    my @urls;

    for my $image ($mech->find_all_images()) {
        # url_abs() may hand back an object rather than a string; force a
        # plain string so length/substr/ne below behave predictably.
        push(@urls, q{} . $image->url_abs());
    }

    return \@urls;
}

# Compare two URL lists position by position and return the first pair
# ($url_a, $url_b) that differs. Only positions present in both lists are
# compared. Returns an empty list when no such pair exists.
sub _first_differing_pair {
    my ($urls_a, $urls_b) = @_;
    my $last_index = min($#{$urls_a}, $#{$urls_b});

    for my $i (0 .. $last_index) {
        if ($urls_a->[$i] ne $urls_b->[$i]) {
            return ($urls_a->[$i], $urls_b->[$i]);
        }
    }

    return;
}

# Build the image regex from two differing image URLs: the literal part they
# have in common, cut back to the last non-alphanumeric character, followed
# by '.+'.
sub _build_image_re {
    my ($first, $second) = @_;

    my $limit  = min(length($first), length($second));
    my $common = 0;

    while ( $common < $limit
        and substr($first, $common, 1) eq substr($second, $common, 1))
    {
        $common++;
    }

    my $prefix = substr($first, 0, $common);

    # Prevent using .../something.+ if we happen to have two images whose
    # names start with the same letter(s). Again, fragile.
    $prefix =~ s{ [a-zA-Z0-9]+ \z }{}x;

    # The prefix comes from a remote page and is meant literally, so escape
    # it; otherwise characters such as '?' or '+' in a URL would be treated
    # as regex metacharacters.
    # NOTE(review): assumes the reader of 'image_re' treats the file content
    # as a Perl regular expression - confirm against comirror.
    return quotemeta($prefix) . q{.+};
}

local $| = 1;

if (@ARGV != 3) {
    die("Need three URLs to compare (first, second, last but one)\n");
}

print 'Fetching pages';

my @mechs;

for my $url (@ARGV) {
    my $mech = WWW::Mechanize->new( stackdepth => 0 );
    $mech->get($url);
    push(@mechs, $mech);
    print q{.};
}

print "\nComparing images";

my @images;

for my $mech (@mechs) {
    push(@images, _image_urls($mech));
    print q{.};
}

print "\n";

# A bit fragile so far. We assume that every page is exactly the same, except
# for the actual comic image. For this to work, we need to be sure that we are
# not comparing with a first or last page, because those may be missing a
# next/prev icon and therefore confuse us. Hence only the second and third
# URL given are compared.
#
# XKCD has a weird robot detection image. So we just take the first
# differing image for now. Again, this could use more elegance some time.

my ($image_a, $image_b) = _first_differing_pair($images[1], $images[2]);

if (not defined $image_a) {
    die("Could not find an image that differs between $ARGV[1] and $ARGV[2]\n");
}

my $image_re = _build_image_re($image_a, $image_b);

line_to_file($ARGV[0], 'last_uri');
line_to_file($image_re, 'image_re');

print "\nimage_re: ${image_re}\n\n";
print "If this is correct, type 'comirror' to start mirroring\n";