はてなダイアリーローダー

てなわけで、出来ました。通称はやっぱり「はてダロ?」?(キザっぽいな)
はてダラのパッチでは実体参照サニタイズをしていませんでした(バグですバグ)がこれはちゃんとやってます。

#!/usr/bin/perl
#
# hl.pl - Hatena Diary Loader.
#
# Copyright (C) 2004 by Hahahaha.
# <rin_ne@big.or.jp>
# http://www20.big.or.jp/~rin_ne/
#
#--------------
# Original: hw.pl - Hatena Diary Writer.
#
# Copyright (C) 2004 by Hiroshi Yuki.
# <hyuki@hyuki.com>
# http://www.hyuki.com/techinfo/hatena_diary_writer.html
#--------------
#
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.

use strict;
my $VERSION = "0.3.0";

use LWP::UserAgent;
use HTTP::Request::Common;
use HTTP::Cookies;
use File::Basename;
use Getopt::Std;

my $enable_encode = eval('use Encode; 1');

# Prototypes.
sub login();
sub logout();
sub load_diary_entry($$$);
sub load_it($$$);
sub sanitize($);
sub print_debug(@);
sub print_message(@);
sub error_exit(@);
sub load_config();

# Hatena user id (if empty, I will ask you later).
my $username = '';
# Hatena password (if empty, I will ask you later).
my $password = '';
# Hatena group name (for hatena group user only).
my $groupname = '';

# Default file names.
my $cookie_file = 'cookie.txt';
my $config_file = 'config.txt';

# Filter command.
# e.g. 'iconv -f euc-jp -t utf-8 %s'
# where %s is filename, output is stdout.
my $filter_command = '';

# Proxy setting.
my $httpproxy = '';

# Encoding.
my $c_code = 'euc-jp';
my $s_code = 'euc-jp';

my $hatena_url = 'http://d.hatena.ne.jp';

my %ua_option = (
    agent => "HatenaDiaryLoader/$VERSION", # "Mozilla/5.0",
    timeout => 180,
);

my $cookie_jar;
my $user_agent;

# Handle command-line option.
my %cmd_opt = (
    'd' => 0,   # "debug" flag.
    'u' => "",  # "username" option.
    'p' => "",  # "password" option.
    'a' => "",  # "agent" option.
    'T' => "",  # "timeout" option.
    'c' => 0,   # "cookie" flag.
    'g' => "",  # "groupname" option.
    'n' => "",  # "coNfig file" option.
);

$Getopt::Std::STANDARD_HELP_VERSION = 1;
getopts("du:p:a:T:cg:n:", \%cmd_opt);

if ($cmd_opt{d}) {
    print_debug("Debug flag on.");
    print_debug("Cookie flag on.") if $cmd_opt{c};
    &VERSION_MESSAGE();
}

$config_file = $cmd_opt{n} if $cmd_opt{n};

# Override global vars with config file.
load_config() if -e($config_file);

# Override global vars with command-line options.
$username = $cmd_opt{u} if $cmd_opt{u};
$password = $cmd_opt{p} if $cmd_opt{p};
$groupname = $cmd_opt{g} if $cmd_opt{g};
$ua_option{agent} = $cmd_opt{a} if $cmd_opt{a};
$ua_option{timeout} = $cmd_opt{T} if $cmd_opt{T};

# Change $hatena_url to Hatena group URL if ($groupname is defined).
if ($groupname) {
    $hatena_url = "http://$groupname.g.hatena.ne.jp";
}

# Start.
&main;

# no-error exit.
exit(0);

# Main sequence.
sub main {

    # Check -s option format.
    error_exit("Illegal argument.") if $ARGV[0] !~ /(\d\d\d\d)-(\d\d)-(\d\d)/;
    my ($year, $month, $day) = ($1, $2, $3);

    # Login if necessary.
    login unless ($user_agent);

    # Load
    print_message("Load $year-$month-$day.");
    load_diary_entry($year,$month,$day);
    print_message("Load OK.");

    # Logout if necessary.
    logout if ($user_agent);
}

# Login.
sub login() {
    $user_agent = LWP::UserAgent->new(%ua_option);
    $user_agent->env_proxy;
    $user_agent->proxy( 'http', $httpproxy ) if $httpproxy;

    # Ask username if not set.
    unless ($username) {
        print "Username: ";
        chomp($username = <STDIN>);
    }

    # If "cookie" flag is on, and cookie file exists, do not login.
    if ($cmd_opt{c} and -e($cookie_file)) {
        print_debug("login: Loading cookie jar.");

        $cookie_jar = HTTP::Cookies->new;
        $cookie_jar->load($cookie_file);

        print_debug("login: \$cookie_jar = " . $cookie_jar->as_string);

        print_message("Skip login.");

        return;
    }

    # Ask password if not set.
    unless ($password) {
        print "Password: ";
        chomp($password = <STDIN>);
    }

    my %form;
    $form{key} = $username;
    $form{password} = $password;

    print_message("Login to Hatena as $form{key}.");

    my $r = $user_agent->simple_request(
        HTTP::Request::Common::POST("$hatena_url/login", \%form)
    );

    print_debug("login: " . $r->status_line);

    if (not $r->is_redirect) {
        error_exit("Login: Unexpected response: ", $r->status_line);
    }

    print_message("Login OK.");

    print_debug("login: Making cookie jar.");

    $cookie_jar = HTTP::Cookies->new;
    $cookie_jar->extract_cookies($r);
    $cookie_jar->save($cookie_file);

    print_debug("login: \$cookie_jar = " . $cookie_jar->as_string);
}

# Logout.
sub logout() {
    return unless $user_agent;

    # If "cookie" flag is on, and cookie file exists, do not logout.
    if ($cmd_opt{c} and -e($cookie_file)) {
        print_message("Skip logout.");
        return;
    }

    my %form;
    $form{key} = $username;
    $form{password} = $password;

    print_message("Logout from Hatena as $form{key}.");

    $user_agent->cookie_jar($cookie_jar);
    my $r = $user_agent->get("$hatena_url/logout");
    print_debug("logout: " . $r->status_line);

    if (not $r->is_redirect and not $r->is_success) {
        error_exit("Logout: Unexpected response: ", $r->status_line);
    }

    unlink($cookie_file);

    print_message("Logout OK.");
}

# Load entry.
sub load_diary_entry($$$) {
    my ($year, $month, $day) = @_;
    my $load_retry = 0;
    my $ok = 0;

LOAD_RETRY:
    while ($load_retry < 2) {
        # Load.
        $ok = load_it($year, $month, $day);
        if ($ok or not $cmd_opt{c}) {
            last;
        }
        print_debug("load_diary_entry: LOAD_RETRY.");
        unlink($cookie_file);
        print_message("Old cookie. Retry login.");
        login();
        $load_retry++;
    }

    if (not $ok) {
        error_exit("load_diary_entry: get: Check username/password.");
    }
}

# Load.
sub load_it($$$) {
    my ($year, $month, $day) = @_;

    print_debug("load_it: $year-$month-$day.");

    $user_agent->cookie_jar($cookie_jar);

    my $r = $user_agent->simple_request(
        HTTP::Request::Common::GET("$hatena_url/$username/edit?date=$year$month$day"));

    print_debug("load_it: " . $r->status_line);

    if (not $r->is_success()) {
        error_exit("Load: Unexpected response: ", $r->status_line);
    }

    # Check entry exist.
    $r->content =~ /<form .*?action="\.\/edit" .*?>(.*<\/textarea>)/s;
    my $form_data = $1;

    $form_data =~ /<input type="hidden" name="date" value="(\d\d\d\d\d\d\d\d)">/;
    my $resp_date = $1;

    if($resp_date ne "$year$month$day") {
        error_exit("Load: Not exist entry.");
    }
    
    # Get title and body.
    $form_data =~ /<input class="field" name="title" .*?value="(.*?)">/;
    my $title = $1 . "\n";
    $form_data =~ /<textarea .*?>(.*)<\/textarea>/s;
    my $body = $1;
    
    # Escape string.
    $title = sanitize($title);
    $body = sanitize($body);
    
    if ($enable_encode and ($c_code ne $s_code)) {
        Encode::from_to($title,$s_code,$c_code);
        Encode::from_to($body,$s_code,$c_code);
    }
    # Save entry to file.
    my $datename = "$year-$month-$day";
    
    # Check if file is exist.
    if(-f "$datename.txt") {
        my $bakext = 0;
        while(-f "$datename.$bakext") {
            $bakext++;
        }
        if (not rename("$datename.txt", "$datename.$bakext")) {
            error_exit("$!:$datename.txt");
        }
    }
    
    if (not open(OUT, ">$datename.txt")) {
        error_exit("$!:$datename.txt");
    }
    
    print OUT $title;
    print OUT $body;
    close(OUT);
    
    print_debug("load_it: returns 1 (OK).");
    return 1;
}

# Sanitize.
sub sanitize($) {
    my $str = $_[0];

    my @escape_string = (
        "&lt;<",
        "&gt;>",
        "&quot;\"",
        "&nbsp; ",
    );
    
    for(@escape_string) {
        my ($from, $to) = split(/;/);
        $str =~ s/$from;/$to/sg;
    }
    
    $str =~ s/&#(\d+);/chr($1)/seg;
    $str =~ s/&amp;/&/sg;
    
    return $str;
}

# Show version message. This is called by getopts.
sub VERSION_MESSAGE {
    print <<"EOD";
Hatena Diary Writer Version $VERSION
Copyright (C) 2004 by Hiroshi Yuki.
EOD
}

# Debug print.
sub print_debug(@) {
    if ($cmd_opt{d}) {
        print "DEBUG: ", @_, "\n";
    }
}

# Print message.
sub print_message(@) {
    print @_, "\n";
}

# Error exit.
sub error_exit(@) {
    print "ERROR: ", @_, "\n";
    unlink($cookie_file);
    exit(1);
}

# Show help message. This is called by getopts.
sub HELP_MESSAGE {
    print <<"EOD";

Usage: perl $0 [Options]

Options:
    --version       Show version.
    --help          Show this message.
    -d              Debug. Use this switch for verbose log.
    -u username     Username. Specify username.
    -p password     Password. Specify password.
    -a agent        User agent. Default value is HatenaDiaryWriter/$VERSION.
    -T seconds      Timeout. Default value is 180.
    -c              Cookie. Skip login/logout if $cookie_file exists.
    -g groupname    Groupname. Specify groupname.
    -n conffile     Config file. Specify conffile.

Config file example:
#
# File $config_file in current directory is used to configure hw.pl.
#
id:yourid
password:yourpassword
cookie:cookie.txt
# g:yourgroup
# httpproxy:proxy
# c_code:client_encoding
# s_code:server_encoding
## for Unix.
# filter:iconv -f euc-jp -t utf-8 %s
EOD
}

# Load config file.
sub load_config() {
    print_debug("Loading config file ($config_file).");
    if (not open(CONF, $config_file)) {
        error_exit("Can't open $config_file.");
    }
    while (<CONF>) {
        chomp;
        if (/^\#/) {
            # skip comments
        } elsif (/^id:([^:]+)$/) {
            $username = $1;
            print_debug("load_config: id:$username");
        } elsif (/^g:([^:]+)$/) {
            $groupname = $1;
            print_debug("load_config: g:$groupname");
        } elsif (/^password:(.*)$/) {
            $password = $1;
        } elsif (/^cookie:(.*)$/) {
            $cookie_file = glob($1);
            $cmd_opt{c} = 1; # If cookie file is specified, Assume '-c' is given.
            print_debug("load_config: cookie:$cookie_file");
        } elsif (/^filter:(.*)$/) {
            $filter_command = $1;
            print_debug("load_config: filter:$filter_command");
        } elsif (/^proxy:(.*)$/) {
            $httpproxy = $1;
            print_debug("load_config: proxy:$httpproxy");
        } elsif (/^client_encoding:(.*)$/) {
            $c_code = $1;
            print_debug("load_config: client_encoding:$c_code");
        } elsif (/^server_encoding:(.*)$/) {
            $s_code = $1;
            print_debug("load_config: server_encoding:$s_code");
        } else {
            error_exit("Unknown option '$_' in $config_file.");
        }
    }
    close(CONF);
}

さて、そろそろLispの世界に戻るか。