はてなダイアリーローダー
てなわけで、出来ました。通称はやっぱり「はてダロ?」?(キザっぽいな)
はてダラのパッチでは実体参照のサニタイズをしていませんでした(バグですバグ)がこれはちゃんとやってます。
#!/usr/bin/perl # # hl.pl - Hatena Diary Loader. # # Copyright (C) 2004 by Hahahaha. # <rin_ne@big.or.jp> # http://www20.big.or.jp/~rin_ne/ # #-------------- # Original: hw.pl - Hatena Diary Writer. # # Copyright (C) 2004 by Hiroshi Yuki. # <hyuki@hyuki.com> # http://www.hyuki.com/techinfo/hatena_diary_writer.html #-------------- # # This program is free software; you can redistribute it and/or # modify it under the same terms as Perl itself. use strict; my $VERSION = "0.3.0"; use LWP::UserAgent; use HTTP::Request::Common; use HTTP::Cookies; use File::Basename; use Getopt::Std; my $enable_encode = eval('use Encode; 1'); # Prototypes. sub login(); sub logout(); sub load_diary_entry($$$); sub load_it($$$); sub sanitize($); sub print_debug(@); sub print_message(@); sub error_exit(@); sub load_config(); # Hatena user id (if empty, I will ask you later). my $username = ''; # Hatena password (if empty, I will ask you later). my $password = ''; # Hatena group name (for hatena group user only). my $groupname = ''; # Default file names. my $cookie_file = 'cookie.txt'; my $config_file = 'config.txt'; # Filter command. # e.g. 'iconv -f euc-jp -t utf-8 %s' # where %s is filename, output is stdout. my $filter_command = ''; # Proxy setting. my $httpproxy = ''; # Encoding. my $c_code = 'euc-jp'; my $s_code = 'euc-jp'; my $hatena_url = 'http://d.hatena.ne.jp'; my %ua_option = ( agent => "HatenaDiaryLoader/$VERSION", # "Mozilla/5.0", timeout => 180, ); my $cookie_jar; my $user_agent; # Handle command-line option. my %cmd_opt = ( 'd' => 0, # "debug" flag. 'u' => "", # "username" option. 'p' => "", # "password" option. 'a' => "", # "agent" option. 'T' => "", # "timeout" option. 'c' => 0, # "cookie" flag. 'g' => "", # "groupname" option. 'n' => "", # "coNfig file" option. ); $Getopt::Std::STANDARD_HELP_VERSION = 1; getopts("du:p:a:T:cg:n:", \%cmd_opt); if ($cmd_opt{d}) { print_debug("Debug flag on."); print_debug("Cookie flag on.") if $cmd_opt{c}; &VERSION_MESSAGE(); } $config_file = $cmd_opt{n} if $cmd_opt{n}; # Override global vars with config file. load_config() if -e($config_file); # Override global vars with command-line options. $username = $cmd_opt{u} if $cmd_opt{u}; $password = $cmd_opt{p} if $cmd_opt{p}; $groupname = $cmd_opt{g} if $cmd_opt{g}; $ua_option{agent} = $cmd_opt{a} if $cmd_opt{a}; $ua_option{timeout} = $cmd_opt{T} if $cmd_opt{T}; # Change $hatena_url to Hatena group URL if ($groupname is defined). if ($groupname) { $hatena_url = "http://$groupname.g.hatena.ne.jp"; } # Start. &main; # no-error exit. exit(0); # Main sequence. sub main { # Check -s option format. error_exit("Illegal argument.") if $ARGV[0] !~ /(\d\d\d\d)-(\d\d)-(\d\d)/; my ($year, $month, $day) = ($1, $2, $3); # Login if necessary. login unless ($user_agent); # Load print_message("Load $year-$month-$day."); load_diary_entry($year,$month,$day); print_message("Load OK."); # Logout if necessary. logout if ($user_agent); } # Login. sub login() { $user_agent = LWP::UserAgent->new(%ua_option); $user_agent->env_proxy; $user_agent->proxy( 'http', $httpproxy ) if $httpproxy; # Ask username if not set. unless ($username) { print "Username: "; chomp($username = <STDIN>); } # If "cookie" flag is on, and cookie file exists, do not login. if ($cmd_opt{c} and -e($cookie_file)) { print_debug("login: Loading cookie jar."); $cookie_jar = HTTP::Cookies->new; $cookie_jar->load($cookie_file); print_debug("login: \$cookie_jar = " . $cookie_jar->as_string); print_message("Skip login."); return; } # Ask password if not set. unless ($password) { print "Password: "; chomp($password = <STDIN>); } my %form; $form{key} = $username; $form{password} = $password; print_message("Login to Hatena as $form{key}."); my $r = $user_agent->simple_request( HTTP::Request::Common::POST("$hatena_url/login", \%form) ); print_debug("login: " . $r->status_line); if (not $r->is_redirect) { error_exit("Login: Unexpected response: ", $r->status_line); } print_message("Login OK."); print_debug("login: Making cookie jar."); $cookie_jar = HTTP::Cookies->new; $cookie_jar->extract_cookies($r); $cookie_jar->save($cookie_file); print_debug("login: \$cookie_jar = " . $cookie_jar->as_string); } # Logout. sub logout() { return unless $user_agent; # If "cookie" flag is on, and cookie file exists, do not logout. if ($cmd_opt{c} and -e($cookie_file)) { print_message("Skip logout."); return; } my %form; $form{key} = $username; $form{password} = $password; print_message("Logout from Hatena as $form{key}."); $user_agent->cookie_jar($cookie_jar); my $r = $user_agent->get("$hatena_url/logout"); print_debug("logout: " . $r->status_line); if (not $r->is_redirect and not $r->is_success) { error_exit("Logout: Unexpected response: ", $r->status_line); } unlink($cookie_file); print_message("Logout OK."); } # Load entry. sub load_diary_entry($$$) { my ($year, $month, $day) = @_; my $load_retry = 0; my $ok = 0; LOAD_RETRY: while ($load_retry < 2) { # Load. $ok = load_it($year, $month, $day); if ($ok or not $cmd_opt{c}) { last; } print_debug("load_diary_entry: LOAD_RETRY."); unlink($cookie_file); print_message("Old cookie. Retry login."); login(); $load_retry++; } if (not $ok) { error_exit("load_diary_entry: get: Check username/password."); } } # Load. sub load_it($$$) { my ($year, $month, $day) = @_; print_debug("load_it: $year-$month-$day."); $user_agent->cookie_jar($cookie_jar); my $r = $user_agent->simple_request( HTTP::Request::Common::GET("$hatena_url/$username/edit?date=$year$month$day")); print_debug("load_it: " . $r->status_line); if (not $r->is_success()) { error_exit("Load: Unexpected response: ", $r->status_line); } # Check entry exist. $r->content =~ /<form .*?action="\.\/edit" .*?>(.*<\/textarea>)/s; my $form_data = $1; $form_data =~ /<input type="hidden" name="date" value="(\d\d\d\d\d\d\d\d)">/; my $resp_date = $1; if($resp_date ne "$year$month$day") { error_exit("Load: Not exist entry."); } # Get title and body. $form_data =~ /<input class="field" name="title" .*?value="(.*?)">/; my $title = $1 . "\n"; $form_data =~ /<textarea .*?>(.*)<\/textarea>/s; my $body = $1; # Escape string. $title = sanitize($title); $body = sanitize($body); if ($enable_encode and ($c_code ne $s_code)) { Encode::from_to($title,$s_code,$c_code); Encode::from_to($body,$s_code,$c_code); } # Save entry to file. my $datename = "$year-$month-$day"; # Check if file is exist. if(-f "$datename.txt") { my $bakext = 0; while(-f "$datename.$bakext") { $bakext++; } if (not rename("$datename.txt", "$datename.$bakext")) { error_exit("$!:$datename.txt"); } } if (not open(OUT, ">$datename.txt")) { error_exit("$!:$datename.txt"); } print OUT $title; print OUT $body; close(OUT); print_debug("load_it: returns 1 (OK)."); return 1; } # Sanitize. sub sanitize($) { my $str = $_[0]; my @escape_string = ( "<<", ">>", ""\"", " ", ); for(@escape_string) { my ($from, $to) = split(/;/); $str =~ s/$from;/$to/sg; } $str =~ s/&#(\d+);/chr($1)/seg; $str =~ s/&/&/sg; return $str; } # Show version message. This is called by getopts. sub VERSION_MESSAGE { print <<"EOD"; Hatena Diary Writer Version $VERSION Copyright (C) 2004 by Hiroshi Yuki. EOD } # Debug print. sub print_debug(@) { if ($cmd_opt{d}) { print "DEBUG: ", @_, "\n"; } } # Print message. sub print_message(@) { print @_, "\n"; } # Error exit. sub error_exit(@) { print "ERROR: ", @_, "\n"; unlink($cookie_file); exit(1); } # Show help message. This is called by getopts. sub HELP_MESSAGE { print <<"EOD"; Usage: perl $0 [Options] Options: --version Show version. --help Show this message. -d Debug. Use this switch for verbose log. -u username Username. Specify username. -p password Password. Specify password. -a agent User agent. Default value is HatenaDiaryWriter/$VERSION. -T seconds Timeout. Default value is 180. -c Cookie. Skip login/logout if $cookie_file exists. -g groupname Groupname. Specify groupname. -n conffile Config file. Specify conffile. Config file example: # # File $config_file in current directory is used to configure hw.pl. # id:yourid password:yourpassword cookie:cookie.txt # g:yourgroup # httpproxy:proxy # c_code:client_encoding # s_code:server_encoding ## for Unix. # filter:iconv -f euc-jp -t utf-8 %s EOD } # Load config file. sub load_config() { print_debug("Loading config file ($config_file)."); if (not open(CONF, $config_file)) { error_exit("Can't open $config_file."); } while (<CONF>) { chomp; if (/^\#/) { # skip comments } elsif (/^id:([^:]+)$/) { $username = $1; print_debug("load_config: id:$username"); } elsif (/^g:([^:]+)$/) { $groupname = $1; print_debug("load_config: g:$groupname"); } elsif (/^password:(.*)$/) { $password = $1; } elsif (/^cookie:(.*)$/) { $cookie_file = glob($1); $cmd_opt{c} = 1; # If cookie file is specified, Assume '-c' is given. print_debug("load_config: cookie:$cookie_file"); } elsif (/^filter:(.*)$/) { $filter_command = $1; print_debug("load_config: filter:$filter_command"); } elsif (/^proxy:(.*)$/) { $httpproxy = $1; print_debug("load_config: proxy:$httpproxy"); } elsif (/^client_encoding:(.*)$/) { $c_code = $1; print_debug("load_config: client_encoding:$c_code"); } elsif (/^server_encoding:(.*)$/) { $s_code = $1; print_debug("load_config: server_encoding:$s_code"); } else { error_exit("Unknown option '$_' in $config_file."); } } close(CONF); }
さて、そろそろLispの世界に戻るか。