Plagger::Plugin::Filter::PDIC

PPF::Babelfishはフィードのテキストとタイトルを機械翻訳してしまえ、というフィルタでしたが、いまのところ日/英の機械翻訳なんてさほどあてになりませんので辞書引きだけしてもらいましょう、というのが今度のフィルタ。PDICを使っているので例によってWin32限定ですが、実用性は皆無なんで気にすることもありますまい。Babelfishと違ってBanを食らうことがないってのが唯一の取り柄。
コードを載せる前に英辞郎をかませた結果だけ書いておくと、こんな感じ。素のまま長いフィードを食わせると脚注部分がすごいことになるので、適宜min_levelとかnon_zeroとか設定してくださいな、と。

title: Charlie's Cocktail BAR
link: http://www.charsbar.com/japanese/
modified: 2006-03-06T23:40:00
generator: Plagger/0.5.5

(中略)

title: Tang a rita / フェアリー・ウイスパー / fairy whiskey

fairy (3)
【@】フェアリー、【変化】《複》fairies、【分節】fair・y
【名-1】妖精
【名-2】同性愛の男性
【形】妖精の

whiskey (2)
【@】ウィスキー、ウイスキー、【変化】《複》whiskeys、【分節】whis・key
【名】ウイスキー
link: http://www.charsbar.com/japanese/index.cgi/update/20060219
issued: 2006-02-19T00:00:00
author: Charlie

(中略)

title: Smoked Heather Dram / ウオッカサキ / ワサビ・ウオッカ

heather (11)
【@】ヘザー、【分節】heath・er
【名】《植物》ヒース
link: http://www.charsbar.com/japanese/index.cgi/update/20051208
issued: 2005-12-08T00:00:00
author: Charlie

で、以下コード。

package Plagger::Plugin::Filter::PDIC;
use strict;
use base qw( Plagger::Plugin );

use Win32::SearchPDIC;
use Encode;

sub register {
    my($self, $context) = @_;
    $context->register_hook(
        $self,
        'update.entry.fixup' => \&update,
    );
}

sub get_notes {
    my ($self, $pdic, $text, $args) = @_;

    my @notes = ();
    foreach my $word (split /\s+/, $text) {
        my $note = $pdic->find($word, $args);
        $note =~ s|\015||g;
        push @notes, decode('shift_jis', $note) if $note;
    }
    return join "\n\n", $text, @notes;
}

sub update {
    my($self, $context, $args) = @_;

    my $pdic = Win32::SearchPDIC->connect;
    $pdic->open_dict;

    $args->{entry}->title(
        $self->get_notes(
            $pdic, 
            $args->{entry}->title, 
            $self->conf,
        )
    );

    $args->{entry}->body(
        $self->get_notes(
            $pdic,
            $args->{entry}->body,
            $self->conf,
        )
    );

    $pdic->disconnect;
}

1;

__END__

=head1 NAME

Plagger::Plugin::Filter::PDIC - help reading English feeds via Win32::SearchPDIC

=head1 SYNOPSIS

  - module: Filter::PDIC
    config:
      min_level: 5
      non_zero:  1

=head1 DESCRIPTION

This plugin parses feeds and fetches meanings of each word (if possible) through Win32::SearchPDIC.

=head1 CONFIG

=over 4

=item min_level

If set to non-zero (1-15) value, PDIC will ignore less-than-the-level
words. Defaults to 0.

=item non_zero

If set to true, PDIC will ignore 0-level words. Defaults to 0.

=back

=head1 AUTHOR

Kenichi Ishigaki

=head1 SEE ALSO

L<Plagger>, L<Win32::SearchPDIC>

=cut

ついでにWin32::SearchPDICもアップデート。英辞郎の単語レベルでフィルタリングできるように。

package Win32::SearchPDIC;
{
  use strict;
  use warnings;
  use Carp;

  our $VERSION = '0.02';

  use Win32::DDE::Client;

  sub connect {
    my ($class, $args) = @_;

    my $server = $args->{server} || 'PDICW';
    my $wait   = $args->{wait}   || 1;

    my ($process, $window);
    if ($args->{force}) {
#      $process = _create_process();
      $window  = _create_window();
      sleep $wait;
    }

    # XXX: currently PDICW must be ready before DDE conversation.
    # Tried several approaches but failed to prepare PDIC on the fly.
    # (Some failed to stay minimized; others failed to talk properly.)

    my $pdic = Win32::DDE::Client->new($server, 'Dictionary');

    croak 'failed to connect PDIC' if !$pdic || $pdic->Error;

    bless {
      pdic => $pdic,
      process => $process,
      window => $window
    }, $class;
  }

  sub open_dict {
    my $self = shift;

    my $pdic = $self->{pdic};

    # XXX: Win32::DDE::Callback complains on the second DDE try.
    # XXX: should accept Open options?

    eval {
        $pdic->Poke('Open', '');
        $pdic->Poke('Config', 't3');
        $pdic->Poke('Format', '$w ($l)\n$j');
    };
    $self->disconnect("failed to open dictionary: $@") if $@;

    $self->{opened} = 1;
  }

  sub find {
    my ($self, $word, $args) = @_;

    # check lowercased word first
    my $value = $self->_find(lc $word, $args);

    # check original word (with some uppercase characters) if necessary
    $value = $self->_find($word, $args) if !$value && $word ne lc $word;

    return $value;
  }

  sub _find {
    my ($self, $word, $args) = @_;

    my $pdic = $self->{pdic} or croak 'not available';

    if ( $pdic->Poke('Find', $word) ) {
      my $value = $pdic->Request('Find');

      my ($level) = $value =~ /\((\d+)\)/;

      # check 'min_level'
      return if $args->{min_level} && $level && $level < $args->{min_level};

      # check 'no_zero'
      return if $args->{non_zero} && !$level;

      return $value;
    }
    return;
  }

  sub _find_pdic {
    my $path;

    eval {
      use Win32API::Registry qw( :ALL );

      my $hKey;
      RegOpenKeyEx( HKEY_LOCAL_MACHINE, "SOFTWARE\\ReliefOffice\\CurrentVersion\\App Paths\\PDICW32.EXE", 0, KEY_READ, $hKey )
        or croak "failed to locate PDICW32.exe".regLastError();

      my $type;
      RegQueryValueEx( $hKey, "", [], $type, $path, [] )
        or confess "failed to read PDICW32.exe info".regLastError();

      RegCloseKey( $hKey )
        or confess "failed to close Registry".regLastError();

    };
    croak $@ if $@;
    return $path;
  }

  sub _create_process {
    my $process;

    my $pdic_path = _find_pdic();

    eval {
      use Win32::Process;
      use Win32 ();
      Win32::Process::Create(
        $process, $pdic_path, '', 0, CREATE_NO_WINDOW, '.'
      ) or croak Win32::FormatMessage( Win32::GetLastError() );
    };

    $process;
  }

  sub _create_window {
    my $pdic_path = _find_pdic();

    eval {
      use Win32::FileOp qw(ShellExecute SW_HIDE SW_MINIMIZE SW_SHOWMINIMIZED);
      use Win32 ();

      ShellExecute 'open' => $pdic_path, { show => SW_SHOWMINIMIZED };
      croak Win32::FormatMessage(Win32::GetLastError()) if $^E;
    };
    croak $@ if $@;

    return 1;
  }

  sub _kill_process {
    my $process = shift;
    eval {
      my $exitcode;
      $process->GetExitCode($exitcode);
      $process->Kill($exitcode);
    };
    croak $@ if $@;
  }

  sub disconnect {
    my ($self, $error) = @_;

    warn $error if $error;

    if ($self->{pdic}) {
      $self->{pdic}->Poke('Close', '') if $self->{opened};
      $self->{pdic}->Disconnect;
    }
    _kill_process($self->{process})    if $self->{process};
  }

  sub DESTROY { $_[0]->disconnect; }
}

1;
__END__

=head1 NAME

Win32::SearchPDIC - Search PDIC via DDE

=head1 SYNOPSIS

  use Win32::SearchPDIC;

  my $pdic = Win32::SearchPDIC->connect({ force => 1 });
  $pdic->open_dict;

  my $meanings = $pdic->find("test");

  $pdic->disconnect;

=head1 SEE ALSO

L<http://homepage3.nifty.com/TaN/> for PDIC (Personal Dictionary).

L<Win32::DDE::Client>

=head1 AUTHOR

Kenichi Ishigaki

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2006 by Kenichi Ishigaki

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut