Plagger::Plugin::Filter::POPFile v0.02

PlaggerConでネタにしたFilter::POPFileの改良版。

package Plagger::Plugin::Filter::POPFile;
use strict;
use base qw( Plagger::Plugin );

our $VERSION = '0.02';

# Registers this plugin's callbacks with the Plagger context, one per hook
# name: 'plugin.init', 'update.entry.fixup', 'update.feed.fixup' and
# 'update.fixup'.
sub register {
    my ($self, $context) = @_;

    # An ordered list (not a hash) so the name/callback pairs are handed to
    # register_hook in exactly this order.
    my @hook_pairs = (
        'plugin.init'        => \&connect_popfile,
        'update.entry.fixup' => \&filter,
        'update.feed.fixup'  => \&delete,
        'update.fixup'       => \&disconnect_popfile,
    );

    return $context->register_hook($self, @hook_pairs);
}

# Hook callback registered for 'update.entry.fixup'.
#
# Writes the entry to a temporary message file, asks POPFile to classify
# it, and then:
#   * adds the 'will_be_deleted_by_POPFile' tag when the bucket is listed
#     in the 'buckets_to_delete' config;
#   * adds the bucket name as a tag when the bucket is listed in
#     'buckets_to_tag', or when 'tag_all' is true and the bucket is not
#     'unclassified'.  If 'add_tag_to_title' is true the title is also
#     prefixed with "[bucket] ".
#
# Arguments: ($self, $context, $args); $args->{entry} is the entry to
# classify.  Returns nothing.
sub filter {
    my ($self, $context, $args) = @_;

    my $conf  = $self->conf;
    my $entry = $args->{entry};

    # Lookup sets keyed by bucket name.
    my %to_tag    = map { $_ => 1 } @{ $conf->{buckets_to_tag}    || [] };
    my %to_delete = map { $_ => 1 } @{ $conf->{buckets_to_delete} || [] };

    my $filename = write_tmpfile($self, $context, $args);

    my $bucket = $self->{popfile}->call(
        'POPFile/API.classify',
        $self->{popfile_session},
        $filename
    )->result;

    # The message file is only needed for the call above; remove it now
    # instead of leaving one file per entry in the temporary directory.
    unlink $filename
        or $context->log(debug => "could not remove $filename: $!");

    # Without a bucket there is nothing to act on.  Bail out so that
    # 'tag_all' cannot tag the entry with an undefined/empty bucket name.
    unless (defined $bucket && length $bucket) {
        $context->log(warn => $entry->permalink . ': POPFile returned no bucket');
        return;
    }

    $context->log(debug => $entry->permalink . ": $bucket");

    if ($to_delete{$bucket}) {
        $context->log(debug => $entry->permalink . ' will be deleted');

        # Only marks the entry here; the tag is a literal shared with the
        # code that performs the actual removal.
        $entry->add_tag('will_be_deleted_by_POPFile');
    }

    if ($to_tag{$bucket} || ($conf->{tag_all} && $bucket ne 'unclassified')) {
        $context->log(debug => $entry->permalink . ' is tagged');

        $entry->title("[$bucket] " . $entry->title) if $conf->{add_tag_to_title};
        $entry->add_tag($bucket);
    }

    return;
}

# Hook callback registered for 'update.feed.fixup': removes from
# $args->{feed} every entry that carries the 'will_be_deleted_by_POPFile'
# tag.
sub delete {
    my ($self, $context, $args) = @_;

    my $feed = $args->{feed};

    for my $candidate ($feed->entries) {
        next unless $candidate->has_tag('will_be_deleted_by_POPFile');

        # NOTE(review): the URL logged here is the feed's permalink, not the
        # deleted entry's - confirm that this is intended.
        $context->log(debug => 'spam deleted: ' . $feed->permalink);
        $feed->delete_entry($candidate);
    }
}

# XXX: the three functions below are common helpers and should be moved into a separate module.

use XMLRPC::Lite;
use File::Temp ();
use Encode;

# Hook callback registered for 'plugin.init'.
#
# Creates the XML-RPC client for the URL in the 'proxy' config, requests a
# POPFile session key (user 'admin', empty password) and creates the
# temporary directory used for the message files.  The results are stored
# in $self->{popfile}, $self->{popfile_session} and
# $self->{popfile_tempdir}.
#
# Arguments: ($self, $context, $args).  Returns nothing.
sub connect_popfile {
    my ($self, $context, $args) = @_;

    $context->log(debug => "hello, POPFile");

    my $proxy = $self->conf->{proxy};
    $self->{popfile} = XMLRPC::Lite->proxy($proxy);

    my $session = $self->{popfile}->call(
        'POPFile/API.get_session_key',
        'admin',
        ''
    )->result;
    $self->{popfile_session} = $session;

    if (defined $session && length $session) {
        $context->log(debug => "session: $session");
    }
    else {
        # Report the missing key loudly instead of logging an empty
        # "session: " line at debug level.
        $context->log(
            error => 'POPFile did not return a session key (proxy: '
                   . (defined $proxy ? $proxy : 'not configured') . ')'
        );
    }

    # CLEANUP => 1 asks File::Temp to remove the directory at program exit.
    $self->{popfile_tempdir} = File::Temp::tempdir(
        CLEANUP => 1,
    );

    return;
}

# Hook callback registered for 'update.fixup': hands the stored session key
# back to POPFile via 'POPFile/API.release_session_key'.
sub disconnect_popfile {
    my ($self, $context, $args) = @_;

    my ($client, $session_key) = @{$self}{qw( popfile popfile_session )};

    $context->log(debug => "good-bye, POPFile");

    return $client->call('POPFile/API.release_session_key', $session_key);
}

# Writes an entry to a new temporary file as a minimal mail-like message
# (From/To/Subject headers, blank line, body) and returns the file name.
#
# Called as a plain function: write_tmpfile($self, $context, $args).
#   $self    - plugin object; reads the 'encoding' config (default 'utf8')
#              and $self->{popfile_tempdir}
#   $context - used only to log a write failure
#   $args    - hash ref; $args->{entry} supplies permalink, title and
#              body_text
#
# The file is created inside $self->{popfile_tempdir} and is not removed
# here.
sub write_tmpfile {
    my ($self, $context, $args) = @_;

    my $encoding = $self->conf->{encoding} || 'utf8';
    my $entry    = $args->{entry};

    # Header values must stay on a single line: a CR/LF inside the title or
    # permalink would end the header block early and push the rest of the
    # value into the headers/body.  Undefined values become empty strings.
    my ($from, $subject) = map {
        my $value = defined $_ ? "$_" : '';
        $value =~ s/[\r\n]+/ /g;
        $value;
    } ($entry->permalink, $entry->title);

    my $text = $entry->body_text;
    $text = '' unless defined $text;

    my ($fh, $filename) = File::Temp::tempfile(
        DIR => $self->{popfile_tempdir},
    );

    # Every entry-supplied part goes through the same encoding so the file
    # never mixes encoded bytes with raw character data.
    my $written = print {$fh}
        'From: (', encode($encoding, $from), ') <plagger@localhost>', "\n",
        'To: <plagger@localhost>', "\n",
        'Subject: ', encode($encoding, $subject), "\n\n",
        encode($encoding, $text), "\n";

    # Buffered write errors may only surface at close, so check both.
    my $closed = close $fh;
    $context->log(error => "failed to write $filename: $!")
        unless $written && $closed;

    return $filename;
}

1;

__END__

=head1 NAME

Plagger::Plugin::Filter::POPFile - Categorize entries as spam et al

=head1 SYNOPSIS

  - module: Filter::POPFile
    config:
      proxy: http://localhost:8081/RPC2
      encoding: euc-jp
      buckets_to_tag:
        - perl
        - plagger
      buckets_to_delete:
        - spam
      tag_all: 0
      add_tag_to_title: 1

=head1 CONFIG

=over 4

=item proxy

Your POPFile proxy URL.

=item encoding

The character encoding POPFile expects. Defaults to 'utf8'; Japanese (Nihongo) users should specify 'euc-jp'.

=item buckets_to_tag, tag_all

Entries classified into one of the C<buckets_to_tag> buckets are tagged with the bucket name.
Set C<tag_all> to true to tag every entry with its bucket name, except entries POPFile leaves C<unclassified>.

=item buckets_to_delete

Your POPFile buckets for spam entries. These entries will be filtered out
before publishing.

=item add_tag_to_title

If set to true, the bucket name (enclosed in brackets) will be added
to the entry's title.

=back

=head1 AUTHOR

Kenichi Ishigaki

=head1 SEE ALSO

L<Plagger>, POPFile

=cut