polyze/Polyze.pm

280 lines
7.3 KiB
Perl

package Polyze;
use Moose;
use DateTime;
use IO::File;
use List::Util qw( reduce );
use Number::Format qw( format_number );
use Polyze::Line;
use Polyze::H2Early;
use Polyze::HReject;
our $VERSION = "0.19";
# The *_LINE (pseudo-)constants are return values for the handlers'
# scan_line method. Having examined a log line, a handler returns a
# *_LINE value to indicate to self how to treat the current log line.
# The usual treatment is to continue scanning the line by the next
# handler. Sometimes a handler will want scanning of this line to be
# stopped (NEXT_LINE) or to be redone (REDO_LINE). See _scan_log
# below.
#
our $CONTINUE_LINE = 0;
our $NEXT_LINE = 1;
our $REDO_LINE = 2;
has logs => (
is => 'ro',
isa => 'ArrayRef[Str]',
required => 1,
);
has day => (
is => 'ro',
isa => 'DateTime',
required => 1,
);
has day_str => (
is => 'ro',
isa => 'Str',
required => 0,
lazy => 1,
builder => '_build_day_str',
);
has is_day_found => (
is => 'rw',
isa => 'Bool',
required => 0,
default => 0,
);
has handlers => (
is => 'ro',
isa => 'ArrayRef[Polyze::Handler]',
required => 0,
builder => '_build_handlers',
lazy => 1,
);
has elide_domain => (
is => 'ro',
isa => 'Str',
required => 0,
);
has reporters => (
is => 'ro',
isa => 'ArrayRef[Polyze::Reporter]',
required => 0,
default => sub { return []; },
);
has line => (
is => 'ro',
isa => 'Maybe[Polyze::Line]',
required => 0,
default => undef,
writer => '_set_line',
handles => [qw( linenumber )],
);
# E.g. 'Jul 1', note padding.
sub _build_day_str {
my ($self) = @_;
return $self->day->strftime("%b %e");
}
sub _scan_log {
my ($self, $log_file_name) = @_;
my $fh = IO::File->new($log_file_name, 'r') or
die("cannot open '$log_file_name'\n");
::squawk(1, "Scanning log '%s'...\n", $log_file_name);
LOG_LINE:
while (defined (my $line_str = $fh->getline)) {
$self->_set_line(Polyze::Line->new(
filename => $log_file_name,
linenumber => $fh->input_line_number,
text => $line_str,
));
# Have to use a shallow copy of handlers for the scan loop because
# we modify the handlers array within the loop.
my @handlers = @{$self->handlers};
foreach my $h (@handlers) {
my ($lno, $htype) = ($self->linenumber, (ref $h));
my $retcode = $h->scan_line($self->line);
if ($NEXT_LINE == $retcode) {
::squawk(3, "%d %s returned NEXT_LINE\n", $lno, $htype);
next LOG_LINE;
}
elsif ($CONTINUE_LINE == $retcode) {
::squawk(3, "%d %s returned CONTINUE_LINE\n", $lno, $htype);
}
elsif ($REDO_LINE == $retcode) {
::squawk(3, "%d %s returned REDO_LINE\n", $lno, $htype);
redo LOG_LINE;
}
else {
die(sprintf(
"%d %s returned unknown value '%s'", $lno, $htype, $retcode,
));
}
}
}
if (defined $self->line) {
::squawk(1, "%d last line.\n", $self->linenumber);
$self->_set_line(undef);
} else {
::squawk(1, "Empty file: %s\n", $log_file_name);
}
$fh->close;
return;
}
sub _build_handlers {
my ($self) = @_;
return [
Polyze::H2Early->new(polyze => $self),
Polyze::HReject->new(polyze => $self),
];
}
sub remove_handler {
my ($self, $handler) = @_;
my @handlers = @{$self->handlers};
my $idx = 0;
while ($handler != $handlers[$idx]) {
++$idx;
die("handler not found") if ($idx > $#handlers);
}
# Landmine warning: have to splice the deref'd handlers attribute,
# not the @handlers array. The latter splices a *copy* leaving the
# handlers unchanged.
splice(@{$self->handlers}, $idx, 1);
::squawk(1, "%d Removed handler %s\n", $self->linenumber, ref $handler);
return;
}
sub push_handler {
my ($self, $handler) = @_;
push(@{$self->handlers}, $handler);
::squawk(1, "%d Pushed handler %s\n", $self->linenumber, ref $handler);
return;
}
sub unshift_handler {
my ($self, $handler) = @_;
unshift(@{$self->handlers}, $handler);
::squawk(1, "%d Unshifted handler %s\n", $self->linenumber, ref $handler);
return;
}
sub add_reporter {
my ($self, $reporter) = @_;
my $target_reporter = do {
if (my ($r) = grep { $reporter->is_similar($_); } @{$self->reporters}) {
$r->increment_count;
::squawk(2, "%d Incremented %s\n", $self->linenumber, ref $r);
$r;
} else {
push(@{$self->reporters}, $reporter);
::squawk(2, "%d Added %s\n", $self->linenumber, ref $reporter);
$reporter;
}
};
return $target_reporter;
}
sub _is_too_old {
my ($self, $logfilename) = @_;
my $interval_start_epoch = $self->day->epoch;
my $log_mtime_epoch = (stat($logfilename))[9];
my $is_too_old = ($log_mtime_epoch < $interval_start_epoch);
if ($is_too_old) {
::squawk(1, "Skipping log '%s', too old.\n", $logfilename);
}
return $is_too_old;
}
sub scan {
my ($self) = @_;
::squawk(1, "Beginning scan for %s...\n", $self->day_str);
for my $log (@{$self->logs}) {
next if ($self->_is_too_old($log));
$self->_scan_log($log);
}
::squawk(1, "Completed scan.\n");
return $self;
}
sub _shorter_to {
my ($self, $longer_to) = @_;
my $shorter_to = $longer_to;
# For brevity, shorten the recipient address if in the expected domain.
if (my $elide_domain = $self->elide_domain) {
$shorter_to =~ s/\@$elide_domain$//;
}
return $shorter_to;
}
sub _report_rejects {
my ($self) = @_;
my @rejects_all = grep { $_->does('Polyze::RReject'); } @{$self->reporters};
# Partition the rejects by the recipient.
my %by_recip = ();
for my $r (@rejects_all) { push @{$by_recip{$r->to}}, $r; }
# And print them.
for my $recip (sort keys %by_recip) {
printf("\n%s\n", $self->_shorter_to($recip));
my $rejects_recip = $by_recip{$recip};
# Partition this recipient's rejects by reason.
my %by_recip_reason = ();
for my $r (@$rejects_recip) { push @{$by_recip_reason{$r->reason}}, $r; }
for my $reason (sort keys %by_recip_reason) {
print(" $reason\n");
for my $r (@{$by_recip_reason{$reason}}) {
printf(" %s\n", $r->to_str);
}
}
}
# Footer.
my $count_distinct = scalar @rejects_all;
my $count_all = reduce { $a + $b->count } (0, @rejects_all);
if ($count_distinct == $count_all) {
printf("\n%d rejects found.\n", $count_distinct);
} elsif ($count_distinct < $count_all) {
printf("\n%d/%d distinct/all rejects found.\n", $count_distinct, $count_all);
} else {
die sprintf("%d > %d, this is a bug", $count_distinct, $count_all);
}
return;
}
sub _report_line_count {
my ($self) = @_;
my ($rcount) = grep { $_->isa('Polyze::RCount'); } @{$self->reporters};
printf(
"%s log lines for %s.\n",
($rcount ? format_number($rcount->count) : 0),
$self->day->strftime("%A"),
);
}
sub report {
my ($self) = @_;
# Header.
printf("%s's mail rejects.\n", $self->day->strftime("%F %A"));
$self->_report_rejects;
$self->_report_line_count;
# Footer.
printf(
"Report completed %s by %s v%s.\n",
DateTime->now(time_zone => 'local')->strftime("%F %T %Z"),
__PACKAGE__, $VERSION,
);
return;
}
no Moose;
__PACKAGE__->meta->make_immutable;