package Polyze; use Moose; use DateTime; use IO::File; use List::Util qw( reduce ); use Number::Format qw( format_number ); use Polyze::Line; use Polyze::H2Early; use Polyze::HReject; our $VERSION = "0.19"; # The *_LINE (pseudo-)constants are return values for the handlers' # scan_line method. Having examined a log line, a handler returns a # *_LINE value to indicate to self how to treat the current log line. # The usual treatment is to continue scanning the line by the next # handler. Sometimes a handler will want scanning of this line to be # stopped (NEXT_LINE) or to be redone (REDO_LINE). See _scan_log # below. # our $CONTINUE_LINE = 0; our $NEXT_LINE = 1; our $REDO_LINE = 2; has logs => ( is => 'ro', isa => 'ArrayRef[Str]', required => 1, ); has day => ( is => 'ro', isa => 'DateTime', required => 1, ); has day_str => ( is => 'ro', isa => 'Str', required => 0, lazy => 1, builder => '_build_day_str', ); has is_day_found => ( is => 'rw', isa => 'Bool', required => 0, default => 0, ); has handlers => ( is => 'ro', isa => 'ArrayRef[Polyze::Handler]', required => 0, builder => '_build_handlers', lazy => 1, ); has elide_domain => ( is => 'ro', isa => 'Str', required => 0, ); has reporters => ( is => 'ro', isa => 'ArrayRef[Polyze::Reporter]', required => 0, default => sub { return []; }, ); has line => ( is => 'ro', isa => 'Maybe[Polyze::Line]', required => 0, default => undef, writer => '_set_line', handles => [qw( linenumber )], ); # E.g. 'Jul 1', note padding. sub _build_day_str { my ($self) = @_; return $self->day->strftime("%b %e"); } sub _scan_log { my ($self, $log_file_name) = @_; my $fh = IO::File->new($log_file_name, 'r') or die("cannot open '$log_file_name'\n"); ::squawk(1, "Scanning log '%s'...\n", $log_file_name); LOG_LINE: while (defined (my $line_str = $fh->getline)) { $self->_set_line(Polyze::Line->new( filename => $log_file_name, linenumber => $fh->input_line_number, text => $line_str, )); # Have to use a shallow copy of handlers for the scan loop because # we modify the handlers array within the loop. my @handlers = @{$self->handlers}; foreach my $h (@handlers) { my ($lno, $htype) = ($self->linenumber, (ref $h)); my $retcode = $h->scan_line($self->line); if ($NEXT_LINE == $retcode) { ::squawk(3, "%d %s returned NEXT_LINE\n", $lno, $htype); next LOG_LINE; } elsif ($CONTINUE_LINE == $retcode) { ::squawk(3, "%d %s returned CONTINUE_LINE\n", $lno, $htype); } elsif ($REDO_LINE == $retcode) { ::squawk(3, "%d %s returned REDO_LINE\n", $lno, $htype); redo LOG_LINE; } else { die(sprintf( "%d %s returned unknown value '%s'", $lno, $htype, $retcode, )); } } } if (defined $self->line) { ::squawk(1, "%d last line.\n", $self->linenumber); $self->_set_line(undef); } else { ::squawk(1, "Empty file: %s\n", $log_file_name); } $fh->close; return; } sub _build_handlers { my ($self) = @_; return [ Polyze::H2Early->new(polyze => $self), Polyze::HReject->new(polyze => $self), ]; } sub remove_handler { my ($self, $handler) = @_; my @handlers = @{$self->handlers}; my $idx = 0; while ($handler != $handlers[$idx]) { ++$idx; die("handler not found") if ($idx > $#handlers); } # Landmine warning: have to splice the deref'd handlers attribute, # not the @handlers array. The latter splices a *copy* leaving the # handlers unchanged. splice(@{$self->handlers}, $idx, 1); ::squawk(1, "%d Removed handler %s\n", $self->linenumber, ref $handler); return; } sub push_handler { my ($self, $handler) = @_; push(@{$self->handlers}, $handler); ::squawk(1, "%d Pushed handler %s\n", $self->linenumber, ref $handler); return; } sub unshift_handler { my ($self, $handler) = @_; unshift(@{$self->handlers}, $handler); ::squawk(1, "%d Unshifted handler %s\n", $self->linenumber, ref $handler); return; } sub add_reporter { my ($self, $reporter) = @_; my $target_reporter = do { if (my ($r) = grep { $reporter->is_similar($_); } @{$self->reporters}) { $r->increment_count; ::squawk(2, "%d Incremented %s\n", $self->linenumber, ref $r); $r; } else { push(@{$self->reporters}, $reporter); ::squawk(2, "%d Added %s\n", $self->linenumber, ref $reporter); $reporter; } }; return $target_reporter; } sub _is_too_old { my ($self, $logfilename) = @_; my $interval_start_epoch = $self->day->epoch; my $log_mtime_epoch = (stat($logfilename))[9]; my $is_too_old = ($log_mtime_epoch < $interval_start_epoch); if ($is_too_old) { ::squawk(1, "Skipping log '%s', too old.\n", $logfilename); } return $is_too_old; } sub scan { my ($self) = @_; ::squawk(1, "Beginning scan for %s...\n", $self->day_str); for my $log (@{$self->logs}) { next if ($self->_is_too_old($log)); $self->_scan_log($log); } ::squawk(1, "Completed scan.\n"); return $self; } sub _shorter_to { my ($self, $longer_to) = @_; my $shorter_to = $longer_to; # For brevity, shorten the recipient address if in the expected domain. if (my $elide_domain = $self->elide_domain) { $shorter_to =~ s/\@$elide_domain$//; } return $shorter_to; } sub _report_rejects { my ($self) = @_; my @rejects_all = grep { $_->does('Polyze::RReject'); } @{$self->reporters}; # Partition the rejects by the recipient. my %by_recip = (); for my $r (@rejects_all) { push @{$by_recip{$r->to}}, $r; } # And print them. for my $recip (sort keys %by_recip) { printf("\n%s\n", $self->_shorter_to($recip)); my $rejects_recip = $by_recip{$recip}; # Partition this recipient's rejects by reason. my %by_recip_reason = (); for my $r (@$rejects_recip) { push @{$by_recip_reason{$r->reason}}, $r; } for my $reason (sort keys %by_recip_reason) { print(" $reason\n"); for my $r (@{$by_recip_reason{$reason}}) { printf(" %s\n", $r->to_str); } } } # Footer. my $count_distinct = scalar @rejects_all; my $count_all = reduce { $a + $b->count } (0, @rejects_all); if ($count_distinct == $count_all) { printf("\n%d rejects found.\n", $count_distinct); } elsif ($count_distinct < $count_all) { printf("\n%d/%d distinct/all rejects found.\n", $count_distinct, $count_all); } else { die sprintf("%d > %d, this is a bug", $count_distinct, $count_all); } return; } sub _report_line_count { my ($self) = @_; my ($rcount) = grep { $_->isa('Polyze::RCount'); } @{$self->reporters}; printf( "%s log lines for %s.\n", ($rcount ? format_number($rcount->count) : 0), $self->day->strftime("%A"), ); } sub report { my ($self) = @_; # Header. printf("%s's mail rejects.\n", $self->day->strftime("%F %A")); $self->_report_rejects; $self->_report_line_count; # Footer. printf( "Report completed %s by %s v%s.\n", DateTime->now(time_zone => 'local')->strftime("%F %T %Z"), __PACKAGE__, $VERSION, ); return; } no Moose; __PACKAGE__->meta->make_immutable;