diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..b81c3c9 --- /dev/null +++ b/Makefile @@ -0,0 +1,22 @@ + +DESTDIR ?= $(CURDIR) +ifneq ($(realpath $(DESTDIR)),$(DESTDIR)) +$(error DESTDIR=$(DESTDIR) not valid, it must be existent and absolute) +endif + +# Locations for installing. +LIBDIR := $(DESTDIR)/usr/local/lib/site_perl +BINDIR := $(DESTDIR)/usr/local/sbin +CONFDIR := $(DESTDIR)/etc/cron.daily + +$(LIBDIR) $(BINDIR) $(CONFDIR): + install --mode=0755 -d $@ + +# Installing. This should be simpler. +install: $(LIBDIR) $(BINDIR) $(CONFDIR) + install --mode=0644 Polyze.pm $(LIBDIR) + cp -a Polyze $(LIBDIR) + chmod u=rwx,go=rx $(LIBDIR)/Polyze + chmod u=rw,go=r $(LIBDIR)/Polyze/* + install --mode=0755 polyze send-polyze-daily $(BINDIR) + install --mode=0755 polyze.cron.daily $(CONFDIR)/polyze diff --git a/Polyze.pm b/Polyze.pm new file mode 100644 index 0000000..2dd5d6e --- /dev/null +++ b/Polyze.pm @@ -0,0 +1,279 @@ +package Polyze; +use Moose; +use DateTime; +use IO::File; +use List::Util qw( reduce ); +use Number::Format qw( format_number ); +use Polyze::Line; +use Polyze::H2Early; +use Polyze::HReject; + +our $VERSION = "0.19"; + +# The *_LINE (pseudo-)constants are return values for the handlers' +# scan_line method. Having examined a log line, a handler returns a +# *_LINE value to indicate to self how to treat the current log line. +# The usual treatment is to continue scanning the line by the next +# handler. Sometimes a handler will want scanning of this line to be +# stopped (NEXT_LINE) or to be redone (REDO_LINE). See _scan_log +# below. +# +our $CONTINUE_LINE = 0; +our $NEXT_LINE = 1; +our $REDO_LINE = 2; + +has logs => ( + is => 'ro', + isa => 'ArrayRef[Str]', + required => 1, +); + +has day => ( + is => 'ro', + isa => 'DateTime', + required => 1, +); + +has day_str => ( + is => 'ro', + isa => 'Str', + required => 0, + lazy => 1, + builder => '_build_day_str', +); + +has is_day_found => ( + is => 'rw', + isa => 'Bool', + required => 0, + default => 0, +); + +has handlers => ( + is => 'ro', + isa => 'ArrayRef[Polyze::Handler]', + required => 0, + builder => '_build_handlers', + lazy => 1, +); + +has elide_domain => ( + is => 'ro', + isa => 'Str', + required => 0, +); + +has reporters => ( + is => 'ro', + isa => 'ArrayRef[Polyze::Reporter]', + required => 0, + default => sub { return []; }, +); + +has line => ( + is => 'ro', + isa => 'Maybe[Polyze::Line]', + required => 0, + default => undef, + writer => '_set_line', + handles => [qw( linenumber )], +); + +# E.g. 'Jul 1', note padding. +sub _build_day_str { + my ($self) = @_; + return $self->day->strftime("%b %e"); +} + +sub _scan_log { + my ($self, $log_file_name) = @_; + my $fh = IO::File->new($log_file_name, 'r') or + die("cannot open '$log_file_name'\n"); + ::squawk(1, "Scanning log '%s'...\n", $log_file_name); + LOG_LINE: + while (defined (my $line_str = $fh->getline)) { + $self->_set_line(Polyze::Line->new( + filename => $log_file_name, + linenumber => $fh->input_line_number, + text => $line_str, + )); + # Have to use a shallow copy of handlers for the scan loop because + # we modify the handlers array within the loop. + my @handlers = @{$self->handlers}; + foreach my $h (@handlers) { + my ($lno, $htype) = ($self->linenumber, (ref $h)); + my $retcode = $h->scan_line($self->line); + if ($NEXT_LINE == $retcode) { + ::squawk(3, "%d %s returned NEXT_LINE\n", $lno, $htype); + next LOG_LINE; + } + elsif ($CONTINUE_LINE == $retcode) { + ::squawk(3, "%d %s returned CONTINUE_LINE\n", $lno, $htype); + } + elsif ($REDO_LINE == $retcode) { + ::squawk(3, "%d %s returned REDO_LINE\n", $lno, $htype); + redo LOG_LINE; + } + else { + die(sprintf( + "%d %s returned unknown value '%s'", $lno, $htype, $retcode, + )); + } + } + } + if (defined $self->line) { + ::squawk(1, "%d last line.\n", $self->linenumber); + $self->_set_line(undef); + } else { + ::squawk(1, "Empty file: %s\n", $log_file_name); + } + $fh->close; + return; +} + +sub _build_handlers { + my ($self) = @_; + return [ + Polyze::H2Early->new(polyze => $self), + Polyze::HReject->new(polyze => $self), + ]; +} + +sub remove_handler { + my ($self, $handler) = @_; + my @handlers = @{$self->handlers}; + my $idx = 0; + while ($handler != $handlers[$idx]) { + ++$idx; + die("handler not found") if ($idx > $#handlers); + } + # Landmine warning: have to splice the deref'd handlers attribute, + # not the @handlers array. The latter splices a *copy* leaving the + # handlers unchanged. + splice(@{$self->handlers}, $idx, 1); + ::squawk(1, "%d Removed handler %s\n", $self->linenumber, ref $handler); + return; +} + +sub push_handler { + my ($self, $handler) = @_; + push(@{$self->handlers}, $handler); + ::squawk(1, "%d Pushed handler %s\n", $self->linenumber, ref $handler); + return; +} + +sub unshift_handler { + my ($self, $handler) = @_; + unshift(@{$self->handlers}, $handler); + ::squawk(1, "%d Unshifted handler %s\n", $self->linenumber, ref $handler); + return; +} + +sub add_reporter { + my ($self, $reporter) = @_; + my $target_reporter = do { + if (my ($r) = grep { $reporter->is_similar($_); } @{$self->reporters}) { + $r->increment_count; + ::squawk(2, "%d Incremented %s\n", $self->linenumber, ref $r); + $r; + } else { + push(@{$self->reporters}, $reporter); + ::squawk(2, "%d Added %s\n", $self->linenumber, ref $reporter); + $reporter; + } + }; + return $target_reporter; +} + +sub _is_too_old { + my ($self, $logfilename) = @_; + my $interval_start_epoch = $self->day->epoch; + my $log_mtime_epoch = (stat($logfilename))[9]; + my $is_too_old = ($log_mtime_epoch < $interval_start_epoch); + if ($is_too_old) { + ::squawk(1, "Skipping log '%s', too old.\n", $logfilename); + } + return $is_too_old; +} + +sub scan { + my ($self) = @_; + ::squawk(1, "Beginning scan for %s...\n", $self->day_str); + for my $log (@{$self->logs}) { + next if ($self->_is_too_old($log)); + $self->_scan_log($log); + } + ::squawk(1, "Completed scan.\n"); + return $self; +} + +sub _shorter_to { + my ($self, $longer_to) = @_; + my $shorter_to = $longer_to; + # For brevity, shorten the recipient address if in the expected domain. + if (my $elide_domain = $self->elide_domain) { + $shorter_to =~ s/\@$elide_domain$//; + } + return $shorter_to; +} + +sub _report_rejects { + my ($self) = @_; + my @rejects_all = grep { $_->does('Polyze::RReject'); } @{$self->reporters}; + # Partition the rejects by the recipient. + my %by_recip = (); + for my $r (@rejects_all) { push @{$by_recip{$r->to}}, $r; } + # And print them. + for my $recip (sort keys %by_recip) { + printf("\n%s\n", $self->_shorter_to($recip)); + my $rejects_recip = $by_recip{$recip}; + # Partition this recipient's rejects by reason. + my %by_recip_reason = (); + for my $r (@$rejects_recip) { push @{$by_recip_reason{$r->reason}}, $r; } + for my $reason (sort keys %by_recip_reason) { + print(" $reason\n"); + for my $r (@{$by_recip_reason{$reason}}) { + printf(" %s\n", $r->to_str); + } + } + } + # Footer. + my $count_distinct = scalar @rejects_all; + my $count_all = reduce { $a + $b->count } (0, @rejects_all); + if ($count_distinct == $count_all) { + printf("\n%d rejects found.\n", $count_distinct); + } elsif ($count_distinct < $count_all) { + printf("\n%d/%d distinct/all rejects found.\n", $count_distinct, $count_all); + } else { + die sprintf("%d > %d, this is a bug", $count_distinct, $count_all); + } + return; +} + +sub _report_line_count { + my ($self) = @_; + my ($rcount) = grep { $_->isa('Polyze::RCount'); } @{$self->reporters}; + printf( + "%s log lines for %s.\n", + ($rcount ? format_number($rcount->count) : 0), + $self->day->strftime("%A"), + ); +} + +sub report { + my ($self) = @_; + # Header. + printf("%s's mail rejects.\n", $self->day->strftime("%F %A")); + $self->_report_rejects; + $self->_report_line_count; + # Footer. + printf( + "Report completed %s by %s v%s.\n", + DateTime->now(time_zone => 'local')->strftime("%F %T %Z"), + __PACKAGE__, $VERSION, + ); + return; +} + +no Moose; +__PACKAGE__->meta->make_immutable; diff --git a/Polyze/H2Early.pm b/Polyze/H2Early.pm new file mode 100644 index 0000000..2e5fa27 --- /dev/null +++ b/Polyze/H2Early.pm @@ -0,0 +1,37 @@ +package Polyze::H2Early; +use Moose; +use Polyze::HCount; +use Polyze::H2Late; +with 'Polyze::Handler'; + +sub scan_line { + my ($self, $line) = @_; + my $is_past_early_lines = $line->text =~ $self->day_regex; + return do { + if (! $is_past_early_lines) { + # No point in looking at this line any further. + $Polyze::NEXT_LINE; + } else { + # Inform the polyze object that we did find the wanted day. + $self->polyze->is_day_found(1); + # Since log lines are ordered by timestamp, our work is done. + $self->polyze->remove_handler($self); + # Add a handler to detect going past date. Put it ahead of the + # other handlers so that it can switch off current-day handlers + # promptly. + $self->polyze->unshift_handler( + Polyze::H2Late->new(polyze => $self->polyze), + ); + # We're not returning REDO so this won't count this first in-day line. + # But that's OK because the count initialises to 1. + $self->polyze->push_handler( + Polyze::HCount->new(polyze => $self->polyze), + ); + # Now let the regular handlers do their things. + $Polyze::CONTINUE_LINE; + } + }; +} + +no Moose; +__PACKAGE__->meta->make_immutable; diff --git a/Polyze/H2Late.pm b/Polyze/H2Late.pm new file mode 100644 index 0000000..4ca9881 --- /dev/null +++ b/Polyze/H2Late.pm @@ -0,0 +1,29 @@ +package Polyze::H2Late; +use Moose; +with 'Polyze::Handler'; + +sub scan_line { + my ($self, $line) = @_; + my $is_past_day = ($line->text !~ $self->day_regex); + return do { + if (! $is_past_day) { + # Nothing to see here, let the other handlers do their things. + $Polyze::CONTINUE_LINE; + } else { + # The work of any within-day handler is done since we are now + # past that day. So remove all handlers (including self) + # except those specifically intended for post-day scanning. + my @retirees = + grep { ! $_->isa_post_day_handler; } @{$self->polyze->handlers}; + for my $r (@retirees) { + $self->polyze->remove_handler($r); + } + # Now that we've adjusted the handlers, redo the log line in + # order to apply that adjustment. + $Polyze::REDO_LINE; + } + }; +} + +no Moose; +__PACKAGE__->meta->make_immutable; diff --git a/Polyze/HCount.pm b/Polyze/HCount.pm new file mode 100644 index 0000000..aa9e364 --- /dev/null +++ b/Polyze/HCount.pm @@ -0,0 +1,36 @@ +package Polyze::HCount; +use Moose; +use Polyze::RCount; +with 'Polyze::Handler'; + +has rcount => ( + is => 'ro', + isa => 'Polyze::RCount', + required => 0, + lazy => 1, + builder => '_build_rcount', + handles => [qw( increment_count )], +); + +sub _build_rcount { + my ($self) = @_; + # A reporter object requires a line. But since we don't care about + # the line in this case, give it a dummy. + my $rc = Polyze::RCount->new( + polyze => $self->polyze, + line => Polyze::Line->new( + text => 'Dummy', filename => 'Dummy', linenumber => 0, + ), + ); + $self->polyze->add_reporter($rc); + return $rc; +} + +sub scan_line { + my ($self, $line) = @_; + $self->increment_count; + return $Polyze::CONTINUE_LINE; +} + +no Moose; +__PACKAGE__->meta->make_immutable; diff --git a/Polyze/HPostgreyAccept.pm b/Polyze/HPostgreyAccept.pm new file mode 100644 index 0000000..38e1f07 --- /dev/null +++ b/Polyze/HPostgreyAccept.pm @@ -0,0 +1,51 @@ +package Polyze::HPostgreyAccept; +use Moose; +use Domain::PublicSuffix; +with 'Polyze::Handler'; + +our $DPSuffix = Domain::PublicSuffix->new; + +has rreject => ( + is => 'ro', + isa => 'Polyze::RReject', + required => 1, +); + +sub BUILD { $_[0]->isa_post_day_handler(1); } + +my $RegClientName = qr[, client_name=([^,]+)]; +my $RegClientAddress = qr[, client_address=([^,]+)]; +my $RegSender = qr[, sender=([^,]+)]; +my $RegRecipient = qr[, recipient=([^,\n]+)]; + +sub scan_line { + my ($self, $line) = @_; + if ($line->text =~ m/ postgrey\[[[:digit:]]+\]: action=pass, /) { + my ($client_name) = ($line->text =~ $RegClientName); + $client_name //= ''; + my ($client_address) = ($line->text =~ $RegClientAddress); + $client_address //= ''; + my ($sender) = ($line->text =~ $RegSender); + $sender //= ''; + my ($recipient) = ($line->text =~ $RegRecipient); + $recipient //= ''; + + # This is in case $client_name is invalid as a domain name. + my $root_domain_or_ip = + $DPSuffix->get_root_domain($client_name) // $client_address; + # Does the current line match with our rreject? + my $is_acceptance = ( + $root_domain_or_ip eq $self->rreject->root_domain_or_ip + && $sender eq $self->rreject->from + && $recipient eq $self->rreject->to + ); + if ($is_acceptance) { + $self->rreject->is_later_accepted(1); + $self->polyze->remove_handler($self); + } + } + return $Polyze::CONTINUE_LINE; +} + +no Moose; +__PACKAGE__->meta->make_immutable; diff --git a/Polyze/HReject.pm b/Polyze/HReject.pm new file mode 100644 index 0000000..797e3ad --- /dev/null +++ b/Polyze/HReject.pm @@ -0,0 +1,39 @@ +package Polyze::HReject; +use Moose; +use Polyze::RRejectCleanup; +use Polyze::RRejectRCPT; +use Polyze::RRejectMilter; +use Polyze::HPostgreyAccept; +with 'Polyze::Handler'; + +sub scan_line { + my ($self, $line) = @_; + if ($line->text =~ m/ reject: RCPT /) { + my $reject = $self->polyze->add_reporter( + Polyze::RRejectRCPT->new(polyze => $self->polyze, line => $line), + ); + if ("Greylist" eq $reject->reason && 1 == $reject->count) { + # This is the first instance of a greylist rejection. Add a + # followup handler to look for an eventual acceptance of the + # mail by postgrey. + $self->polyze->push_handler( + Polyze::HPostgreyAccept->new( + polyze => $self->polyze, + rreject => $reject, + ), + ); + } + } elsif ($line->text =~ m[/cleanup.+: reject: ]) { + $self->polyze->add_reporter( + Polyze::RRejectCleanup->new(polyze => $self->polyze, line => $line), + ); + } elsif ($line->text =~ m/: milter-reject: /) { + $self->polyze->add_reporter( + Polyze::RRejectMilter->new(polyze => $self->polyze, line => $line), + ); + } + return $Polyze::CONTINUE_LINE; +} + +no Moose; +__PACKAGE__->meta->make_immutable; diff --git a/Polyze/Handler.pm b/Polyze/Handler.pm new file mode 100644 index 0000000..1c72f40 --- /dev/null +++ b/Polyze/Handler.pm @@ -0,0 +1,41 @@ +package Polyze::Handler; +use Moose::Role; + +has polyze => ( + is => 'ro', + isa => 'Polyze', + required => 1, + handles => [qw( day day_str )], +); + +has isa_post_day_handler => ( + is => 'rw', + isa => 'Bool', + required => 0, + default => 0, +); + +has day_regex => ( + is => 'ro', + isa => 'RegexpRef', + required => 0, + lazy => 1, + builder => '_build_day_regex', +); + +sub _build_day_regex { + my ($self) = @_; + my $day_str = $self->day_str; + return qr/^$day_str /; +} + +# Override this in a concrete handler. The override should return one +# of the $Polyze::*_LINE constants. +# +sub scan_line { + my ($self, $line) = @_; + die("override missing"); +} + +no Moose; +1; diff --git a/Polyze/Line.pm b/Polyze/Line.pm new file mode 100644 index 0000000..afc09eb --- /dev/null +++ b/Polyze/Line.pm @@ -0,0 +1,23 @@ +package Polyze::Line; +use Moose; + +has text => ( + is => 'ro', + isa => 'Str', + required => 1, +); + +has filename => ( + is => 'ro', + isa => 'Str', + required => 1, +); + +has linenumber => ( + is => 'ro', + isa => 'Int', + required => 1, +); + +no Moose; +__PACKAGE__->meta->make_immutable; diff --git a/Polyze/RCount.pm b/Polyze/RCount.pm new file mode 100644 index 0000000..5d1846b --- /dev/null +++ b/Polyze/RCount.pm @@ -0,0 +1,14 @@ +package Polyze::RCount; +use Moose; +with 'Polyze::Reporter'; + +# This override is required in the case when self is not the first +# Reporter object added for the current scan. With real log files +# such a case is unlikely but I hit it with test input. +# +sub is_similar { + return 0; +} + +no Moose; +__PACKAGE__->meta->make_immutable; diff --git a/Polyze/RReject.pm b/Polyze/RReject.pm new file mode 100644 index 0000000..4efddee --- /dev/null +++ b/Polyze/RReject.pm @@ -0,0 +1,139 @@ +package Polyze::RReject; +use Moose::Role; +use File::Basename; +use Domain::PublicSuffix; +with 'Polyze::Reporter'; + +our $DPSuffix = Domain::PublicSuffix->new; + +has from => ( + is => 'ro', + isa => 'Str', + required => 0, + builder => '_build_from', + lazy => 1, +); + +has to => ( + is => 'ro', + isa => 'Str', + required => 0, + builder => '_build_to', + lazy => 1, +); + +has sender_domain => ( + is => 'ro', + isa => 'Maybe[Str]', + required => 0, + builder => '_build_sender_domain', + lazy => 1, +); + +has sender_ip => ( + is => 'ro', + isa => 'Str', + required => 0, + builder => '_build_sender_ip', + lazy => 1, +); + +has root_domain_or_ip => ( + is => 'ro', + isa => 'Str', + required => 0, + builder => '_build_root_domain_or_ip', + lazy => 1, +); + +has reason => ( + is => 'ro', + isa => 'Str', + required => 0, + builder => '_build_reason', + lazy => 1, +); + +has is_later_accepted => ( + is => 'rw', + isa => 'Bool', + required => 0, + default => 0, +); + +sub _build_from { + my ($self) = @_; + # Try to extract the header-from, available if rejection is due to + # header_checks(5) on the From: header. + my ($f) = $self->text =~ m/: reject: header From: (.+?) from \b/; + if (!$f) { + # Get the other one, the mail-from. + ($f) = ($self->text =~ m/ from=<([^>]+)>/); + } + return $f // "[no from]"; +} + +sub _build_to { + my ($self) = @_; + my ($t) = ($self->text =~ m/ to=<([^>]+)>/); + return $t // "[no to]"; +} + +sub _build_sender_domain { + die("missing override"); +} + +sub _build_sender_ip { + die("missing override"); +} + +sub _build_reason { + die("missing override"); +} + +sub _build_root_domain_or_ip { + my ($self) = @_; + return do { + if (defined $self->sender_domain) { + # 'local' is a special case, i.e. for mail sent via the + # 'sendmail' binary. + if ('local' eq $self->sender_domain) { + 'local'; + } else { + $DPSuffix->get_root_domain($self->sender_domain) + // $self->sender_ip; + } + } else { + $self->sender_ip; + } + }; +} + +sub to_str { + my ($self) = @_; + my $fn = basename($self->line->filename); + # For brevity, shorten the log file name. + $fn =~ s/^mail.log//; + $fn ||= "_"; + return sprintf( + "%s%s %s/%d %s: %s", + ($self->is_later_accepted ? '*' : ''), + ($self->count > 1 ? $self->count : ' '), + $fn, $self->linenumber, + $self->root_domain_or_ip, $self->from, + ); +} + +sub is_similar { + my ($lhs, $rhs) = @_; + return ( + $rhs->does('Polyze::RReject') + && $lhs->reason eq $rhs->reason + && $lhs->from eq $rhs->from + && $lhs->to eq $rhs->to + && $lhs->root_domain_or_ip eq $rhs->root_domain_or_ip + ); +} + +no Moose; +1; diff --git a/Polyze/RRejectCleanup.pm b/Polyze/RRejectCleanup.pm new file mode 100644 index 0000000..ed9b6dd --- /dev/null +++ b/Polyze/RRejectCleanup.pm @@ -0,0 +1,42 @@ +package Polyze::RRejectCleanup; +use Moose; +with 'Polyze::RReject'; + +sub _build_sender_domain { + my ($self) = @_; + my ($domain) = ( + $self->text =~ m/: reject: header .+? from ([^[]+)?\[/ + ); + if (!$domain) { + # If it didn't come from outside, look for 'local'. + ($domain) = $self->text =~ m/: reject: header .+? from (local);/ + } + return $domain; +} + +sub _build_sender_ip { + my ($self) = @_; + my ($ip) = ( + $self->text =~ m/: reject: .+ from (?:[^[]+)?\[([^]]+)\]/ + ); + return $ip // ""; +} + +# This attempts to capture the reject's "optional text" (plus the code +# prepended by postfix, see header_checks(5)). It captures the text +# following the last ':', which is not ideal if the optional text +# itself contains a ':'. +# +my $RegCleanup = qr{ + /cleanup\[[0-9]+\]:\ [[:alnum:]]+: + \ reject:\ .+:\ ([^:\n]+) + }x; + +sub _build_reason { + my ($self) = @_; + my ($cleanup_defect) = $self->text =~ $RegCleanup; + return $cleanup_defect; +} + +no Moose; +__PACKAGE__->meta->make_immutable; diff --git a/Polyze/RRejectMilter.pm b/Polyze/RRejectMilter.pm new file mode 100644 index 0000000..6f4b9c1 --- /dev/null +++ b/Polyze/RRejectMilter.pm @@ -0,0 +1,26 @@ +package Polyze::RRejectMilter; +use Moose; +with 'Polyze::RReject'; + +sub _build_sender_domain { + my ($self) = @_; + my ($domain) = ( + $self->text =~ m/: milter-reject: (?:[^ ]+) from ([^[]+)?\[/ + ); + return $domain; +} + +sub _build_sender_ip { + my ($self) = @_; + my ($ip) = ( + $self->text =~ m/: milter-reject: (?:[^ ]+) from (?:[^[]+)?\[([^]]+)\]/ + ); + return $ip; +} + +sub _build_reason { + "Milter reject"; +} + +no Moose; +__PACKAGE__->meta->make_immutable; diff --git a/Polyze/RRejectRCPT.pm b/Polyze/RRejectRCPT.pm new file mode 100644 index 0000000..4f95f98 --- /dev/null +++ b/Polyze/RRejectRCPT.pm @@ -0,0 +1,72 @@ +package Polyze::RRejectRCPT; +use Moose; +with 'Polyze::RReject'; + +sub _build_sender_domain { + my ($self) = @_; + my ($domain) = ($self->text =~ m/ RCPT from ([^[]+)?\[/); + return $domain; +} + +sub _build_sender_ip { + my ($self) = @_; + my ($ip) = ($self->text =~ m/ RCPT from (?:[^[]+)?\[([^]]+)\]/); + return $ip; +} + +# Regexes for identifying the rejection reason. + +my $RegGreylist = + qr[rejected: Greylisted, see http://postgrey.schweikert.ch]; + +my $RegReverseDNS = + qr[Client host rejected: cannot find your reverse hostname]; + +my $RegBlocked = qr[ + # Designed to capture a string like "zen.spamhaus.org". + postfix/postscreen\b.+ + \ Service\ unavailable;\ client\ .+ + \ blocked\ using\ ([^[:space:];]+) + ]x; + +my $RegRelay = qr[Relay access denied;]; + +# For postfix's recipient address rejections, e.g. 'User unknown', +# 'undeliverable address' or rejected via check_recipient_access. +my $RegCheckRecipient = qr[: Recipient address rejected: ([^:;]+).*; from=<]; + +my $RegProtocolErr = qr[ Protocol error; from=<]; + +# For postfix's reject_unknown_sender_domain. +my $RegSenderAddress = qr[: (Sender address rejected: .+); from=<]; + +# For postfix's reject_non_fqdn_helo_hostname. +my $RegHelo = qr[: (Helo command rejected: .+); from=<]; + +sub _build_reason { + my ($self) = @_; + my $t = $self->text; + return do { + if ($t =~ $RegGreylist) + { "Greylist"; } + elsif ($t =~ $RegReverseDNS) + { "No reverse DNS"; } + elsif (my ($blocker) = $t =~ $RegBlocked) + { $blocker; } + elsif ($t =~ $RegRelay) + { "Relay access denied"; } + elsif (my ($recipient_defect) = $t =~ $RegCheckRecipient) + { $recipient_defect; } + elsif ($t =~ $RegProtocolErr) + { "Protocol error"; } + elsif (my ($sender_defect) = $t =~ $RegSenderAddress) + { $sender_defect; } + elsif (my ($helo_defect) = $t =~ $RegHelo) + { $helo_defect; } + else + { "Other"; } + }; +} + +no Moose; +__PACKAGE__->meta->make_immutable; diff --git a/Polyze/Reporter.pm b/Polyze/Reporter.pm new file mode 100644 index 0000000..86739d8 --- /dev/null +++ b/Polyze/Reporter.pm @@ -0,0 +1,36 @@ +package Polyze::Reporter; +use Moose::Role; + +has polyze => ( + is => 'ro', + isa => 'Polyze', + required => 1, + handles => [qw( elide_domain )], +); + +has line => ( + is => 'ro', + isa => 'Polyze::Line', + required => 1, + handles => [qw( linenumber text )], +); + +has count => ( + is => 'ro', + isa => 'Int', + required => 0, + default => 1, + writer => '_set_count', +); + +sub increment_count { + my ($self) = @_; + $self->_set_count($self->count + 1); +} + +sub is_similar { + die("override missing"); +} + +no Moose; +1; diff --git a/README b/README new file mode 100644 index 0000000..2195583 --- /dev/null +++ b/README @@ -0,0 +1,147 @@ + +About +----- + +This is a log file analyser for Postfix. It reports on mail rejected +by Postfix, with the aim of helping avoid the rejection of legitimate +mail. It aims to reports on all rejections so might not be suitable +for a high traffic site. + +It runs on Debian and - probably - other systems where perl and bash +are available. + + +Example report +-------------- + +Begin example ---> +2018-07-23 Monday's mail rejects. + +jbloggs + Greylist + * _/506 example.com: me@example.com + +spameri@tiscali.it + No reverse DNS + 2 _/703 190.116.27.234: spameri@tiscali.it + +2/3 distinct/all rejects found. +683 log lines for Monday. +Report completed 2022-09-04 06:25:04 BST by Polyze. +<--- end example + +The first line shows the calendar day reported on. Rejections are +then grouped by recipient address, here "jbloggs" and +"spameri@tiscali.it". Because jbloggs is a local user, the nonlocal +part is omitted. The spameri address represents a relay attempt so +shows the targetted domain. + +Rejections are then grouped by rejection reason, here "Greylist" followed by "No +reverse DNS". + +The first rejection line, from left to right: + + *: the mail was later accepted by postgrey. + _: replaces 'mail.log' (".1" likewise replaces "mail.log.1"). + 506: line number of the first instance. + example.com: sender's root domain (subdomain is elided). +me@example.com: from-address. + +More about Greylist: the '*' indicates only that polyze found a later +'action=pass' line that matched the rejection. It doesn't guarantee +that the mail was delivered since it might have been rejected for +another reason. The search for the 'pass' line isn't confined to the +target day; polyze will search to the end of the last log file. + +Rejections other than Greylist are assumed to be final. Polyze +doesn't look at the SMTP status code. + +In the other rejection line, beginning '2 _/703...', the 2 is a +"similarity count". Two rejections are 'similar' if they match on all +of: the reason; the from address; the to address; and the sending +server's root domain (if available) or ip address. + +When polyze finds a rejection it looks for a similar one that it found +already to increment its count. If it doesn't find one it adds a new +rejection with a count of 1. The example report shows three +rejections of which two are similar. + + +Copying +------- + +©Copyright + +polyze may be copied only under the same terms as perl[1] or under the +GNU GPL[2] (version 3 or later). This is free software: you are free +to change and redistribute it. There is NO WARRANTY, to the extent +permitted by law. + +1. http://dev.perl.org/licenses/artistic.html +2. https://www.gnu.org/licenses/gpl-3.0.en.html + + +Dependencies +------------ + +postfix +libdomain-publicsuffix-perl +libdatetime-perl +libmoose-perl +libcommon-sense-perl +libnumber-format-perl + + +Installing +---------- + +Install dependencies. + +After a git clone, cd into polyze/ and with root privilege do + + # DESTDIR='' make install + +To sanity check, run + + # polyze --help + +To try it out, run + + # polyze + +on its own and it will attempt to report on yesterday's mail +rejections, to standard output. + + +Cron +---- + +This assumes your system supports /etc/cron.daily/. + +Included is /etc/cron.daily/polyze which mails the postmaster a report +on yesterday's rejections. This cron script uses the postconf value +of mydomain for two things. One is to infer the postmaster's email +address. The other is the '--elide-domain' value (see 'polyze +--help'). To see the mydomain value, run + + $ postconf -h mydomain + +If that prints 'example.com' then polyze mails its reports to + and it removes '@example.com' from reported +recipient addresses. If you need a different domain, edit +/etc/cron.daily/polyze to change the argument passed to +send-polyze-daily. + +There's no debian confile cleverness here unfortunately, so if you +ever re-install, you'll need to repeat your edit. Or get your +'mydomain' set suitably. + + +Bugs +---- + +Polyze should probably do something with the SMTP status code. + +The perl modules lack pod. + +Send bug reports or comments to . diff --git a/README.md b/README.md deleted file mode 100644 index 329a5a7..0000000 --- a/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# polyze - -A log file analyzer for postfix, for small sites. \ No newline at end of file diff --git a/polyze b/polyze new file mode 100755 index 0000000..36610e2 --- /dev/null +++ b/polyze @@ -0,0 +1,104 @@ +#!/usr/bin/perl + +use English qw(-no_match_vars); +use common::sense; +use File::Basename; +use Polyze; +use DateTime; +use Getopt::Long; + +my ($help, $version, $verbosity, + $days_ago, $elide_domain, + $log_files, + ) = ( + 0, 0, 0, + 1, "", + [ "/var/log/mail.log.1", "/var/log/mail.log" ], +); +my $program_name = basename($PROGRAM_NAME); + +GetOptions( + "help" => \$help, + "version" => \$version, + "verbose+" => \$verbosity, + "days-ago=i" => \$days_ago, + "log-file=s@" => \$log_files, + "elide-domain=s" => \$elide_domain, +) or die($OS_ERROR); + +if ($help) { + my $msg = <. +HELP + print($msg); + exit(0); +} +elsif ($version) { + my $v = $Polyze::VERSION; + my $msg = < +$program_name may be copied only under the same terms as perl or under +the GNU GPL (version 3 or later). This is free software: you are free +to change and redistribute it. There is NO WARRANTY, to the extent +permitted by law. +VERSION + print $msg; + exit(0); +} + +sub squawk { + my $howloud = shift; + die("Not a valid loudness: '$howloud'") if ($howloud !~ m/^[0-9]$/); + if ($howloud <= $verbosity) { + printf(@_); + } +} + +my $p = Polyze->new( + day => DateTime->today(time_zone => 'local')->subtract(days => $days_ago), + logs => $log_files, + elide_domain => $elide_domain, +); +if ($p->scan->is_day_found) { + $p->report; +} else { + printf(STDERR "Error: date '%s' not found.\n", $p->day_str); + exit(1); +} + +exit(0); diff --git a/polyze.cron.daily b/polyze.cron.daily new file mode 100755 index 0000000..54d6841 --- /dev/null +++ b/polyze.cron.daily @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ -x /usr/local/sbin/send-polyze-daily ]; then + /usr/local/sbin/send-polyze-daily $(postconf -h mydomain) +fi diff --git a/send-polyze-daily b/send-polyze-daily new file mode 100755 index 0000000..829a1fa --- /dev/null +++ b/send-polyze-daily @@ -0,0 +1,30 @@ +#!/bin/bash +# Run a polyze report and mail it to postmaster. + +set -o errexit +set -o nounset +set -o pipefail + +DOMAIN="$1" + +sendmail -bm -F "Polyze" -f polyze@$DOMAIN postmaster@$DOMAIN < <( + cat < 0)); then + printf "%d items queued.\n\n" $queue_count + fi + + # Report rejections. + polyze --elide-domain="$DOMAIN" + cat <