[SA-exim] sa-exim cvs with greylisting support

Marc MERLIN marc at merlins.org
Sat Mar 6 17:40:50 PST 2004


On Fri, Mar 05, 2004 at 06:14:35PM +0100, Sander Smeenk wrote:
> > Even sa-exim in itself is not nearly as effective if you're not
> > running it on all your MXes.
> 
> My fallbacks don't really care about huge queues of frozen mail,
> apparently ;)
 
Silly them :)

> Looking forward to it, then.  I'll set up my fallback mx, and implement
> greylisting, if at all possible. (I don't like patching SpamAssassin, did
> you get any feedback on your patch yet?)

They're not going to bother since they are concentrating on SA 3.0,
which will have plugin support.
(I had to patch the SA logic to allow my rule to run last, which is not
something SA supported in the 2.5/2.6 trees.)

I'll also update my patch in the CVS tree (it was for an older SA which
just worked differently, and the patch doesn't apply well at all).
For that matter, since I can't do CVS too well over GPRS, I'll attach
the new SA patch to this mail.

Marc
-- 
"A mouse is a device used to point at the xterm you want to type in" - A.S.R.
Microsoft is to operating systems & security ....
                                      .... what McDonalds is to gourmet cooking
Home page: http://marc.merlins.org/   |   Finger marc_f at merlins.org for PGP key
-------------- next part --------------
diff -urN SpamAssassin.orig/Conf.pm SpamAssassin/Conf.pm
--- SpamAssassin.orig/Conf.pm	Mon Dec 15 22:41:57 2003
+++ SpamAssassin/Conf.pm	Sun Feb 29 17:42:58 2004
@@ -107,6 +107,10 @@
 use constant TYPE_URI_EVALS     => 0x0011;
 use constant TYPE_META_TESTS    => 0x0012;
 use constant TYPE_RBL_EVALS     => 0x0013;
+# Need to reserve a number with the SA folks (needs to be odd as it is an
+# eval test)
+use constant TYPE_RES_EVALS	=> 0x0021;
+
 
 $VERSION = 'bogus';     # avoid CPAN.pm picking up version strings later
 
@@ -2000,12 +2004,15 @@
 
 =cut
 
-    if (/^header\s+(\S+)\s+(?:rbl)?eval:(.*)$/) {
+    if (/^header\s+(\S+)\s+(?:rbl|res)?eval:(.*)$/) {
       my ($name, $fn) = ($1, $2);
 
       if ($fn =~ /^check_rbl/) {
 	$self->add_test ($name, $fn, TYPE_RBL_EVALS);
       }
+      elsif (/^header\s+(\S+)\s+reseval:(.*)$/) {
+	$self->add_test ($name, $fn, TYPE_RES_EVALS);
+      }
       else {
 	$self->add_test ($name, $fn, TYPE_HEAD_EVALS);
       }
@@ -2603,6 +2610,9 @@
 	}
 	elsif ($type == TYPE_RBL_EVALS) {
 	  $self->{rbl_evals}->{$name} = \@args;
+ 	}
+ 	elsif ($type == TYPE_RES_EVALS) {
+ 	  $self->{res_evals}->{$name} = \@args;
 	}
 	elsif ($type == TYPE_RAWBODY_EVALS) {
 	  $self->{rawbody_evals}->{$name} = \@args;
diff -urN SpamAssassin.orig/EvalTests.pm SpamAssassin/EvalTests.pm
--- SpamAssassin.orig/EvalTests.pm	Sat Jan 17 15:56:08 2004
+++ SpamAssassin/EvalTests.pm	Sun Feb 29 19:02:46 2004
@@ -1941,6 +1941,202 @@
   return $self->{habeas_swe};
 }
 
+
+# This was originally written to implement greylisting in SA-Exim, although
+# I have tried to make it more general and allow for reuse in other MTAs
+# (although they will need to
+# 1) be running SA at SMTP time
+# 2) Provide the list of rcpt to and env from in some headers for SA to read
+# 3) Provide the IP of the connecting host )
+#
+# This rule should get a negative score so that if we've already seen the
+# greylisting tuplet before, we lower the score, which hopefully brings us from
+# a tempreject to an accept (at least that's how sa-exim does it)
+# -- Marc <marc_soft at merlins.org> 2004/01/19
+
+sub greylisting {
+  # db/file/dir / pointer type / how many secs to greylist after 1st connection
+  # SA score after which we don't bother running / SMTP time data header names 
+  my ($self, $dirorfileordb, $method, $greylisttime, $dontcheckscore,
+	  $connectiphdr, $envfromhdr, $rcpttohdr) = @_;
+  my $dirorfile = shift @_;
+
+  my $connectip; 
+  my $envfrom;
+  my $rcptto;
+  my @rcptto;
+  my $iswhitelisted=0;
+  my $err;
+  my $mesgid = $self->get ('Message-Id')."\n"; 
+  my $tuplet;
+  
+  # No newlines, thank you (yes, you need this twice apparently)
+  chomp ($mesgid);
+  chomp ($mesgid);
+  $mesgid =~ s/\012/|/g;
+
+  # For stuff that we know is spam, don't greylist the host
+  # (that might help later spam with a lower score to come in)
+  if ($self->{hits} >= $dontcheckscore)
+  {
+    #warn "debug: skipping greylisting on $mesgid, since score is already ".$self->{hits}." and you configured greylisting to not bother with anything above $dontcheckscore\n";
+    return 0;
+  }
+
+
+  if (not $connectip = $self->get($connectiphdr))
+  {
+    warn "Couldn't get Connecting IP header $connectiphdr for message $mesgid, skipping greylisting call\n";
+    return 0;
+  }
+  chomp($connectip);
+  # Clean up input (for security, if you use files/dirs)
+  $connectip =~ s#/#-#g;
+
+  if (not $envfrom = $self->get($envfromhdr))
+  {
+    warn "Couldn't get Envelope From header $envfromhdr for message $mesgid, skipping greylisting call\n";
+    return 0;
+  }
+  chomp($envfrom);
+  # Clean up input (for security, if you use files/dirs)
+  $envfrom =~ s#/#-#g;
+
+  if (not $rcptto = $self->get($rcpttohdr))
+  {
+    warn "Couldn't get Rcpt To header $rcpttohdr for message $mesgid, skipping greylisting call\n";
+    return 0;
+  }
+  chomp($rcptto);
+  # Clean up input (for security, if you use files/dirs)
+  $rcptto =~ s#/#-#g;
+  @rcptto = split(/, /, $rcptto);
+
+  umask 0007;
+
+  foreach $rcptto (@rcptto)
+  {
+    # The dir method is easy to fiddle with and expire records in (with
+    # a find | rm) but it's probably more I/O intensive than a real DB
+    # and suffers from directory size problems if a specific IP is
+    # generating tens of thousands of tuplets. -- Marc
+    # That said, I prefer formats I can easily tinker with, and not having to
+    # worry about buggy locking and so forth 
+    if ($method eq "dir")
+    {
+      my ($ipbyte1, $ipbyte2, $ipbyte3, $ipbyte4) = split(/\./, $connectip); 
+      my $ipdir1 = "$dirorfileordb/$ipbyte1";
+      my $ipdir2 = "$ipdir1/$ipbyte2";
+      my $ipdir3 = "$ipdir2/$ipbyte3";
+      my $ipdir4 = "$ipdir3/$ipbyte4";
+      my $tupletdir = "$ipdir4/$envfrom";
+
+      $tuplet = "$tupletdir/$rcptto";
+  
+      # make directory whether it's there or not (faster than test and set)
+      mkdir $ipdir1;
+      mkdir $ipdir2;
+      mkdir $ipdir3;
+      mkdir $ipdir4;
+      mkdir $tupletdir;
+
+      if (not -e $tuplet) 
+      {
+        # If the tuplets aren't there, we create them and continue in
+        # case there are other ones (one of them might be whitelisted already)
+	$err="creating $tuplet";
+	open (TUPLET, ">$tuplet") or goto greylisterror;
+	print TUPLET time."\n";
+	print TUPLET "Status: Greylisted\n";
+	print TUPLET "Last Message-Id: $mesgid\n";
+	print TUPLET "Whitelisted Count: 0\n";
+	print TUPLET "Query Count: 1\n";
+	$err="closing first-written $tuplet";
+	close TUPLET or goto greylisterror;
+      }
+      else
+      {
+	my $time;
+	my $status;
+	my $whitelistcount;
+	my $querycount;
+
+	# Take into account race condition of expiring deletes and us running
+	$err="reading $tuplet";
+	open (TUPLET, "<$tuplet") or goto greylisterror;
+	$err="Couldn't read time";
+	defined ($time=<TUPLET>) or goto greylisterror;
+	chomp ($time);
+
+	$err="Couldn't read status";
+	defined ($status=<TUPLET>) or goto greylisterror;
+	chomp ($status);
+	$err="Couldn't extract Status from $status";
+	$status =~ s/^Status: // or goto greylisterror;
+
+	# Skip Mesg-Id
+	$err="Couldn't skip Mesg-Id";
+	defined ($_=<TUPLET>) or goto greylisterror;
+
+	$err="Couldn't read whitelistcount";
+	defined ($whitelistcount=<TUPLET>) or goto greylisterror;
+	chomp ($whitelistcount);
+	$err="Couldn't extract Whitelisted Count from $whitelistcount";
+	$whitelistcount =~ s/^Whitelisted Count: // or goto greylisterror;
+
+	$err="Couldn't read querycount";
+	defined ($querycount=<TUPLET>) or goto greylisterror;
+	chomp ($querycount);
+	$err="Couldn't extract Query Count from $querycount";
+	$querycount =~ s/^Query Count: // or goto greylisterror;
+	close (TUPLET);
+
+	$querycount++;
+	if ((time - $time) > $greylisttime)
+	{
+	  $status="Whitelisted";
+	  $whitelistcount++;
+	}
+
+	$err="re-writing $tuplet";
+	open (TUPLET, ">$tuplet") or goto greylisterror;
+	print TUPLET "$time\n";
+	print TUPLET "Status: $status\n";
+	print TUPLET "Last Message-Id: $mesgid\n";
+	print TUPLET "Whitelisted Count: $whitelistcount\n";
+	print TUPLET "Query Count: $querycount\n";
+	$err="closing re-written $tuplet";
+	close TUPLET or goto greylisterror;
+
+        # We continue processing the other recipients, to set up or
+	# update their counters
+	if ($status eq "Whitelisted")
+	{
+	  $iswhitelisted=1;
+	}
+      }
+    }
+    elsif ($method eq "file")
+    {
+      warn "codeme\n";
+    }
+    elsif ($method eq "db")
+    {
+      warn "codeme\n";
+    }
+  }
+  
+  return $iswhitelisted;
+  
+  greylisterror:
+  warn "Reached greylisterror: $err / $!";
+  # delete tuplet since it apparently had issues but don't check for errors
+  # in case it was a permission denied on write
+  unlink ($tuplet);
+  return $iswhitelisted;
+}
+
+
 ###########################################################################
 # BODY TESTS:
 ###########################################################################
diff -urN SpamAssassin.orig/PerMsgStatus.pm SpamAssassin/PerMsgStatus.pm
--- SpamAssassin.orig/PerMsgStatus.pm	Tue Jan 20 13:40:04 2004
+++ SpamAssassin/PerMsgStatus.pm	Sun Feb 29 19:01:19 2004
@@ -184,6 +184,9 @@
 
     # add points from Bayes, before adjusting the AWL
     $self->{hits} += $self->{learned_hits};
+    
+    # Now, we can run rules that have to run last
+    $self->do_res_eval_tests();
 
     # Do AWL tests last, since these need the score to have already been
     # calculated
@@ -2010,6 +2013,11 @@
 }
 
 ###########################################################################
+
+sub do_res_eval_tests {
+  my ($self) = @_;
+  $self->run_eval_tests ($self->{conf}->{res_evals}, '');
+}
 
 sub do_head_eval_tests {
   my ($self) = @_;


More information about the SA-Exim mailing list