From e70b8004e17cc67026f00742e7a6c0e488b03975 Mon Sep 17 00:00:00 2001
From: "Murray S. Kucherawy"
Date: Sat, 7 Jul 2012 21:57:25 -0700
Subject: [PATCH] Add contrib and contrib/rddmarc

---
 Makefile.am                    |   2 +-
 configure.ac                   |   2 +
 contrib/Makefile.am            |   5 ++
 contrib/README                 |  11 +++
 contrib/rddmarc/Makefile.am    |   6 ++
 contrib/rddmarc/README.rddmarc |  31 ++++++++
 contrib/rddmarc/dmarcfail.py   |  63 ++++++++++++++++
 contrib/rddmarc/mkdmarc        |  47 ++++++++++++
 contrib/rddmarc/rddmarc        | 141 +++++++++++++++++++++++++++++++++++
 9 files changed, 307 insertions(+), 1 deletion(-)
 create mode 100644 contrib/Makefile.am
 create mode 100644 contrib/README
 create mode 100644 contrib/rddmarc/Makefile.am
 create mode 100644 contrib/rddmarc/README.rddmarc
 create mode 100644 contrib/rddmarc/dmarcfail.py
 create mode 100644 contrib/rddmarc/mkdmarc
 create mode 100644 contrib/rddmarc/rddmarc

diff --git a/Makefile.am b/Makefile.am
index c604fbeb..ba45d1b4 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -4,7 +4,7 @@ bindir=@LIBDMARC_DIR@/bin
 libdir=@LIBDMARC_DIR@/lib
 includedir=@LIBDMARC_DIR@/include
 
-SUBDIRS = libopendmarc docs opendmarc reports
+SUBDIRS = contrib docs libopendmarc opendmarc reports
 
 auxdir = @ac_aux_dir@
 AUX_DIST = $(auxdir)/install-sh $(auxdir)/missing \
diff --git a/configure.ac b/configure.ac
index 16e4c802..115458d6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -342,6 +342,8 @@ AC_SUBST([SYSCONFDIR])
 # Generate files
 #
 AC_OUTPUT([	Makefile
+		contrib/Makefile
+		contrib/rddmarc/Makefile
 		docs/Makefile
 		libopendmarc/Makefile
 		libopendmarc/tests/Makefile
diff --git a/contrib/Makefile.am b/contrib/Makefile.am
new file mode 100644
index 00000000..fcd7b761
--- /dev/null
+++ b/contrib/Makefile.am
@@ -0,0 +1,5 @@
+# Copyright (c) 2012, The Trusted Domain Project.  All rights reserved.
+
+SUBDIRS = rddmarc
+
+dist_doc_DATA = README
diff --git a/contrib/README b/contrib/README
new file mode 100644
index 00000000..cec28847
--- /dev/null
+++ b/contrib/README
@@ -0,0 +1,11 @@
+This "contrib" directory of the OpenDMARC package and its subdirectories
+contain files contributed by members of the community that provide functions
+not directly supported by the project team.  The copyrights on the files in
+and/or below this directory are owned by the files' owners and not by
+The Trusted Domain Project.
+
+Support for files contained here is provided only on a best-effort basis by
+the project team and by the files' owners.
+
+--
+Copyright (c) 2012, The Trusted Domain Project.  All rights reserved.
diff --git a/contrib/rddmarc/Makefile.am b/contrib/rddmarc/Makefile.am
new file mode 100644
index 00000000..d11bed01
--- /dev/null
+++ b/contrib/rddmarc/Makefile.am
@@ -0,0 +1,6 @@
+# Copyright (c) 2012, The Trusted Domain Project.  All rights reserved.
+
+dist_doc_DATA = README.rddmarc \
+		dmarcfail.py \
+		mkdmarc \
+		rddmarc
diff --git a/contrib/rddmarc/README.rddmarc b/contrib/rddmarc/README.rddmarc
new file mode 100644
index 00000000..eafcee47
--- /dev/null
+++ b/contrib/rddmarc/README.rddmarc
@@ -0,0 +1,31 @@
+These are little scripts to parse DMARC reports.
+
+The first, rddmarc, is a perl script that takes an incoming DMARC
+summary report email, extracts and unpacks the ZIP file, parses the
+XML, and puts the parts about received mail into a MySQL database.
+The database is set up to handle reports about multiple domains from
+multiple reporters.  It's handling reports from Google, Yahoo, xs4all
+and Netease.
+
+It expects filenames on the command line, each of which contains a
+mail message, but it'd be easy enough to adjust it to read stdin or
+anywhere else.
+
+It works great on FreeBSD, can probably be made to work on linux with
+modest effort, no clue about other systems.
It needs the
+MIME::Parser, XML::Simple, and DBI perl modules and the freeware unzip
+program to extract stuff from the ZIP file.
+
+The second is a python script to parse failure reports.  It expects
+file names on the command line, or if no arguments, it reads stdin. It
+needs the usual MySQLdb module.  It handles reports from Netease,
+which are currently the only ones I'm getting.
+
+mkdmarc - SQL to create the tables
+
+rddmarc - the script to parse summary reports (Perl)
+
+dmarcfail.py - the script to parse failure reports (python)
+
+
+
diff --git a/contrib/rddmarc/dmarcfail.py b/contrib/rddmarc/dmarcfail.py
new file mode 100644
index 00000000..35c5b7ba
--- /dev/null
+++ b/contrib/rddmarc/dmarcfail.py
@@ -0,0 +1,63 @@
+#!/usr/local/bin/python
+# parse a DMARC failure report, add it to the mysql database
+
+import re
+import email, email.parser, email.utils
+import time
+import MySQLdb
+
+db = MySQLdb.connect(user='dmarc',passwd='xxx',db='dmarc', use_unicode=True)
+MySQLdb.paramstyle='format'
+
+def dmfail(h,f):
+    """Store the DMARC failure report read from open file h; f is its name, used in messages."""
+    e = email.message_from_file(h)
+    if(e.get_content_type() != "multipart/report"):
+        print("%s is not a report" % f)
+        return
+
+    fr = m = origbox = origdom = frombox = fromdom = None
+    for p in e.get_payload():
+        if(p.get_content_type() == "message/feedback-report"):
+            r = email.parser.Parser()
+            fr = r.parsestr(p.get_payload()[0].as_string(), True)
+            fx = re.search(r'<(.+?)@(.+?)>', fr['original-mail-from'] or '')
+            if(fx): origbox,origdom = fx.group(1,2)  # stays None for a null (<>) or missing sender
+            arr = int(email.utils.mktime_tz(email.utils.parsedate_tz(fr['arrival-date'])))
+
+        elif(p.get_content_type() == "message/rfc822" or
+             p.get_content_type() == "text/rfc822-headers"):
+            # message/rfc822 is already parsed into a Message; text/rfc822-headers is plain text
+            if(p.is_multipart()): m = p.get_payload(0)
+            else: m = email.message_from_string(p.get_payload())
+            hfrom = m['from'] or ''
+            fx = re.search(r'<(.+?)@(.+?)>', hfrom)
+            if(fx): frombox,fromdom = fx.group(1,2)
+            else:
+                t = re.sub(r"\s+|\([^)]*\)","",hfrom)  # drop whitespace and (comments)
+                fx = re.match(r'(.+?)@(.+)$', t)
+                if(fx): frombox,fromdom = fx.group(1,2)
+
+    if(fr is None or m is None):
+        print("%s has no feedback report or original message, skipped" % f)
+        return
+
+    # OK, parsed it, now add an entry to the database
+    c = db.cursor()
+    c.execute("""INSERT INTO failure(serial,org,bouncedomain,bouncebox,fromdomain,
+        frombox,arrival,sourceip,headers)
+        VALUES(NULL,%s,%s,%s,%s,%s,FROM_UNIXTIME(%s),INET_ATON(%s),%s)""",
+        (fr['reported-domain'],origdom,origbox,fromdom,frombox,arr,fr['source-ip'],m.as_string()))
+    db.commit()  # commit explicitly; MySQLdb leaves autocommit off by default
+    print("Inserted failure report %s" % c.lastrowid)
+    c.close()
+
+if __name__ == "__main__":
+    import sys
+
+    if(len(sys.argv) < 2):
+        dmfail(sys.stdin,"stdin")
+    else:
+        for f in sys.argv[1:]:
+            with open(f) as h:
+                dmfail(h, f)
diff --git a/contrib/rddmarc/mkdmarc b/contrib/rddmarc/mkdmarc
new file mode 100644
index 00000000..aa853918
--- /dev/null
+++ b/contrib/rddmarc/mkdmarc
@@ -0,0 +1,47 @@
+-- database of dmarc data
+
+USE dmarc
+
+CREATE TABLE report (
+    serial int(10) unsigned NOT NULL AUTO_INCREMENT,
+    mindate timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+    maxdate timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
+    domain varchar(255) NOT NULL,
+    org varchar(255) NOT NULL,
+    reportid varchar(255) NOT NULL,
+    PRIMARY KEY (serial),
+    UNIQUE KEY domain (domain,reportid)
+);
+
+CREATE TABLE rptrecord (
+    serial int(10) unsigned NOT NULL,
+    ip int(10) unsigned NOT NULL,
+    rcount int(10) unsigned NOT NULL,
+    disposition enum('none','quarantine','reject'),
+    reason varchar(255),
+    dkimdomain varchar(255),
+    dkimresult enum('none','pass','fail','neutral','policy','temperror','permerror'),
+    spfdomain varchar(255),
+    spfresult enum('none','neutral','pass','fail','softfail','temperror','permerror'),
+    KEY serial (serial,ip)
+);
+
+CREATE TABLE failure (
+    serial int(10) unsigned NOT NULL AUTO_INCREMENT,
+    org varchar(255) NOT NULL, -- reported-domain
+    bouncedomain varchar(255), -- MAIL FROM bouncebox@bouncedomain
+    bouncebox varchar(255),
+    fromdomain varchar(255), -- From: frombox@fromdomain
+    frombox
varchar(255),
+    arrival TIMESTAMP,
+    sourceip int unsigned, -- inet_aton(source-ip)
+    sourceip6 BINARY(16), -- inet_6top(source-ip)
+    headers TEXT,
+    PRIMARY KEY(serial),
+    KEY(sourceip),
+    KEY(fromdomain),
+    KEY(bouncedomain)
+) charset=utf8;
+
+GRANT all on dmarc.* to dmarc identified by 'xxx';
+GRANT all on dmarc.* to dmarc@localhost identified by 'xxx';
diff --git a/contrib/rddmarc/rddmarc b/contrib/rddmarc/rddmarc
new file mode 100644
index 00000000..244b635a
--- /dev/null
+++ b/contrib/rddmarc/rddmarc
@@ -0,0 +1,141 @@
+#!/usr/bin/perl
+# rddmarc: store DMARC summary (aggregate) report mails in the "dmarc" MySQL database.
+# usage: rddmarc mailfile ...   (each file holds one mail message carrying a ZIP report)
+use strict;
+use warnings;
+use MIME::Parser;
+use MIME::Words qw(:all);
+use XML::Simple;
+use DBI;
+
+my $dbh = DBI->connect("DBI:mysql:database=dmarc",
+                       "dmarc", "xxx")
+    or die "Cannot connect to database\n";
+
+# Insert one <record> (hash as returned by XML::Simple) for report $serial.
+# dkim, spf and reason may each be one hash, an array of hashes, or absent;
+# values that are absent stay undef and are stored as NULL.
+sub dorow {
+    my ($serial,$recp) = @_;
+    my %r = %$recp;
+
+    my $ip = $r{'row'}->{'source_ip'};
+    my $count = $r{'row'}->{'count'};
+    my $disp = $r{'row'}->{'policy_evaluated'}->{'disposition'};
+    my ($dkim, $dkimresult, $spf, $spfresult, $reason);
+    my $rp = $r{'auth_results'}->{'dkim'};
+    if(ref $rp eq "HASH") {
+        $dkim = $rp->{'domain'};
+        $dkim = undef if ref $dkim eq "HASH"; # empty element parses as {}
+        $dkimresult = $rp->{'result'};
+    } elsif(ref $rp eq "ARRAY") {
+        # glom sigs together, report first result
+        $dkim = join '/',map { my $d = $_->{'domain'}; ref $d eq "HASH"?"": $d } @$rp;
+        $dkimresult = $rp->[0]->{'result'};
+    }
+    $rp = $r{'auth_results'}->{'spf'};
+    if(ref $rp eq "HASH") {
+        $spf = $rp->{'domain'};
+        $spf = undef if ref $spf eq "HASH"; # empty element, same handling as dkim
+        $spfresult = $rp->{'result'};
+    } elsif(ref $rp eq "ARRAY") {
+        # glom domains together, report first result
+        $spf = join '/',map { my $d = $_->{'domain'}; ref $d eq "HASH"? "": $d } @$rp;
+        $spfresult = $rp->[0]->{'result'};
+    }
+
+    $rp = $r{'row'}->{'policy_evaluated'}->{'reason'};
+    if(ref $rp eq "HASH") {
+        $reason = $rp->{'type'};
+    } elsif(ref $rp eq "ARRAY") {
+        $reason = join '/',map { $_->{'type'} } @$rp;
+    }
+    $dbh->do(qq{INSERT INTO rptrecord(serial,ip,rcount,disposition,reason,dkimdomain,dkimresult,spfdomain,spfresult)
+        VALUES(?,INET_ATON(?),?,?,?,?,?,?,?)},undef, $serial,$ip,$count,$disp,$reason,$dkim,$dkimresult,$spf,$spfresult)
+        or die "cannot insert record " . $dbh->errstr;
+}
+
+foreach my $i (@ARGV) {
+    print "parsing $i\n";
+
+    my $parser = MIME::Parser->new;
+    $parser->output_dir("/tmp");
+
+    my $ent = $parser->parse_open($i);
+    my $zip = $ent->bodyhandle;
+    my $mtype = $ent->mime_type;
+    my $subj = decode_mimewords($ent->get('subject'));
+    print " $subj";
+    # if multipart/whatever, look through the parts to find a ZIP
+    if(lc($mtype) =~ m{multipart/}) {
+        print "Look through $mtype\n";
+        $zip = undef;
+        my $npart = $ent->parts;
+        for my $n (0..($npart-1)) {
+            my $part = $ent->parts($n);
+            if(lc $part->mime_type eq "application/zip"
+               or lc $part->mime_type eq "application/x-zip-compressed") {
+                $zip = $part->bodyhandle;
+                last;
+            } elsif($part->bodyhandle) { # nested multiparts have no body of their own
+                $part->bodyhandle->purge; # not useful
+            }
+        }
+        die "no zip" unless $zip;
+    }
+    elsif(lc $mtype ne "application/zip"
+          and lc $mtype ne "application/x-zip-compressed") {
+        print "don't understand $mtype\n";
+        next;
+    }
+    unless(defined($zip->path)) {
+        print "body is nowhere\n";
+        next;
+    }
+    # list-form pipe open: the temp file name never passes through a shell
+    open(my $unzip, "-|", "unzip", "-p", $zip->path)
+        or die "cannot unzip " . $zip->path . ": $!";
+    my $xml = "";
+    $xml .= $_ while <$unzip>;
+    close $unzip;
+
+    my $xs = XML::Simple->new();
+
+    my $ref = $xs->XMLin($xml);
+    my %xml = %{$ref};
+    my $from = $xml{'report_metadata'}->{'date_range'}->{'begin'};
+    my $to = $xml{'report_metadata'}->{'date_range'}->{'end'};
+    my $org = $xml{'report_metadata'}->{'org_name'};
+    my $id = $xml{'report_metadata'}->{'report_id'};
+    my $domain = $xml{'policy_published'}->{'domain'};
+    # see if already stored
+    my ($xorg,$xid) = $dbh->selectrow_array(qq{SELECT org,reportid FROM report WHERE reportid=?}, undef, $id);
+    if($xorg) {
+        print "Already have $xorg $xid, skipped\n";
+        $zip->purge;
+        $ent->purge;
+        next;
+    }
+
+    my $sql = qq{INSERT INTO report(serial,mindate,maxdate,domain,org,reportid)
+        VALUES(NULL,FROM_UNIXTIME(?),FROM_UNIXTIME(?),?,?,?)};
+    $dbh->do($sql, undef, $from, $to, $domain, $org, $id)
+        or die "cannot make report: " . $dbh->errstr;
+    my $serial = $dbh->{'mysql_insertid'} || $dbh->{'insertid'};
+    print " serial $serial ";
+    my $record = $xml{'record'};
+
+    if(ref $record eq "HASH") {
+        print "single record\n";
+        dorow($serial,$record);
+    } elsif(ref $record eq "ARRAY") {
+        print "multi record\n";
+        foreach my $row (@$record) {
+            dorow($serial,$row);
+        }
+    } else {
+        print "mystery type " . ref($record) . "\n";
+    }
+    $zip->purge;
+    $ent->purge;
+}