Skip to content

Commit

Permalink
Added mailbox support, partial message reconstruction support, and a …
Browse files Browse the repository at this point in the history
…proper import script.
  • Loading branch information
ahknight committed Apr 17, 2016
1 parent 0ff75bc commit 86dca79
Show file tree
Hide file tree
Showing 5 changed files with 371 additions and 6 deletions.
3 changes: 1 addition & 2 deletions emlx/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
# import .message
# import .mailbox
__all__ = ["message", "mailbox", "progress"]
227 changes: 227 additions & 0 deletions emlx/mailbox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
import os
import logging

from emlx.message import EmlxMessage

# For partial message reassembly
import email
from email.policy import EmailPolicy

# Header looked for on each MIME part during partial-message reassembly;
# parts carrying it have their payload loaded from a separate ".emlxpart" file.
APPLE_MARKER = "X-Apple-Content-Length"

# Policy used to parse partial messages before they are re-serialised:
# CRLF line endings and no refolding of source headers, i.e. just enough to
# get a regular message back out without significantly altering the payload.
minimal_policy = EmailPolicy(
    refold_source="none",
    linesep="\r\n",
)


class AMMessageRef(object):
    """Reference to a single ``.emlx`` message file inside a mailbox.

    The reference only stores where the message lives; nothing is read from
    disk until :meth:`get_message` is called.

    Attributes:
        mailbox: Object exposing a ``messages_path`` attribute (the directory
            under which the message tree is stored).
        msgid: Message identifier; it is converted with ``str()`` wherever a
            path is built.
        partial: True when the file on disk is ``<msgid>.partial.emlx``.
    """

    mailbox = None
    msgid = 0
    partial = False

    def __init__(self, mailbox, msgid, partial=False):
        self.mailbox = mailbox
        self.msgid = msgid
        self.partial = partial

    def __repr__(self):
        return "<AMMessageRef msgid=%r partial=%r path=%s>" % (self.msgid, self.partial, self.msg_path)

    @property
    def msg_dir(self):
        """Directory that contains this message's ``Messages`` folder.

        Every digit of the id except the last three becomes one directory
        level below ``mailbox.messages_path``, taken right to left; e.g. id
        ``123456`` maps to ``<messages_path>/3/2/1``.  Ids of three characters
        or fewer map to ``messages_path`` itself.
        """
        leading_digits = str(self.msgid)[:-3]
        return os.path.join(self.mailbox.messages_path, *reversed(leading_digits))

    @property
    def msg_path(self):
        """Full path of the ``.emlx`` (or ``.partial.emlx``) file."""
        filename = str(self.msgid)
        if self.partial:
            filename += ".partial"
        filename += ".emlx"
        return os.path.join(self.msg_dir, "Messages", filename)

    def part_path(self, partno):
        """Return the path of the ``.emlxpart`` file holding part ``partno``."""
        partname = "%s.%s.emlxpart" % (self.msgid, str(partno))
        return os.path.join(os.path.dirname(self.msg_path), partname)

    def get_message(self):
        """Read the message from disk and return it as an ``EmlxMessage``.

        For partial messages the MIME parts stored in separate ``.emlxpart``
        files are loaded back into the message and ``content`` is replaced
        with the reassembled bytes.

        Returns:
            The parsed ``EmlxMessage``, or ``None`` when the message file
            cannot be read (the error is logged).
        """
        path = self.msg_path
        if not path:
            return None

        try:
            with open(path, "rb") as f:
                data = f.read()
        except OSError as e:
            logging.exception("get_message: %r", e)
            return None

        # Parse EMLX data
        msg = EmlxMessage(data)

        if self.partial:
            logging.debug("%s: rebuilding partial message" % path)

            # Parse the email
            email_msg = email.message_from_bytes(msg.content, policy=minimal_policy)

            # Part files sit next to the message and are named
            # "<msgid>.<partno>[.<partno>...].emlxpart".
            prefix = os.path.join(os.path.dirname(path), str(self.msgid))
            self._load_parts(email_msg, prefix)

            msg.content = email_msg.as_bytes()

        return msg

    def _load_parts(self, message, prefix):
        """Load external ``.emlxpart`` payloads into the parts of ``message``.

        Walks the direct sub-parts of ``message`` (numbered from 1).  Nested
        multiparts are handled recursively with ``prefix`` extended by the
        part number; any other part carrying the stub header gets its payload
        replaced by the contents of ``<prefix>.<partno>.emlxpart`` and the
        stub header removed.  A part whose file cannot be read is logged and
        left untouched.
        """
        if not message.is_multipart():
            # A non-multipart payload is a string, not a list of parts, so
            # there is nothing to iterate over.
            logging.debug("%s: not a multipart message; no parts to load", prefix)
            return

        parts_needed = 0
        parts_loaded = 0

        for partno, part in enumerate(message.get_payload(), start=1):
            if part.is_multipart():
                self._load_parts(part, "%s.%d" % (prefix, partno))

            elif part[APPLE_MARKER]:
                parts_needed += 1

                part_path = "%s.%d.emlxpart" % (prefix, partno)
                logging.debug("%s: loading part %d" % (part_path, partno))

                try:
                    with open(part_path, "rb") as f:
                        part_data = f.read()
                except OSError:
                    logging.exception("%s: error loading message part %d" % (part_path, partno))
                    continue

                part.set_payload(part_data)
                del part[APPLE_MARKER]
                parts_loaded += 1

        if parts_loaded != parts_needed:
            logging.warning("%s: message may be incomplete (found %d parts of %d)" % (
                prefix,
                parts_loaded,
                parts_needed)
            )
        else:
            logging.info("%s: sucessfully reassembled (%d parts)" % (prefix, parts_loaded))


class AMMailbox(object):
    """An Apple Mail mailbox directory (typically ``<name>.mbox``).

    Attributes:
        path: Filesystem path of the mailbox directory.
        parent: The enclosing ``AMMailbox``, or ``None`` for a top-level box.
    """

    parent = None
    path = None

    def __init__(self, path, parent=None):
        self.path = path
        self.parent = parent

    def __str__(self):
        return str(self.name)

    def __unicode__(self):
        # ``unicode`` does not exist on Python 3 (which this module requires
        # for os.scandir / email.policy); ``str`` is the text type there.
        return str(self.name)

    def __repr__(self):
        return "<AMMailbox name='%s'>" % self.name

    @property
    def name(self):
        """Mailbox name: the directory name without its extension, prefixed
        with the parent's name and ``/`` when the box has a parent."""
        dirname = os.path.basename(os.path.normpath(self.path))
        base, _ext = os.path.splitext(dirname)

        if self.parent is not None:
            return "%s/%s" % (self.parent.name, base)
        return base

    @property
    def children(self):
        """Direct child mailboxes: sub-directories whose name ends in ``.mbox``."""
        return [
            AMMailbox(dirent.path, parent=self)
            for dirent in os.scandir(self.path)
            if dirent.is_dir() and dirent.name.endswith(".mbox")
        ]

    @property
    def all_children(self):
        """All descendant mailboxes, depth-first, each parent before its children."""
        boxes = []
        for box in self.children:
            boxes.append(box)
            boxes.extend(box.all_children)
        return boxes

    def _messages_at_path(self, path):
        """Collect message references under ``path``.

        Picks up every ``*.emlx`` file in ``path/Messages`` and then recurses
        into the single-digit sub-directories of ``path``.  A file name with
        an extra extension before ``.emlx`` (e.g. ``12.partial.emlx``) yields
        a reference flagged as partial.
        """
        messages_path = os.path.join(path, "Messages")
        messages = []

        if os.path.isdir(messages_path):
            for dirent in os.scandir(messages_path):
                if not dirent.is_file():
                    continue
                name, ext = os.path.splitext(dirent.name)
                if ext != ".emlx":
                    continue
                msgid, partial = os.path.splitext(name)
                messages.append(AMMessageRef(self, msgid, partial=bool(partial)))
        elif os.path.exists(messages_path):
            logging.debug("%s: not a directory; not considering for messages", messages_path)

        # Scan for tries and get their messages.  Only directories can be
        # trie branches; a stray single-digit *file* must not be scanned.
        for dirent in os.scandir(path):
            if len(dirent.name) == 1 and dirent.name in "0123456789" and dirent.is_dir():
                messages.extend(self._messages_at_path(dirent.path))

        return messages

    @property
    def messages_path(self):
        """Path of the mailbox's ``Data`` directory, or ``None`` if absent.

        The ``Data`` directory is looked for inside the first entry whose
        name is 36 or 44 characters long (a GUID, or ``GUID.noindex``).
        """
        for dirent in os.scandir(self.path):
            # GUID or GUID.noindex
            if len(dirent.name) in (36, 44):
                data_path = os.path.join(dirent.path, "Data")
                if os.path.isdir(data_path):
                    return data_path
        return None

    def messages(self):
        """Return a list of ``AMMessageRef`` for every message in this box
        (empty when the mailbox has no ``Data`` directory)."""
        data_dir = self.messages_path
        if data_dir is None:
            return []
        return self._messages_at_path(data_dir)
3 changes: 1 addition & 2 deletions emlx/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def __str__(self):
return str(self.content)

def __bytes__(self):
content_size = str(self.content_size).encode("utf8")
content_size = str(len(self.content)).encode("utf8")
meta = plistlib.dumps(self.plist)
return (content_size + b"\n" + self.content + meta)

Expand Down Expand Up @@ -100,7 +100,6 @@ def date_last_viewed(self):

def get_maildir_message(self):
m = MaildirMessage(self.content)
m.set_subdir("new")

if self.date_received is not None:
m.set_date(self.date_received)
Expand Down
139 changes: 139 additions & 0 deletions emlx/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#!/usr/bin/env python

import argparse
import logging
import os
import sys
import time

from maildir_lite import Maildir
from emlx.mailbox import AMMailbox

from clint.textui import progress, colored


def main(argc, argv):
    """Convert Apple Mail mailboxes into a maildir-format mailbox.

    Args:
        argc: Number of entries in ``argv``.  Unused; kept for the existing
            call signature.
        argv: Full argument vector; ``argv[0]`` is the program name and the
            remaining entries are parsed as command-line options.  At least
            one ``source`` path is required.

    Returns:
        0, suitable for passing to ``sys.exit``.

    The module-level ``STOP`` flag (set by the SIGINT handler installed in
    ``start()``) is polled between mailboxes and between messages so that an
    interrupt ends the import cleanly.
    """

    def stop_requested():
        # STOP is only created by start(); treat it as "not requested" when
        # main() is called directly.
        return globals().get("STOP", False)

    logging.basicConfig(format="%(message)s", level=logging.WARNING, stream=sys.stdout)
    program = os.path.basename(argv[0])

    # Parse arguments
    parser = argparse.ArgumentParser(
        prog=program,
        description="converts Apple Mail mailboxes into maildir-format mailboxes",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("-q", "--quiet",
        action="store_true", help="no output")
    parser.add_argument("-v", "--verbose", default=0,
        action="count", help="show per-message progress and status")
    parser.add_argument("-d", "--debug",
        action="store_true", help="show everything. everything.")
    parser.add_argument("-m", "--maildir", default="~/Maildir/",
        help="path to maildir to import messages into (will create if nonexistant)")
    parser.add_argument("-n", "--dry-run",
        action="store_true", help="simulate actions only")
    parser.add_argument("-r", "--recursive",
        action="store_true", help="also import all subfolders")
    parser.add_argument("-l", "--fs",
        action="store_true", help="use FS layout for maildir subfolders instead of Maildir++")
    parser.add_argument("source", nargs="+")

    # Honour the argv we were given rather than silently re-reading sys.argv.
    args = parser.parse_args(argv[1:])

    # --debug wins over everything; --quiet wins over --verbose.
    root_logger = logging.getLogger()
    if args.debug:
        root_logger.setLevel(logging.DEBUG)
        logging.debug("Debug output enabled.")
    elif args.quiet:
        root_logger.setLevel(logging.ERROR)
    elif args.verbose:
        root_logger.setLevel(logging.INFO)

    # Logged only after the level is applied, otherwise it can never show.
    logging.info(args)

    # Created on first use so nothing is touched when no source is usable.
    maildir = None

    ### Process the paths

    for path in args.source:
        if not os.path.isdir(path):
            logging.warning("path is not a directory: %s", path)
            continue

        # See if the path is a mailbox container
        v3_path = os.path.join(path, "V3")
        if os.path.isdir(v3_path):
            logging.debug("has V3 data")
            path = v3_path

        local_mailboxes = os.path.join(path, "Mailboxes")
        if os.path.isdir(local_mailboxes):
            path = local_mailboxes

        # Load what should be a mailbox at this point
        logging.info("processing source path: %s", path)

        mailboxes = AMMailbox(path)
        sources = [mailboxes]
        if args.recursive:
            sources.extend(mailboxes.all_children)
        logging.debug("sources: %r" % sources)

        # Scan every mailbox exactly once; messages() walks the filesystem.
        scanned = [(box, box.messages()) for box in sources]
        logging.info("%s: found %d messages.", str(mailboxes), len(scanned[0][1]))

        total_count = sum(len(found) for _box, found in scanned)
        logging.info("%d messages to import from %d mailboxes", total_count, len(scanned))

        if not args.dry_run and maildir is None:
            # The default contains "~", which only a shell would expand.
            maildir = Maildir(os.path.expanduser(args.maildir), create=True, lazy=True, fs_layout=args.fs)

        for box, found in scanned:
            logging.info("%s: starting import" % box.name)
            if stop_requested():
                break

            label = "Importing %s: " % box.name
            for msg in progress.bar(found, expected_size=len(found), label=label):
                if stop_requested():
                    break

                if args.dry_run:
                    # Still exercise partial-message reassembly so its
                    # diagnostics are reported.
                    if msg.partial:
                        msg.get_message()
                    continue

                m = msg.get_message()
                if m is None:
                    # get_message() already logged the reason.
                    logging.warning("%s: could not read message; skipping", msg.msg_path)
                    continue
                maildir.add_message(m.get_maildir_message())

        if stop_requested():
            break

    return 0


def start():
    """Script entry point: install the Ctrl-C handler, then run ``main``.

    The first SIGINT only raises the module-level ``STOP`` flag.  A SIGINT
    received while the flag is already set makes the handler ignore further
    SIGINTs and send SIGTERM to this process.  The process exits with
    whatever ``main`` returns.
    """
    global STOP

    import signal

    def _handle_sigint(signum, _frame):
        global STOP
        if STOP:
            # Second interrupt: stop listening and terminate ourselves.
            signal.signal(signal.SIGINT, signal.SIG_IGN)
            os.kill(os.getpid(), signal.SIGTERM)
        STOP = True

    signal.signal(signal.SIGINT, _handle_sigint)

    # You might be a C developer if...
    STOP = False
    arg_vector = sys.argv
    arg_count = len(arg_vector)

    sys.exit(main(arg_count, arg_vector))


if __name__ == "__main__":
    start()
Loading

0 comments on commit 86dca79

Please sign in to comment.