diff --git a/emlx/__init__.py b/emlx/__init__.py
index 2764733..45c385b 100755
--- a/emlx/__init__.py
+++ b/emlx/__init__.py
@@ -1,2 +1 @@
-# import .message
-# import .mailbox
+__all__ = ["message", "mailbox", "progress"]
diff --git a/emlx/mailbox.py b/emlx/mailbox.py
index e69de29..7718b25 100644
--- a/emlx/mailbox.py
+++ b/emlx/mailbox.py
@@ -0,0 +1,237 @@
+import os
+import logging
+
+from emlx.message import EmlxMessage
+
+# For partial message reassembly
+import email
+from email.policy import EmailPolicy
+
+# Just enough to get a regular message back out without
+# significantly altering the message payload.
+minimal_policy = EmailPolicy(linesep="\r\n", refold_source="none")
+APPLE_MARKER = "X-Apple-Content-Length"
+
+
+class AMMessageRef(object):
+    """Reference to a single message file stored under an AMMailbox."""
+
+    mailbox = None
+    msgid = 0
+    partial = False
+
+    def __repr__(self):
+        # The template has to consume all three values; "%"-formatting an
+        # empty string with arguments raises TypeError.
+        return "<AMMessageRef %s partial=%s path=%s>" % (
+            self.msgid, self.partial, self.msg_path)
+
+    def __init__(self, mailbox, msgid, partial=False):
+        self.mailbox = mailbox
+        self.msgid = msgid
+        self.partial = partial
+
+    @property
+    def msg_dir(self):
+        """Directory that contains this message's "Messages" folder.
+
+        Every digit of the id except the last three, taken in reverse
+        order, names one nested directory below mailbox.messages_path.
+        """
+        digits = list(str(self.msgid))[:-3]
+        digits.reverse()
+        return os.path.join(self.mailbox.messages_path, *digits)
+
+    @property
+    def msg_path(self):
+        """Full path of the message's .emlx (or .partial.emlx) file."""
+        filename = str(self.msgid)
+        if self.partial:
+            filename += ".partial"
+        filename += ".emlx"
+        return os.path.join(self.msg_dir, "Messages", filename)
+
+    def part_path(self, partno):
+        """Path of the .emlxpart file numbered partno, next to the message."""
+        partname = "%s.%s.emlxpart" % (self.msgid, partno)
+        return os.path.join(os.path.dirname(self.msg_path), partname)
+
+    def _load_parts(self, message, prefix, path):
+        """Replace stubbed MIME parts of message with .emlxpart contents.
+
+        prefix is the path stem that part numbers are appended to; path
+        is the message file and is only used in log output.
+        """
+        if not message.is_multipart():
+            # get_payload() is not a list of parts here; nothing to walk.
+            return
+
+        parts_needed = 0
+        parts_loaded = 0
+
+        for partno, part in enumerate(message.get_payload(), start=1):
+            if part.is_multipart():
+                self._load_parts(part, "%s.%d" % (prefix, partno), path)
+
+            elif part[APPLE_MARKER]:
+                parts_needed += 1
+
+                part_path = "%s.%d.emlxpart" % (prefix, partno)
+                logging.debug("%s: loading part %d" % (part_path, partno))
+
+                try:
+                    with open(part_path, "rb") as f:
+                        part_data = f.read()
+                except OSError:
+                    logging.exception(
+                        "%s: error loading message part %d" % (path, partno))
+                    continue
+
+                part.set_payload(part_data)
+                del part[APPLE_MARKER]
+                parts_loaded += 1
+
+        if parts_loaded != parts_needed:
+            logging.warning(
+                "%s: message may be incomplete (found %d parts of %d)" % (
+                    prefix, parts_loaded, parts_needed))
+        else:
+            logging.info(
+                "%s: sucessfully reassembled (%d parts)" % (
+                    prefix, parts_loaded))
+
+    def get_message(self):
+        """Read and parse the message file.
+
+        A partial message is first re-joined with its .emlxpart files.
+        Returns an EmlxMessage, or None when the file cannot be read.
+        """
+        path = self.msg_path
+        if not path:
+            return None
+
+        try:
+            with open(path, "rb") as f:
+                data = f.read()
+        except OSError as e:
+            # No "log" name exists in this module; use logging directly.
+            logging.exception("get_message: %r", e)
+            return None
+
+        # Parse EMLX data
+        msg = EmlxMessage(data)
+
+        if self.partial:
+            logging.debug("%s: rebuilding partial message" % path)
+
+            # Parse the email
+            email_msg = email.message_from_bytes(
+                msg.content, policy=minimal_policy)
+
+            # Look through the MIME payloads for the stub header.
+            prefix = os.path.join(os.path.dirname(path), str(self.msgid))
+            self._load_parts(email_msg, prefix, path)
+
+            msg.content = email_msg.as_bytes()
+
+        return msg
+
+
+class AMMailbox(object):
+    """An Apple Mail mailbox directory and its nested ".mbox" children."""
+
+    parent = None
+    path = None
+
+    def __init__(self, path, parent=None):
+        self.path = path
+        self.parent = parent
+
+    def __str__(self):
+        return str(self.name)
+
+    def __unicode__(self):
+        # The module relies on Python 3 APIs (os.scandir,
+        # email.message_from_bytes), where the "unicode" builtin is gone.
+        return str(self.name)
+
+    def __repr__(self):
+        return "<AMMailbox %s>" % self.name
+
+    @property
+    def name(self):
+        """Directory name without its extension, prefixed by parent.name."""
+        path = os.path.normpath(self.path)
+        base, ext = os.path.splitext(os.path.basename(path))
+
+        if self.parent is not None:
+            return "%s/%s" % (self.parent.name, base)
+        return base
+
+    @property
+    def children(self):
+        """Direct child mailboxes: subdirectories whose name ends in .mbox."""
+        return [
+            AMMailbox(dirent.path, parent=self)
+            for dirent in os.scandir(self.path)
+            if dirent.is_dir() and dirent.name.endswith(".mbox")
+        ]
+
+    @property
+    def all_children(self):
+        """Every descendant mailbox, each one ahead of its own children."""
+        boxes = []
+        for box in self.children:
+            boxes.append(box)
+            boxes.extend(box.all_children)
+        return boxes
+
+    def _messages_at_path(self, path):
+        """Collect message refs below path, descending digit directories."""
+        messages_path = os.path.join(path, "Messages")
+        messages = []
+
+        if os.path.isdir(messages_path):
+            for dirent in os.scandir(messages_path):
+                if not dirent.is_file():
+                    continue
+                name, ext = os.path.splitext(dirent.name)
+                if ext != ".emlx":
+                    continue
+                # "123.partial.emlx" yields msgid "123", partial ".partial".
+                msgid, partial = os.path.splitext(name)
+                messages.append(
+                    AMMessageRef(self, msgid, partial=bool(partial)))
+        elif os.path.exists(messages_path):
+            logging.debug(
+                "%s: not a directory; not considering for messages",
+                messages_path)
+
+        # Scan for tries and get their messages
+        for dirent in os.scandir(path):
+            # Only directories can be trie branches; scanning a plain
+            # file that happens to be named with a digit would raise.
+            if (len(dirent.name) == 1 and dirent.name in "0123456789"
+                    and dirent.is_dir()):
+                messages.extend(self._messages_at_path(dirent.path))
+
+        return messages
+
+    @property
+    def messages_path(self):
+        """Path of this mailbox's "Data" directory, or None if not found."""
+        # Our messages will be in a dir named with a GUID.
+        for dirent in os.scandir(self.path):
+            # GUID or GUID.noindex
+            if len(dirent.name) in (36, 44):
+                data_path = os.path.join(dirent.path, "Data")
+                if os.path.isdir(data_path):
+                    return data_path
+        return None
+
+    def messages(self):
+        """Return an AMMessageRef for every message in this mailbox."""
+        data_dir = self.messages_path
+        if data_dir is None:
+            return []
+        return self._messages_at_path(data_dir)
diff --git a/emlx/message.py b/emlx/message.py
index 7250314..e4129a4 100755
--- a/emlx/message.py
+++ b/emlx/message.py
@@ -36,7 +36,7 @@ def __str__(self):
         return str(self.content)
 
     def __bytes__(self):
-        content_size = str(self.content_size).encode("utf8")
+        content_size = str(len(self.content)).encode("utf8")
         meta = plistlib.dumps(self.plist)
         return (content_size + b"\n" + self.content + meta)
 
@@ -100,7 +100,6 @@ def date_last_viewed(self):
 
     def get_maildir_message(self):
         m = MaildirMessage(self.content)
-        m.set_subdir("new")
         if self.date_received is not None:
             m.set_date(self.date_received)
 
diff --git a/emlx/script.py b/emlx/script.py
new file mode 100755
index 0000000..1a0047b
--- /dev/null
+++ b/emlx/script.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+
+import argparse
+import logging
+import os
+import sys
+import time
+
+from maildir_lite import Maildir
+from emlx.mailbox import AMMailbox
+
+from clint.textui import progress, colored
+
+# Set by the SIGINT handler that start() installs and polled by the import
+# loops.  Defined at import time so main() also works when called directly.
+STOP = False
+
+
+def main(argc, argv):
+    """Import Apple Mail mailboxes into a maildir.
+
+    argc is unused; argv is the full argument vector, program name first.
+    Returns 0, which start() passes on as the process exit status.
+    """
+    logging.basicConfig(format="%(message)s", level=logging.WARNING, stream=sys.stdout)
+
+    # Parse arguments
+    parser = argparse.ArgumentParser(
+        prog=os.path.basename(argv[0]),
+        description="converts Apple Mail mailboxes into maildir-format mailboxes",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument("-q", "--quiet",
+        action="store_true", help="no output")
+    parser.add_argument("-v", "--verbose", default=0,
+        action="count", help="show per-message progress and status")
+    parser.add_argument("-d", "--debug",
+        action="store_true", help="show everything. everything.")
+    parser.add_argument("-m", "--maildir", default="~/Maildir/",
+        help="path to maildir to import messages into (will create if nonexistant)")
+    parser.add_argument("-n", "--dry-run",
+        action="store_true", help="simulate actions only")
+    parser.add_argument("-r", "--recursive",
+        action="store_true", help="also import all subfolders")
+    parser.add_argument("-l", "--fs",
+        action="store_true", help="use FS layout for maildir subfolders instead of Maildir++")
+    # nargs="*" (not "+") so that running without sources uses the default.
+    parser.add_argument("source", nargs="*", default=["~/Library/Mail/"],
+        help="Apple Mail directories or mailboxes to import")
+
+    # Parse the vector we were handed instead of re-reading sys.argv.
+    args = parser.parse_args(argv[1:])
+
+    if args.debug:
+        logging.getLogger().setLevel(logging.DEBUG)
+        logging.debug("Debug output enabled.")
+    if args.verbose:
+        if logging.getLogger().getEffectiveLevel() != logging.DEBUG:
+            logging.getLogger().setLevel(logging.INFO)
+    if args.quiet:
+        if logging.getLogger().getEffectiveLevel() != logging.DEBUG:
+            logging.getLogger().setLevel(logging.ERROR)
+    logging.info(args)
+
+    # Created on first use so a run with no usable source creates nothing.
+    maildir = None
+
+
+    ### Process the paths
+
+    for path in args.source:
+        if STOP:
+            break
+
+        path = os.path.expanduser(path)
+        if not os.path.isdir(path):
+            logging.warning("path is not a directory: %s", path)
+            continue
+
+        # See if the path is a mailbox container
+        v3_path = os.path.join(path, "V3")
+        if os.path.isdir(v3_path):
+            logging.debug("has V3 data")
+            path = v3_path
+
+        local_mailboxes = os.path.join(path, "Mailboxes")
+        if os.path.isdir(local_mailboxes):
+            path = local_mailboxes
+
+        # Load what should be a mailbox at this point
+        logging.info("processing source path: %s", path)
+
+        mailboxes = AMMailbox(path)
+        sources = [mailboxes]
+        if args.recursive:
+            sources.extend(mailboxes.all_children)
+        logging.debug("sources: %r", sources)
+
+        # Scan each mailbox once; every messages() call walks the disk.
+        scanned = [(box, box.messages()) for box in sources]
+        logging.info("%s: found %d messages.", str(mailboxes), len(scanned[0][1]))
+
+        # Total number of messages to import across all sources.
+        total_count = sum(len(found) for _, found in scanned)
+        logging.info("%d messages to import", total_count)
+
+        if not args.dry_run and maildir is None:
+            maildir = Maildir(os.path.expanduser(args.maildir),
+                create=True, lazy=True, fs_layout=args.fs)
+
+        for box, found in scanned:
+            logging.info("%s: starting import" % box.name)
+            if STOP:
+                break
+            for msg in progress.bar(found, expected_size=len(found), label="Importing %s: " % box.name):
+                if STOP:
+                    break
+                if args.dry_run:
+                    # Still exercise reassembly of partial messages.
+                    if msg.partial:
+                        msg.get_message()
+                    continue
+
+                m = msg.get_message()
+                if m is None:
+                    # get_message() already logged why the read failed.
+                    logging.warning("%s: skipping unreadable message", msg.msg_path)
+                    continue
+                maildir.add_message(m.get_maildir_message())
+
+    return 0
+
+
+def start():
+    """Console-script entry point: install a SIGINT handler, run main()."""
+    global STOP
+
+    import signal
+    def signal_handler(sig, frame):
+        global STOP
+        if STOP:
+            # Second interrupt: stop waiting for the loops and terminate.
+            signal.signal(signal.SIGINT, signal.SIG_IGN)
+            os.kill(os.getpid(), signal.SIGTERM)
+        STOP = True
+    signal.signal(signal.SIGINT, signal_handler)
+
+    # You might be a C developer if...
+    STOP = False
+    argc = len(sys.argv)
+    argv = sys.argv
+
+    sys.exit(main(argc, argv))
+
+if __name__ == "__main__":
+    start()
diff --git a/setup.py b/setup.py
index 258272a..75bfd16 100755
--- a/setup.py
+++ b/setup.py
@@ -23,7 +23,7 @@
         # 3 - Alpha
         # 4 - Beta
         # 5 - Production/Stable
-        'Development Status :: 5 - Production/Stable',
+        'Development Status :: 3 - Alpha',
 
         # Indicate who your project is intended for
         'Intended Audience :: Developers',
@@ -54,6 +54,7 @@
     # List run-time dependencies here. These will be installed by pip when your
     # project is installed.
     # install_requires = ['peppercorn'],
+    install_requires = ['maildir', 'clint'],
 
     # If there are data files included in your packages that need to be
     # installed, specify them here. If using Python 2.6 or less, then these
@@ -73,7 +74,7 @@
     # pip to create the appropriate form of executable for the target platform.
     entry_points={
         'console_scripts': [
-            'emlx-to-maildir=emlx.converter:main',
+            'emlx-to-maildir=emlx.script:start',
         ],
     },
 )