-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added mailbox support, partial message reconstruction support, and a …
…proper import script.
- Loading branch information
Showing
5 changed files
with
371 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1 @@ | ||
# import .message | ||
# import .mailbox | ||
__all__ = ["message", "mailbox", "progress"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,227 @@ | ||
import os | ||
import logging | ||
|
||
from emlx.message import EmlxMessage | ||
|
||
# For partial message reassembly | ||
import email | ||
from email.policy import EmailPolicy | ||
|
||
# Email policy used when re-parsing a partial message for reassembly: | ||
# CRLF line separators and no refolding of source headers. | ||
# Just enough to get a regular message back out without | ||
# significantly altering the message payload. | ||
minimal_policy = EmailPolicy(linesep="\r\n", refold_source="none") | ||
# Header looked up on each MIME part during reassembly; a part that carries | ||
# it has its payload loaded from a separate ".emlxpart" file. | ||
APPLE_MARKER = "X-Apple-Content-Length" | ||
|
||
|
||
class AMMessageRef(object):
    """Reference to a single ``.emlx`` message file belonging to a mailbox.

    The reference only stores the owning mailbox, the message id and whether
    the on-disk file is a ``.partial.emlx`` stub; paths are derived on demand
    from ``mailbox.messages_path``.
    """

    # Class-level defaults; __init__ overrides them per instance.
    mailbox = None
    msgid = 0
    partial = False

    def __init__(self, mailbox, msgid, partial=False):
        """Store the owning mailbox, the message id and the partial flag.

        Args:
            mailbox: object exposing a ``messages_path`` attribute.
            msgid: message id (int or str); it is stringified when used.
            partial: True when the message is stored as ``<id>.partial.emlx``.
        """
        self.mailbox = mailbox
        self.msgid = msgid
        self.partial = partial

    def __repr__(self):
        return "<AMMessageRef msgid=%r partial=%r path=%s>" % (
            self.msgid, self.partial, self.msg_path)

    @property
    def msg_dir(self):
        """Directory that holds this message's ``Messages`` folder.

        Every digit of the id except the last three becomes one directory
        level below ``mailbox.messages_path``, least-significant digit first
        (id 12345 -> ``<messages_path>/2/1``). Ids of three digits or fewer
        map to ``messages_path`` itself.
        """
        digits = str(self.msgid)
        branches = digits[:-3][::-1]
        return os.path.join(self.mailbox.messages_path, *branches)

    @property
    def msg_path(self):
        """Full path of the ``.emlx`` (or ``.partial.emlx``) file."""
        filename = str(self.msgid)
        if self.partial:
            filename += ".partial"
        filename += ".emlx"
        return os.path.join(self.msg_dir, "Messages", filename)

    def part_path(self, partno):
        """Return the path of part ``partno``: ``<id>.<partno>.emlxpart``.

        The file is located in the same directory as the message file.
        """
        partname = "%s.%s.emlxpart" % (self.msgid, partno)
        return os.path.join(os.path.dirname(self.msg_path), partname)

    def get_message(self):
        """Read the message file and return it as an ``EmlxMessage``.

        For partial messages the content is re-parsed, every stubbed MIME
        part is filled in from its ``.emlxpart`` file, and the rebuilt bytes
        are stored back on ``msg.content``.

        Returns:
            The ``EmlxMessage``, or ``None`` when the file cannot be read.
        """
        path = self.msg_path
        if not path:
            return None

        try:
            with open(path, "rb") as f:
                data = f.read()
        except (OSError, ValueError) as e:
            logging.exception("get_message: %r", e)
            return None

        # Parse EMLX data
        msg = EmlxMessage(data)

        if self.partial:
            logging.debug("%s: rebuilding partial message", path)
            email_msg = email.message_from_bytes(msg.content, policy=minimal_policy)

            # Part files share the message's directory and start with its id.
            prefix = os.path.join(os.path.dirname(path), str(self.msgid))
            self._load_parts(email_msg, prefix, path)

            msg.content = email_msg.as_bytes()

        return msg

    def _load_parts(self, message, prefix, source_path):
        """Fill stubbed MIME parts of ``message`` from ``.emlxpart`` files.

        Args:
            message: parsed email message (or multipart sub-part) to walk.
            prefix: path prefix for part files; part N of this level lives at
                ``<prefix>.<N>.emlxpart`` and nested multiparts recurse with
                ``<prefix>.<N>`` as their prefix.
            source_path: path of the ``.emlx`` file, used in error logs only.
        """
        if not message.is_multipart():
            # A non-multipart payload is a string, not a list of parts, so
            # there is nothing to walk.
            logging.debug("%s: not a multipart message; no parts to load", prefix)
            return

        parts_needed = 0
        parts_loaded = 0

        for partno, part in enumerate(message.get_payload(), start=1):
            if part.is_multipart():
                self._load_parts(part, "%s.%d" % (prefix, partno), source_path)
            elif part[APPLE_MARKER]:
                parts_needed += 1

                part_file = "%s.%d.emlxpart" % (prefix, partno)
                logging.debug("%s: loading part %d", part_file, partno)

                try:
                    with open(part_file, "rb") as f:
                        part_data = f.read()
                except OSError:
                    # Best effort: keep the stub and carry on with the rest.
                    logging.exception("%s: error loading message part %d",
                                      source_path, partno)
                    continue

                part.set_payload(part_data)
                # The part is complete now, so it no longer needs the marker.
                del part[APPLE_MARKER]
                parts_loaded += 1

        if parts_loaded != parts_needed:
            logging.warning("%s: message may be incomplete (found %d parts of %d)",
                            prefix, parts_loaded, parts_needed)
        else:
            logging.info("%s: sucessfully reassembled (%d parts)", prefix, parts_loaded)
|
||
|
||
class AMMailbox(object):
    """A mailbox directory on disk, optionally nested under a parent mailbox.

    Child mailboxes are sub-directories whose name ends in ``.mbox``.
    Messages live below a ``Data`` directory found inside an entry whose name
    is 36 or 44 characters long (a GUID, or a GUID plus ``.noindex``).
    """

    # Class-level defaults; __init__ overrides them per instance.
    parent = None
    path = None

    def __init__(self, path, parent=None):
        """Remember the mailbox directory and its parent mailbox, if any."""
        self.path = path
        self.parent = parent

    def __str__(self):
        return str(self.name)

    def __unicode__(self):
        # Kept for callers that still invoke it; the `unicode` builtin does
        # not exist on Python 3, so return the text name directly.
        return str(self.name)

    def __repr__(self):
        return "<AMMailbox name='%s'>" % self.name

    @property
    def name(self):
        """Mailbox name: directory basename without extension.

        When the mailbox has a parent, the parent's name is prepended with a
        ``/`` separator.
        """
        basename = os.path.basename(os.path.normpath(self.path))
        base, _ext = os.path.splitext(basename)

        if self.parent is not None:
            return "%s/%s" % (self.parent.name, base)
        return base

    @property
    def children(self):
        """List of direct child mailboxes (``*.mbox`` sub-directories)."""
        with os.scandir(self.path) as entries:
            return [
                AMMailbox(dirent.path, parent=self)
                for dirent in entries
                if dirent.is_dir() and dirent.name.endswith(".mbox")
            ]

    @property
    def all_children(self):
        """List of all descendant mailboxes, each followed by its own."""
        boxes = []
        for box in self.children:
            boxes.append(box)
            boxes.extend(box.all_children)
        return boxes

    def _messages_at_path(self, path):
        """Collect message references below ``path``, recursively.

        ``*.emlx`` files directly inside ``<path>/Messages`` become
        ``AMMessageRef`` objects; a second extension before ``.emlx`` (as in
        ``123.partial.emlx``) marks the reference as partial. Single-digit
        sub-directories of ``path`` are then searched the same way.
        """
        messages = []
        messages_path = os.path.join(path, "Messages")

        if os.path.isdir(messages_path):
            with os.scandir(messages_path) as entries:
                for dirent in entries:
                    if not dirent.is_file():
                        continue
                    name, ext = os.path.splitext(dirent.name)
                    if ext != ".emlx":
                        continue
                    msgid, partial = os.path.splitext(name)
                    messages.append(AMMessageRef(self, msgid, partial=bool(partial)))
        elif os.path.exists(messages_path):
            logging.debug("%s: not a directory; not considering for messages", messages_path)

        # Scan for tries and get their messages. Only real directories are
        # followed: a stray file with a one-digit name cannot be scanned.
        with os.scandir(path) as entries:
            for dirent in entries:
                if len(dirent.name) == 1 and dirent.name in "0123456789" and dirent.is_dir():
                    messages.extend(self._messages_at_path(dirent.path))

        return messages

    @property
    def messages_path(self):
        """Path of this mailbox's ``Data`` directory, or ``None`` if absent.

        Our messages will be in a dir named with a GUID: the first entry
        whose name is 36 (GUID) or 44 (GUID.noindex) characters long and
        which contains a ``Data`` directory wins.
        """
        with os.scandir(self.path) as entries:
            for dirent in entries:
                if len(dirent.name) in (36, 44):
                    data_path = os.path.join(dirent.path, "Data")
                    if os.path.isdir(data_path):
                        return data_path
        return None

    def messages(self):
        """Return the list of message references stored in this mailbox.

        Child mailboxes are not included. Returns an empty list when the
        mailbox has no ``Data`` directory.
        """
        data_dir = self.messages_path
        if data_dir is None:
            return []
        return self._messages_at_path(data_dir)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
#!/usr/bin/env python | ||
|
||
import argparse | ||
import logging | ||
import os | ||
import sys | ||
import time | ||
|
||
from maildir_lite import Maildir | ||
from emlx.mailbox import AMMailbox | ||
|
||
from clint.textui import progress, colored | ||
|
||
|
||
def main(argc, argv):
    """Import Apple Mail mailboxes into a maildir.

    Args:
        argc: number of entries in ``argv`` (kept for the existing call
            signature; not otherwise used).
        argv: full argument vector, program name first. Options are parsed
            from ``argv[1:]``.

    Returns:
        0 once all source paths have been processed or the import was
        interrupted through the module-level ``STOP`` flag.
    """
    logging.basicConfig(format="%(message)s", level=logging.WARNING, stream=sys.stdout)
    program = os.path.basename(argv[0]) if argv else None

    def stopped():
        # STOP is only created by start(); treat it as False when main() is
        # called directly.
        return globals().get("STOP", False)

    # Parse arguments
    parser = argparse.ArgumentParser(
        prog=program,
        description="converts Apple Mail mailboxes into maildir-format mailboxes",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("-q", "--quiet",
        action="store_true", help="no output")
    parser.add_argument("-v", "--verbose", default=0,
        action="count", help="show per-message progress and status")
    parser.add_argument("-d", "--debug",
        action="store_true", help="show everything. everything.")
    parser.add_argument("-m", "--maildir", default="~/Maildir/",
        help="path to maildir to import messages into (will create if nonexistant)")
    parser.add_argument("-n", "--dry-run",
        action="store_true", help="simulate actions only")
    parser.add_argument("-r", "--recursive",
        action="store_true", help="also import all subfolders")
    parser.add_argument("-l", "--fs",
        action="store_true", help="use FS layout for maildir subfolders instead of Maildir++")
    # "*" rather than "+" so that the ~/Library/Mail/ fallback below is reachable.
    parser.add_argument("source", nargs="*",
        help="Apple Mail mailbox or mail directory to import; ~/Library/Mail/ when omitted")

    args = parser.parse_args(argv[1:])

    # --debug wins; otherwise --quiet overrides --verbose.
    root_logger = logging.getLogger()
    if args.debug:
        root_logger.setLevel(logging.DEBUG)
        logging.debug("Debug output enabled.")
    if root_logger.getEffectiveLevel() != logging.DEBUG:
        if args.quiet:
            root_logger.setLevel(logging.ERROR)
        elif args.verbose:
            root_logger.setLevel(logging.INFO)
    # Logged after the level is configured so that -v/-d can actually show it.
    logging.info(args)

    paths = args.source
    if not paths:
        paths = [os.path.expanduser("~/Library/Mail/")]

    ### Process the paths

    maildir = None
    for path in paths:
        if stopped():
            break

        if not os.path.isdir(path):
            logging.warning("path is not a directory: %s", path)
            continue

        # See if the path is a mailbox container
        v3_path = os.path.join(path, "V3")
        if os.path.isdir(v3_path):
            logging.debug("has V3 data")
            path = v3_path

        local_mailboxes = os.path.join(path, "Mailboxes")
        if os.path.isdir(local_mailboxes):
            path = local_mailboxes

        # Load what should be a mailbox at this point
        logging.info("processing source path: %s", path)

        mailboxes = AMMailbox(path)
        sources = [mailboxes]
        if args.recursive:
            sources.extend(mailboxes.all_children)
        logging.debug("sources: %r", sources)

        # Scan every mailbox exactly once; messages() walks the filesystem.
        scanned = [(box, box.messages()) for box in sources]
        logging.info("%s: found %d messages.", str(mailboxes), len(scanned[0][1]))

        # Total number of messages to import from this source path.
        total_count = sum(len(messages) for _box, messages in scanned)
        logging.info("%d messages to import in total", total_count)

        if not args.dry_run and maildir is None:
            maildir = Maildir(os.path.expanduser(args.maildir),
                              create=True, lazy=True, fs_layout=args.fs)

        for box, messages in scanned:
            logging.info("%s: starting import" % box.name)
            if stopped():
                break
            for msg in progress.bar(messages, expected_size=len(messages),
                                    label="Importing %s: " % box.name):
                if stopped():
                    break

                if args.dry_run:
                    # Dry run: only exercise partial-message reassembly.
                    if msg.partial:
                        msg.get_message()
                    continue

                m = msg.get_message()
                if m is None:
                    logging.warning("%s: could not read message; skipping", msg.msg_path)
                    continue
                maildir.add_message(m.get_maildir_message())

    return 0
|
||
|
||
def start():
    """Script entry point: install the Ctrl-C handler, run main(), exit.

    The first SIGINT sets the module-level ``STOP`` flag. A second SIGINT
    ignores further SIGINTs and sends SIGTERM to this process.
    """
    global STOP

    import signal

    # Initialise the flag before the handler is installed, so a SIGINT that
    # arrives immediately never reads an undefined global.
    STOP = False

    def signal_handler(sig, frame):
        global STOP
        if STOP:
            signal.signal(signal.SIGINT, signal.SIG_IGN)
            os.kill(os.getpid(), signal.SIGTERM)
        STOP = True

    signal.signal(signal.SIGINT, signal_handler)

    sys.exit(main(len(sys.argv), sys.argv))


if __name__ == "__main__":
    start()
Oops, something went wrong.