# Patch summary (descriptive preamble; placed before the first "diff --git" header).
#
# Files touched: src/mailparser/const.py, src/mailparser/core.py,
# src/mailparser/utils.py.
#
# const.py
#   - The single ADDRESSES_HEADERS set (bcc, cc, delivered-to, from, reply-to, to)
#     is replaced by two sets:
#       ADDRESS_HEADERS   = delivered-to, from, sender
#       ADDRESSES_HEADERS = bcc, cc, reply-to, to
#     "sender" was not in the removed set; "delivered-to" and "from" move out of
#     ADDRESSES_HEADERS.
#
# core.py
#   - ADDRESS_HEADERS is added to the import from mailparser.const.
#   - __getattr__ gains a branch for names in ADDRESS_HEADERS: the header is
#     decoded with decode_header_part and parsed with email.utils.parseaddr.
#     When the result equals ('',''), the string "Invalid {} header" (formatted
#     with the header name) is appended to self._defects if not already there,
#     self._has_defects is set to True, and the value is rebuilt by splitting
#     the raw header on "<" (first piece stripped of whitespace and '"', last
#     piece stripped of ">").
#   - The existing ADDRESSES_HEADERS branch no longer returns
#     email.utils.getaddresses([h]) directly: it returns [] when the decoded
#     header is "", otherwise it removes every ('','') entry from the
#     getaddresses result and records the same "Invalid {} header" defect.
#
# utils.py
#   - ADDRESS_HEADERS is added to the import from mailparser.const.
#   - get_mail_keys unions ADDRESS_HEADERS into all_parts in both the
#     `complete` and the non-complete branch.
#
# NOTE(review): the line breaks and indentation of this patch have been lost,
#   so the nesting of the added statements cannot be confirmed from this text
#   (e.g. whether `self._has_defects = True` is inside the
#   `if defect not in self._defects:` test, and at which level
#   `return parsed_address` sits) - check against the original patch.
# NOTE(review): no return for the `h == ""` case is visible in the new
#   ADDRESS_HEADERS branch, whereas the ADDRESSES_HEADERS branch returns [] -
#   confirm the intended result for a missing single-address header.
# NOTE(review): when the fallback split runs on a header without "<", the
#   first and last pieces of h.split("<") are the same element - confirm that
#   returning the same text as both name and address is intended.
diff --git a/src/mailparser/const.py b/src/mailparser/const.py index 2b223f5..ce44b7f 100644 --- a/src/mailparser/const.py +++ b/src/mailparser/const.py @@ -78,7 +78,8 @@ EPILOGUE_DEFECTS = {"StartBoundaryNotFoundDefect"} -ADDRESSES_HEADERS = set(["bcc", "cc", "delivered-to", "from", "reply-to", "to"]) +ADDRESS_HEADERS = set(["delivered-to", "from", "sender"]) +ADDRESSES_HEADERS = set(["bcc", "cc", "reply-to", "to"]) # These parts are always returned OTHERS_PARTS = set( diff --git a/src/mailparser/core.py b/src/mailparser/core.py index 353e326..6433b2b 100644 --- a/src/mailparser/core.py +++ b/src/mailparser/core.py @@ -27,7 +27,7 @@ import six import json -from mailparser.const import ADDRESSES_HEADERS, EPILOGUE_DEFECTS, REGXIP +from mailparser.const import ADDRESS_HEADERS, ADDRESSES_HEADERS, EPILOGUE_DEFECTS, REGXIP from mailparser.utils import ( convert_mail_date, @@ -587,9 +587,33 @@ def __getattr__(self, name): return json.dumps(raw, ensure_ascii=False) # object headers + elif name_header in ADDRESS_HEADERS: + h = decode_header_part(self.message.get(name_header, six.text_type())) + if h != "": + parsed_address = email.utils.parseaddr(h) + if parsed_address == ('',''): + defect = "Invalid {} header".format(name_header) + if defect not in self._defects: + self._defects.append(defect) + self._has_defects = True + parsed_address = h.split("<") + parsed_address = (parsed_address[0].strip().strip('"'), + parsed_address[-1].strip(">")) + return parsed_address + elif name_header in ADDRESSES_HEADERS: h = decode_header_part(self.message.get(name_header, six.text_type())) - return email.utils.getaddresses([h]) + if h == "": + return [] + parsed_addresses = email.utils.getaddresses([h]) + if ('','') in parsed_addresses: + while ('','') in parsed_addresses: + parsed_addresses.remove(('','')) + defect = "Invalid {} header".format(name_header) + if defect not in self._defects: + self._defects.append(defect) + self._has_defects = True + return parsed_addresses # others 
headers else: diff --git a/src/mailparser/utils.py b/src/mailparser/utils.py index 5800c0a..6f44d62 100644 --- a/src/mailparser/utils.py +++ b/src/mailparser/utils.py @@ -42,6 +42,7 @@ import six from mailparser.const import ( + ADDRESS_HEADERS, ADDRESSES_HEADERS, JUNK_PATTERN, OTHERS_PARTS, @@ -519,10 +520,11 @@ def get_mail_keys(message, complete=True): if complete: log.debug("Get all headers") all_headers_keys = {i.lower() for i in message.keys()} - all_parts = ADDRESSES_HEADERS | OTHERS_PARTS | all_headers_keys + all_parts = ADDRESS_HEADERS | ADDRESSES_HEADERS | OTHERS_PARTS | \ + all_headers_keys else: log.debug("Get only mains headers") - all_parts = ADDRESSES_HEADERS | OTHERS_PARTS + all_parts = ADDRESS_HEADERS | ADDRESSES_HEADERS | OTHERS_PARTS log.debug("All parts to get: {}".format(", ".join(all_parts))) return all_parts