# hugo-page-test/scripts/read-competition-notification/competitionNotificationReader/mailParser.py

import competitionNotificationReader as cnr
import logging
import re

class MailParser:
    """Extract the decoded HTML part of a ``multipart/alternative`` mail.

    The parser reads ``headers`` (a sequence of ``(name, value)`` pairs) and
    ``body`` (a list of lines) from the mail objects it is given.  The type
    annotations referring to ``cnr.mail.Mail`` are written as strings so they
    are not evaluated while the class is being defined.
    """

    # ``boundary`` parameter of a Content-Type header, quoted or bare.
    # Parameter names are case-insensitive, hence IGNORECASE.
    _BOUNDARY_PATTERN = re.compile(
        r'\bboundary=(?:"([^"]+)"|([^\s;"]+))', re.IGNORECASE)

    # A well-formed quoted-printable escape: '=' followed by two hex digits.
    _QP_ESCAPE_PATTERN = re.compile(rb'=([0-9A-Fa-f]{2})')

    def __init__(self):
        self._l = logging.getLogger(__name__)

    @staticmethod
    def _headerValue(mail, name: str) -> str:
        """Return the normalised value of the first header called *name*.

        The value is stripped and lower-cased so it can be compared
        case-insensitively.  An empty string is returned when the header is
        absent.
        """
        for header in mail.headers:
            if header[0].lower() == name:
                return header[1].strip().lower()
        return ''

    def parseMail(self, rawMail: 'cnr.mail.Mail'):
        """Return the decoded text of the mail's single HTML part.

        Args:
            rawMail: Mail whose ``headers`` announce ``multipart/alternative``
                and whose ``body`` holds the multipart lines.

        Returns:
            The quoted-printable-decoded body of the ``text/html`` part.

        Raises:
            NotImplementedError: If the mail is not ``multipart/alternative``
                or does not contain exactly one quoted-printable
                ``text/html`` part.
            SystemExit: Propagated from ``_getContentType`` when the
                Content-Type header is missing, duplicated or lacks a
                boundary.
        """
        _, boundary = self._getContentType(rawMail)
        subMails = self._splitMultipartBody(rawMail.body, boundary)

        htmlParts = [
            mail for mail in subMails
            if self._headerValue(mail, 'content-type').startswith('text/html')
            and self._headerValue(mail, 'content-transfer-encoding')
            == 'quoted-printable'
        ]

        if len(htmlParts) != 1:
            raise NotImplementedError('Not implemented')

        return self._mapQuotedPrintable(htmlParts[0].body)

    def _getContentType(self, rawMail: 'cnr.mail.Mail') -> tuple[str, str]:
        """Return ``('multipart/alternative', boundary)`` for *rawMail*.

        Args:
            rawMail: Mail whose ``headers`` are inspected.

        Returns:
            The literal content type and the boundary string taken from the
            Content-Type header.

        Raises:
            SystemExit: With code 1 (after logging an error) when there is
                not exactly one Content-Type header or no boundary parameter
                can be found.
            NotImplementedError: When the content type is not
                ``multipart/alternative``.
        """
        ctHeaders = [h for h in rawMail.headers
                     if h[0].lower() == 'content-type']
        if len(ctHeaders) != 1:
            self._l.error('No unique content type of the mail was found.')
            # Same exit status as before, without relying on the ``exit``
            # helper that only exists when the ``site`` module is loaded.
            raise SystemExit(1)

        ct = ctHeaders[0][1]
        # Media types are compared case-insensitively.
        if not ct.strip().lower().startswith('multipart/alternative'):
            raise NotImplementedError('Not yet implemented')

        match = self._BOUNDARY_PATTERN.search(ct)
        if match is None:
            self._l.error('Cannot extract boundary from mail header.')
            raise SystemExit(1)

        # Group 1 is the quoted form, group 2 the bare token form.
        boundary = match.group(1) or match.group(2)

        return 'multipart/alternative', boundary

    def _splitMultipartBody(self, bodyLines: list[str], boundary: str):
        """Split *bodyLines* at the multipart delimiters.

        Lines before the first delimiter (the preamble) and after the closing
        delimiter (the epilogue) are ignored.  Empty parts are skipped, and a
        trailing part that is never terminated by a delimiter is dropped.

        Args:
            bodyLines: Lines of the multipart body.
            boundary: Boundary string without the leading ``--``.

        Returns:
            One ``cnr.headerExtractor.splitHeaders`` result per body part.
        """
        delimiter = f'--{boundary}'
        closeDelimiter = f'{delimiter}--'

        parts = []
        current = None  # None while outside of any part (preamble/epilogue)
        for line in bodyLines:
            # Trailing whitespace / line endings after a delimiter are
            # padding; requiring an exact match avoids splitting on a longer
            # boundary that merely shares this prefix.
            marker = line.rstrip()
            if marker == delimiter or marker == closeDelimiter:
                if current:
                    parts.append(current)
                if marker == closeDelimiter:
                    break
                current = []
            elif current is not None:
                current.append(line)

        return [cnr.headerExtractor.splitHeaders(part) for part in parts]

    def _mapQuotedPrintable(self, lines: list[str]):
        """Decode quoted-printable *lines* into a single string.

        A line ending in ``=`` (optionally followed by CR/LF) is a soft line
        break and is glued to the following line.  Every ``=XX`` escape is
        replaced by the byte it denotes and the result is decoded as UTF-8.
        Malformed escapes are kept literally instead of raising, and a soft
        break on the very last line is simply dropped.

        The decoded lines are concatenated without inserting a separator,
        so line breaks survive only if the input lines still contain them.
        NOTE(review): whether callers pass lines with or without their line
        endings cannot be told from this file - confirm.

        Args:
            lines: Quoted-printable encoded lines.

        Returns:
            The decoded text.

        Raises:
            UnicodeDecodeError: If the decoded bytes are not valid UTF-8.
        """
        # Step 1: resolve soft line breaks into logical lines.
        logicalLines = []
        fragments = []
        for line in lines:
            content = line.rstrip('\r\n')
            if content.endswith('='):
                fragments.append(content[:-1])
            else:
                fragments.append(line)
                logicalLines.append(''.join(fragments))
                fragments = []
        if fragments:
            # Input ended on a soft break; keep what was collected so far.
            logicalLines.append(''.join(fragments))

        # Step 2: substitute the escapes on the UTF-8 bytes of each logical
        # line ('=' never occurs inside a multi-byte UTF-8 sequence).
        def decodeLine(logicalLine):
            raw = self._QP_ESCAPE_PATTERN.sub(
                lambda m: bytes([int(m.group(1), 16)]),
                logicalLine.encode())
            return raw.decode()

        return ''.join(decodeLine(line) for line in logicalLines)

    # Backward-compatible alias for the original (misspelled) method name.
    _mapQuotedrintable = _mapQuotedPrintable