import logging
import os
import re

import bs4
import jinja2


class ParsingFailedEception(Exception):
    """Raised when a registration mail does not have the expected structure."""


class CompetitionParser:
    """Extract competition registration data from an HTML mail body.

    Call :meth:`parseMail` first; afterwards :meth:`getFilename` and
    :meth:`getContent` build the target path and the rendered text from
    the values that were parsed.
    """

    # Lower-cased characters that are transliterated before a name is
    # reduced to ``[a-z0-9-]``.
    _UMLAUTS = str.maketrans({'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss'})

    # WDSF age group (lower case) -> abbreviation stored in ``_group``.
    _WDSF_GROUPS = {
        'juvenile i': 'Kin',
        'juvenile ii': 'Kin',
        'junior i': 'Jun 1',
        'junior ii': 'Jun 2',
        'youth': 'Jug',
        'adult': 'Hgr',
        'senior i': 'Mas I',
        'senior ii': 'Mas II',
        'senior iii': 'Mas III',
        'senior iv': 'Mas IV',
        'senior v': 'Mas V',
    }

    # WDSF section (lower case) -> abbreviation stored in ``_section``.
    _WDSF_SECTIONS = {
        'standard': 'Std',
        'latin': 'Lat',
    }

    # Number of leading <td> cells that _parseTable consumes.
    _CELL_COUNT = 6

    def __init__(self):
        self._l = logging.getLogger(__name__)

        # Parsed values; all stay empty strings until parseMail() succeeds.
        self._partner = ''
        self._partnerin = ''
        self._date = ''
        self._title = ''
        self._number = ''
        self._group = ''
        self._class = ''
        self._section = ''
        self._ort = ''
        self._verein = ''
        self._telefon = ''

        # Raw strings keep the backslashes literal; the non-raw '\.' used
        # previously is an invalid string escape that newer Python versions
        # warn about.
        self._reName = re.compile(r'Neue Meldung für (.*) / (.*)!')
        self._reDate = re.compile(r'([0-9]+)\.([0-9]+)\.([0-9]+)')
        self._reNumber = re.compile(r'Turnier: ([0-9]+)')
        self._rePhone = re.compile(r'Telefon: (\+?[0-9 /-]+)')
        self._rePlace = re.compile(r'Ort: (.*), (.*)')
        self._reCompetition = re.compile(
            r'(.*) ([A-ES]) ((?:Std)|(?:Lat)|(?:Kombi))'
        )
        self._reWDSFCompetition = re.compile(
            r'WDSF Open ([a-zA-Z0-9 ]*) ((?:Standard)|(?:Latin))(?: *-.*)?'
        )
        self._reWDSFCompetitionReversed = re.compile(
            r'WDSF Open ((?:Standard)|(?:Latin)) ([a-zA-Z0-9 ]*)(?: *-.*)?'
        )
        self._reCleaningString = re.compile(r'[^a-z0-9-]')
        self._reDashes = re.compile(r'-+')

    def parseMail(self, body: str) -> None:
        """Parse the HTML mail *body* and store the extracted fields.

        The first ``<h2>`` provides the two names, the first ``<table>``
        the remaining fields.

        Raises:
            ParsingFailedEception: if the header or any table cell cannot
                be parsed.
        """
        parser = bs4.BeautifulSoup(body, 'html.parser')
        self._getNames(parser.h2)
        self._parseTable(parser.table)

    def _getNames(self, h2) -> None:
        """Read both partner names from the headline element *h2*.

        Raises:
            ParsingFailedEception: if *h2* is missing, has no single text
                child, or does not match the expected headline pattern.
        """
        # A missing <h2> or one without a plain string is reported as a
        # parsing failure instead of surfacing as AttributeError/TypeError.
        headline = None if h2 is None else h2.string
        matcher = None if headline is None else self._reName.match(headline)
        if matcher is None:
            self._l.error('Parsing of header "%s" failed.', h2)
            raise ParsingFailedEception('Header could not be successfully parsed')

        self._partner = matcher.group(1)
        self._partnerin = matcher.group(2)

    def _parseTable(self, table) -> None:
        """Read date, title, number, competition, place and phone.

        The values are taken, in that order, from the first six ``<td>``
        cells of *table*; surrounding whitespace is stripped.

        Raises:
            ParsingFailedEception: if the table is missing, has fewer than
                six cells, a cell has no plain text, or a cell does not
                match its expected format.
        """
        if table is None:
            raise ParsingFailedEception('No table found in mail')

        cells = table('td')
        if len(cells) < self._CELL_COUNT:
            raise ParsingFailedEception(
                f'Expected at least {self._CELL_COUNT} table cells in mail, '
                f'found {len(cells)}'
            )

        # Validate every cell before touching any state so that a broken
        # mail does not leave the parser half filled.
        texts = []
        for index, cell in enumerate(cells[:self._CELL_COUNT]):
            text = cell.string
            if text is None:
                raise ParsingFailedEception(
                    f'Table cell {index} has no plain text content'
                )
            texts.append(text.strip())

        date, title, number, competition, place, phone = texts
        self._parseDate(date)
        self._title = title
        self._parseNumber(number)
        self._parseCompetition(competition)
        self._parsePlace(place)
        self._parsePhone(phone)

    def _parseDate(self, date: str) -> None:
        """Convert a ``day.month.year`` string into ``year-month-day``."""
        match = self._reDate.fullmatch(date)
        if match is None:
            raise ParsingFailedEception(f'Cannot parse date {date} in mail')

        day, month, year = match.groups()
        # Zero-pad so that e.g. "1.2.2020" becomes "2020-02-01".
        self._date = f'{year}-{month.zfill(2)}-{day.zfill(2)}'

    def _parseNumber(self, content: str) -> None:
        """Extract the number from a ``Turnier: <digits>`` cell."""
        match = self._reNumber.fullmatch(content)
        if match is None:
            raise ParsingFailedEception(
                f'Cannot parse the turnier number in field {content}'
            )
        self._number = match.group(1)

    def _parseCompetition(self, competition: str) -> None:
        """Fill group, class and section from the competition line.

        The DTV format is tried first, then the WDSF Open formats.
        """
        for parse in (self._parseDTVCompetition, self._parseWDSFCompetition):
            try:
                parse(competition)
            except ParsingFailedEception:
                # Deliberate fallback: try the next known format.
                continue
            return

        raise ParsingFailedEception(
            f'No more matchers for the competition line "{competition}" were left.'
        )

    def _parseDTVCompetition(self, competition: str) -> None:
        """Parse ``<group> <class letter> <Std|Lat|Kombi>``."""
        match = self._reCompetition.fullmatch(competition)
        if match is None:
            raise ParsingFailedEception(
                f'Cannot parse the competition line {competition}'
            )
        self._group, self._class, self._section = match.groups()

    def _parseWDSFCompetition(self, competition: str) -> None:
        """Parse a ``WDSF Open`` line in either word order.

        Both ``WDSF Open <group> <section>`` and
        ``WDSF Open <section> <group>`` are accepted, each optionally
        followed by a ``- ...`` suffix. Known groups and sections are
        mapped to their abbreviations; unknown ones are stored as found.
        """
        line = competition.strip()

        forward = self._reWDSFCompetition.fullmatch(line)
        if forward is not None:
            group, section = forward.group(1), forward.group(2)
        else:
            reverse = self._reWDSFCompetitionReversed.fullmatch(line)
            if reverse is None:
                raise ParsingFailedEception(
                    'Neither forward not reversed parsing worked'
                )
            section, group = reverse.group(1), reverse.group(2)

        # The group pattern allows spaces, so strip in both variants to
        # keep the table lookup reliable.
        group = group.strip()
        self._group = self._WDSF_GROUPS.get(group.lower(), group)
        self._class = 'WDSF Open'
        self._section = self._WDSF_SECTIONS.get(section.lower(), section)

    def _parsePlace(self, place: str) -> None:
        """Split ``Ort: <verein>, <ort>`` into club and place."""
        match = self._rePlace.fullmatch(place)
        if match is None:
            raise ParsingFailedEception(f'Cannot parse the place entry {place}')
        self._verein = match.group(1)
        self._ort = match.group(2)

    def _parsePhone(self, phone: str) -> None:
        """Extract the number from a ``Telefon: <number>`` cell."""
        match = self._rePhone.fullmatch(phone)
        if match is None:
            raise ParsingFailedEception(f'Cannot parse the phone line {phone}')
        self._telefon = match.group(1)

    def _cleanName(self, name: str) -> str:
        """Return *name* reduced to lower-case ``[a-z0-9-]``.

        German umlauts and ``ß`` are transliterated, every other
        disallowed character becomes a dash and runs of dashes collapse
        into a single one.
        """
        transliterated = name.lower().translate(self._UMLAUTS)
        dashed = self._reCleaningString.sub('-', transliterated)
        return self._reDashes.sub('-', dashed)

    def getFilename(self, prefix: str) -> str:
        """Build the markdown file path for the parsed registration.

        Returns:
            ``<prefix>/<first four characters of the date>/<file>.md``
            where the file name is composed of date, place, both names
            and the competition.
        """
        namePartner = self._cleanName(self._partner)
        namePartnerin = self._cleanName(self._partnerin)
        competitionName = self._cleanName(
            f'{self._group} {self._class} {self._section}'
        )
        ort = self._cleanName(self._ort)

        filename = (
            f'{self._date}-{ort}-{namePartner}-{namePartnerin}'
            f'-{competitionName}.md'
        )
        return os.path.join(
            prefix,
            self._date[0:4],
            # Empty parts would otherwise leave doubled dashes behind.
            self._reDashes.sub('-', filename),
        )

    def getContent(self) -> str:
        """Render ``contenttemplate.md.tmpl`` with the parsed values.

        The template is read from the directory that contains this module.
        """
        templatePath = os.path.join(
            os.path.dirname(__file__), 'contenttemplate.md.tmpl'
        )
        # NOTE(review): the file is read with the platform default
        # encoding; confirm the template's encoding and pass encoding=
        # explicitly if it contains non-ASCII text.
        with open(templatePath) as fp:
            template = jinja2.Template(fp.read())

        context = {
            'date': self._date,
            'partner': self._partner,
            'partnerin': self._partnerin,
            'verein': self._verein,
            'ort': self._ort,
            'telefon': self._telefon,
            'group': self._group,
            'class': self._class,
            'section': self._section,
            'title': self._title,
            'number': self._number,
        }
        return template.render(**context)