2024-01-14 20:41:17 +01:00
|
|
|
import bs4
|
|
|
|
import logging
|
|
|
|
import re
|
|
|
|
import os
|
|
|
|
import jinja2
|
|
|
|
|
|
|
|
class ParsingFailedEception(Exception):
    """Raised when a part of a registration mail cannot be parsed.

    The constructor is inherited unchanged from ``Exception``; the positional
    arguments (usually a single human-readable message) end up in ``args``.

    NOTE: the spelling of the class name ("Eception") is kept as-is because
    other code refers to the exception by this name.
    """
|
|
|
|
|
|
|
|
class CompetitionParser:
    """Extract the data of a competition registration from an HTML mail.

    ``parseMail`` reads the couple's names from the first ``<h2>`` element and
    the remaining fields from the first six ``<td>`` cells of the first
    ``<table>``. Afterwards ``getFilename`` and ``getContent`` build the target
    path and the rendered content from the parsed values.

    All parsed values are stored as strings in private attributes; they are
    empty strings until the corresponding part of the mail has been parsed.
    """

    # Translation of WDSF age-group names (lower-cased) to the abbreviations
    # used for the ``group`` field. Unknown names are kept verbatim.
    _WDSF_GROUPS = {
        'juvenile i': 'Kin',
        'juvenile ii': 'Kin',
        'junior i': 'Jun 1',
        'junior ii': 'Jun 2',
        'youth': 'Jug',
        'adult': 'Hgr',
        'senior i': 'Mas I',
        'senior ii': 'Mas II',
        'senior iii': 'Mas III',
        'senior iv': 'Mas IV',
        'senior v': 'Mas V',
    }

    # Translation of WDSF section names (lower-cased) to their abbreviations.
    _WDSF_SECTIONS = {
        'standard': 'Std',
        'latin': 'Lat',
    }

    # ASCII transliteration applied to lower-cased names before cleaning.
    _UMLAUTS = str.maketrans({'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss'})

    def __init__(self):
        """Initialise all parsed fields as empty and compile the patterns."""
        self._l = logging.getLogger(__name__)

        self._partner = ''
        self._partnerin = ''
        self._date = ''
        self._title = ''
        self._number = ''
        self._group = ''
        self._class = ''
        self._section = ''
        self._ort = ''
        self._verein = ''
        self._telefon = ''

        self._reName = re.compile(r'Neue Meldung für (.*) / (.*)!')
        self._reDate = re.compile(r'([0-9]+)\.([0-9]+)\.([0-9]+)')
        self._reNumber = re.compile(r'Turnier: ([0-9]+)')
        self._rePhone = re.compile(r'Telefon: (\+?[0-9 /-]+)')
        self._rePlace = re.compile(r'Ort: (.*), (.*)')
        self._reCompetition = re.compile(r'(.*) ([A-ES]) ((?:Std)|(?:Lat)|(?:Kombi))')
        self._reWDSFCompetition = re.compile(r'WDSF Open ([a-zA-Z0-9 ]*) ((?:Standard)|(?:Latin))(?: *-.*)?')
        self._reWDSFCompetitionReversed = re.compile(r'WDSF Open ((?:Standard)|(?:Latin)) ([a-zA-Z0-9 ]*)(?: *-.*)?')

        self._reCleaningString = re.compile(r'[^a-z0-9-]')
        self._reDashes = re.compile(r'-+')

    def parseMail(self, body: str):
        """Parse the HTML ``body`` of a mail and store the extracted fields.

        Raises:
            ParsingFailedEception: if the header or any table field cannot be
                parsed.
        """
        parser = bs4.BeautifulSoup(body, 'html.parser')
        self._getNames(parser.h2)
        self._parseTable(parser.table)

    def _getNames(self, h2):
        """Read both partner names from the header element ``h2``.

        ``h2`` may be ``None`` (no header in the mail) and its ``string`` may
        be ``None``; both cases are reported as a parsing failure instead of
        surfacing as ``AttributeError``/``TypeError``.

        Raises:
            ParsingFailedEception: if the header is missing or does not match
                the expected "Neue Meldung für ... / ...!" text.
        """
        header = None if h2 is None else h2.string
        matcher = None if header is None else self._reName.match(header)
        if matcher is None:
            self._l.error('Parsing of header "%s" failed.', h2)
            raise ParsingFailedEception('Header could not be successfully parsed')
        self._partner = matcher.group(1)
        self._partnerin = matcher.group(2)

    def _parseTable(self, table):
        """Parse date, title, number, competition, place and phone cells.

        ``table`` is called with ``'td'`` to obtain the cells; the first six
        cells are used in the order listed above, each one stripped of
        surrounding whitespace.

        Raises:
            ParsingFailedEception: if the table is missing, has fewer than six
                cells, a cell has no plain string content, or a field does not
                match its expected format.
        """
        if table is None:
            raise ParsingFailedEception('No table found in mail')

        tds = table('td')
        if len(tds) < 6:
            raise ParsingFailedEception(f'Expected at least 6 table cells in mail but found {len(tds)}')

        # Validate all cells up front so that a malformed cell is reported as
        # a parsing failure rather than an AttributeError on ``None``.
        texts = []
        for index, td in enumerate(tds[:6]):
            if td.string is None:
                raise ParsingFailedEception(f'Table cell {index} has no plain text content')
            texts.append(td.string.strip())
        date, title, number, competition, place, phone = texts

        self._parseDate(date)
        self._title = title
        self._parseNumber(number)
        self._parseCompetition(competition)
        self._parsePlace(place)
        self._parsePhone(phone)

    def _parseDate(self, date):
        """Convert a ``D.M.Y`` date into ``Y-MM-DD`` and store it."""
        match = self._reDate.fullmatch(date)
        if match is None:
            raise ParsingFailedEception(f'Cannot parse date {date} in mail')
        day, month, year = match.groups()
        # Pad day and month so single-digit values still yield a sortable,
        # ISO-like date; the year is taken over unchanged.
        self._date = f'{year}-{month.zfill(2)}-{day.zfill(2)}'

    def _parseNumber(self, content):
        """Store the competition number from a ``Turnier: <digits>`` field."""
        match = self._reNumber.fullmatch(content)
        if match is None:
            raise ParsingFailedEception(f'Cannot parse the turnier number in field {content}')
        self._number = match.group(1)

    def _parseCompetition(self, competition):
        """Store group, class and section, trying DTV format before WDSF."""
        if self._parseDTVCompetition(competition):
            return
        if self._parseWDSFCompetition(competition):
            return
        raise ParsingFailedEception(f'No more matchers for the competition line "{competition}" were left.')

    def _parseDTVCompetition(self, competition):
        """Try the ``<group> <class> <section>`` format; return success."""
        match = self._reCompetition.fullmatch(competition)
        if match is None:
            return False
        self._group, self._class, self._section = match.groups()
        return True

    def _parseWDSFCompetition(self, competition):
        """Try both ``WDSF Open`` word orders; return success."""
        line = competition.strip()
        match = self._reWDSFCompetition.fullmatch(line)
        if match is not None:
            grp, sec = match.groups()
        else:
            match = self._reWDSFCompetitionReversed.fullmatch(line)
            if match is None:
                return False
            sec, grp = match.groups()

        # The group pattern allows spaces, so it may capture surrounding
        # blanks (e.g. before a trailing " - ..." part or with double spaces).
        grp = grp.strip()
        self._group = self._WDSF_GROUPS.get(grp.lower(), grp)
        self._class = 'WDSF Open'
        self._section = self._WDSF_SECTIONS.get(sec.lower(), sec)
        return True

    def _parsePlace(self, place):
        """Store club and town from an ``Ort: <club>, <town>`` field."""
        match = self._rePlace.fullmatch(place)
        if match is None:
            raise ParsingFailedEception(f'Cannot parse the place entry {place}')
        self._verein = match.group(1)
        self._ort = match.group(2)

    def _parsePhone(self, phone):
        """Store the phone number from a ``Telefon: <number>`` field."""
        match = self._rePhone.fullmatch(phone)
        if match is None:
            raise ParsingFailedEception(f'Cannot parse the phone line {phone}')
        self._telefon = match.group(1)

    def _cleanName(self, name: str) -> str:
        """Return ``name`` reduced to lower-case ``a-z``, ``0-9`` and dashes.

        German umlauts and ``ß`` are transliterated, every other character is
        replaced by a dash and runs of dashes are collapsed to a single one.
        """
        cleanedName = name.lower().translate(self._UMLAUTS)
        cleanedName = self._reCleaningString.sub('-', cleanedName)
        return self._reDashes.sub('-', cleanedName)

    def getFilename(self, prefix: str) -> str:
        """Build the target path ``<prefix>/<year>/<date>-<town>-<names>-<competition>.md``.

        The year directory is the first four characters of the parsed date.
        """
        namePartner = self._cleanName(self._partner)
        namePartnerin = self._cleanName(self._partnerin)
        competitionName = self._cleanName(f'{self._group} {self._class} {self._section}')
        ort = self._cleanName(self._ort)

        filename = f'{self._date}-{ort}-{namePartner}-{namePartnerin}-{competitionName}.md'

        return os.path.join(
            prefix,
            self._date[0:4],
            # Empty or dash-terminated parts would otherwise leave "--".
            self._reDashes.sub('-', filename)
        )

    def getContent(self) -> str:
        """Render ``contenttemplate.md.tmpl`` (next to this module) with the parsed fields."""
        templatePath = os.path.join(os.path.dirname(__file__), 'contenttemplate.md.tmpl')
        # NOTE(review): the template is read as UTF-8 instead of the platform
        # default encoding — confirm the template file is stored as UTF-8.
        with open(templatePath, encoding='utf-8') as fp:
            tpl = fp.read()

        j2 = jinja2.Template(tpl)
        # 'class' is a Python keyword, so the values are passed as a dict.
        context = {
            'date': self._date,
            'partner': self._partner,
            'partnerin': self._partnerin,
            'verein': self._verein,
            'ort': self._ort,
            'telefon': self._telefon,
            'group': self._group,
            'class': self._class,
            'section': self._section,
            'title': self._title,
            'number': self._number,
        }
        return j2.render(**context)
|