2022-11-14 19:01:32 +00:00
|
|
|
import logging
|
2022-12-03 13:29:35 +00:00
|
|
|
from pprint import pformat
|
2022-11-14 19:01:32 +00:00
|
|
|
|
2022-12-03 20:46:19 +00:00
|
|
|
import solo_turnier
|
2022-11-15 15:52:19 +00:00
|
|
|
from solo_turnier import html_parser
|
2022-12-03 13:29:35 +00:00
|
|
|
from .reader import ResultRow
|
2022-12-03 20:46:19 +00:00
|
|
|
from .types import HtmlCompetitionResultRow as CompetitionResult
|
|
|
|
from . import types
|
2022-11-15 17:11:40 +00:00
|
|
|
|
2022-11-27 08:10:17 +00:00
|
|
|
class HtmlPerson:
    """A person described by a name, an id and a group.

    Equality and hashing are both derived from the textual representation
    produced by ``__repr__``, so two instances that render to the same text
    are interchangeable as dict keys and set members.
    """

    def __init__(self, name, id, group):
        self.name, self.id, self.group = name, id, group

    def __repr__(self):
        """Render as ``<name> (<id>, <group>)``."""
        return '{} ({}, {})'.format(self.name, self.id, self.group)

    def __eq__(self, o):
        """Compare by rendered text; anything that is not an HtmlPerson is unequal."""
        return isinstance(o, HtmlPerson) and str(self) == str(o)

    def __hash__(self):
        """Hash the rendered text so that equal persons hash equally."""
        return hash(str(self))
|
2022-11-27 08:10:17 +00:00
|
|
|
|
2022-11-14 19:01:32 +00:00
|
|
|
class ResultPerson:
    """A person from the result table, described by first name, last name and club.

    ``id`` and ``group`` are optional at construction time. Other code in this
    module assigns them later (``DataWorker.checkUniqueIds`` sets ``id``,
    ``DataWorker.consolidateGroups`` sets ``group``) while the instance is
    already used as a dict key, so the hash must not depend on them.
    """

    def __init__(self, firstName, lastName, club, id = None, group = None):
        self.firstName = firstName
        self.lastName = lastName
        # Display name, fixed at construction from the two name parts.
        self.name = f'{firstName} {lastName}'
        self.club = club
        self.id = id
        self.group = group

    @staticmethod
    def extractFromResultRow(row: ResultRow):
        """Build a person (without id and group) from the name and club of *row*."""
        return ResultPerson(
            firstName=row.firstName,
            lastName=row.lastName,
            club=row.club
        )

    def __eq__(self, o):
        """Two persons are equal if first name, last name, club and id all match."""
        if not isinstance(o, ResultPerson):
            return False

        return (
            self.firstName == o.firstName and
            self.lastName == o.lastName and
            self.club == o.club and
            self.id == o.id
        )

    def __repr__(self):
        """Render as ``<name> (<club>)``, followed by ``[<id>]`` once an id is known."""
        if self.id is None:
            return f'{self.name} ({self.club})'
        else:
            return f'{self.name} ({self.club}) [{self.id}]'

    def __hash__(self):
        """Hash only the fields that are not reassigned after construction.

        The hash used to be derived from ``str(self)``, which contains the id.
        Assigning ``id`` to a person that is already a dict key then changed
        its hash and made every later ``data[person]`` lookup fail. Hashing a
        subset of the fields compared by ``__eq__`` is still consistent:
        equal persons always share first name, last name and club.
        """
        return hash((self.firstName, self.lastName, self.club))
|
2022-11-14 19:01:32 +00:00
|
|
|
|
|
|
|
|
2022-12-03 13:29:35 +00:00
|
|
|
class ImportNotParsableException(Exception):
    """Raised when an imported export file does not have the expected layout."""
|
2022-11-15 09:48:50 +00:00
|
|
|
|
2022-12-03 20:46:19 +00:00
|
|
|
# Maps the path of an HTML file to the parser that was created for its content.
ParserList_t = dict[str, html_parser.HtmlParser]
|
|
|
|
|
2022-11-27 08:10:17 +00:00
|
|
|
class PreviewWorker:
    """Collect participants from the HTML files of preview rounds ('Sichtung')."""

    def __init__(self):
        self.l = logging.getLogger('solo_turnier.worker.PreviewWorker')
        # Maps a participant id to the list of distinct participants seen with that id.
        self.participants = {}

    def filterFilesPreview(self, files: list[str]) -> ParserList_t:
        """Return the parsers of all files whose title marks them as preview round.

        Every file is read and wrapped in an ``html_parser.HtmlParser``. Files
        whose title data has ``class_ == 'Sichtung'`` are kept, keyed by their
        path. Files whose title cannot be interpreted are logged and skipped.
        Errors while opening or reading a file are not caught.
        """
        self.l.debug('Filtering the list of parsers by removing all non preview entries.')
        ret = {}
        for file in files:
            # NOTE(review): the platform default encoding is used here; confirm
            # it matches the encoding of the exported HTML files.
            with open(file, 'r') as fp:
                text = fp.read()

            parser = html_parser.HtmlParser(text, file)

            try:
                data = parser.guessDataFromHtmlTitle()
            except Exception:
                # Best effort: an unparsable title only disqualifies this file.
                # Catching Exception (not a bare except) lets KeyboardInterrupt
                # and SystemExit propagate.
                self.l.error(f'Unable to parse html file in {file}. Please check manually.')
                self.l.debug('Parsing the title of %s failed with:', file, exc_info=True)
                continue

            if data['class_'] == 'Sichtung':
                self.l.debug(f"Found candidate in {file}. Adding to the list.")
                ret[file] = parser
            else:
                self.l.debug(f'Rejecting file {file} as the name {data["class_"]} did not match.')

        return ret

    def __extractPersonsFromSinglePreview(self, parser: html_parser.HtmlParser):
        """Add all participants found in one preview file to ``self.participants``.

        The first table row supplies one participant per non-empty cell: the
        name is taken from the cell's ``meta`` entry and the id from its
        ``text``. The group is read from the last table row if the last title
        is 'Startgruppe', otherwise it is taken from the title of the import.

        Raises ImportNotParsableException if the first title is not
        'Wertungsrichter'.
        """
        imported = parser.parsePreparationRound()
        parser.cleanPreparationRoundImport(imported)
        data = imported['data']

        self.l.log(5, data)

        if data['titles'][0] != 'Wertungsrichter':
            self.l.critical('Cannot parse the parsed content of the preview file.')
            raise ImportNotParsableException('Incompatible export file')

        if data['titles'][-1] == 'Startgruppe':
            self.l.debug('Combined competition found. Extracting group from table required.')
            extractGroup = True
        else:
            self.l.debug('Using group from the title.')
            group = parser.guessDataFromHtmlTitle(imported['title'])['group']
            extractGroup = False

        for index, e in enumerate(data['table'][0]):
            if e['text'] == '':
                # Skip empty columns
                continue

            # Extract data from column
            name = e['meta']
            participantId = int(e['text'])
            if extractGroup:
                group = data['table'][-1][index]['text']

            participant = types.HtmlPreviewParticipant(name, participantId, group)

            # Register the participant under its id unless an equal one is already known.
            known = self.participants.setdefault(participantId, [])
            self.l.log(5, 'Checking for existence of %s in %s: %s', participant, known, participant in known)
            if participant not in known:
                known.append(participant)

    def importAllData(self, parsers: ParserList_t) -> types.HtmlPreviewImport:
        """Reset the participants and re-collect them from all given parsers."""
        self.participants = {}

        for parser in parsers.values():
            self.__extractPersonsFromSinglePreview(parser)

        return types.HtmlPreviewImport(self.participants)
|
2022-11-27 08:10:17 +00:00
|
|
|
|
2022-11-15 09:48:50 +00:00
|
|
|
class DataWorker:
    """Group result rows by person and consolidate ids, groups and dances.

    Several methods assign attributes (``id``, ``group``) on the persons that
    are the keys of the passed dict. They therefore iterate over ``items()``
    instead of looking the key up again: a key type whose hash depends on
    such an attribute could no longer be found by ``data[person]`` afterwards.
    """

    def __init__(self):
        self.l = logging.getLogger('solo_turnier.worker.DataWorker')

    def combineRowsByPerson(self, rows: list[ResultRow]) -> dict[ResultPerson, list[CompetitionResult]]:
        """Collect the competition results of all rows per person.

        Rows whose result has ``'-'`` as ``place`` or ``placeTo`` are dropped.
        """
        ret = {}
        for row in rows:
            result = CompetitionResult.extractFromResultRow(row)

            if result.place == '-' or result.placeTo == '-':
                continue

            person = ResultPerson.extractFromResultRow(row)
            ret.setdefault(person, []).append(result)
        return ret

    def checkUniqueIds(self, data: dict[ResultPerson, list[CompetitionResult]]) -> bool:
        """Assign each person the id used in its results, if there is exactly one.

        Returns True if every person had exactly one distinct id. Persons with
        no or several ids keep their current id and make the result False.
        """
        unique = True
        for person, results in data.items():
            ids = {c.id for c in results}
            if len(ids) == 1:
                person.id = next(iter(ids))
            else:
                unique = False

        return unique

    def consolidateGroups(self, data:dict[ResultPerson, list[CompetitionResult]]) -> tuple[bool, bool]:
        """Derive one group per person from the groups of its results.

        Return a tuple of two booleans.
        The first one is True if all persons could be unambiguously assigned
        a group.
        The second one is True if there was the need to override a group but
        it was possible to extract it from other data. It can be seen as a
        warning.

        Raises Exception if a result carries an unknown group or if a person
        has results in more than one of the plain groups.
        """
        ambiguous = False
        warnChange = False

        unambiguousGroups = {'Kin.', 'Jun.', 'Jug.'}
        combinations = {'Kin./Jun.', 'Jun./Jug.'}

        for person, results in data.items():
            groupsRaw = {c.group for c in results}

            unknown = groupsRaw.difference(unambiguousGroups).difference(combinations)
            if len(unknown) > 0:
                raise Exception(f'There were unknown groups found for {person}: {unknown}')

            plainGroups = groupsRaw.intersection(unambiguousGroups)

            if len(plainGroups) == 0:
                if len(groupsRaw) == 2:
                    # Both combined groups were danced; 'Jun.' is the one they share.
                    warnChange = True
                    person.group = 'Jun.'
                else:
                    ambiguous = True
                    if len(groupsRaw) == 1:
                        person.group = next(iter(groupsRaw))

            elif len(plainGroups) == 1:
                if len(groupsRaw.intersection(combinations)) > 0:
                    warnChange = True

                person.group = next(iter(plainGroups))

            else:
                raise Exception(f'{person} cannot have different groups.')

        return (not ambiguous, warnChange)

    def _createHtmlLUT(self, htmlImports: list[html_parser.HtmlImport]):
        """Index the HTML imports by the (group, class_, dance) parsed from their title."""
        ret = {}
        parser = html_parser.HtmlParser('')
        for imp in htmlImports:
            parsed = parser.guessDataFromHtmlTitle(imp.title)
            key = (parsed['group'], parsed['class_'], parsed['dance'])
            ret[key] = imp
            self.l.debug('LUT[%s] = %s', key, imp)
        self.l.debug('LUT completed')
        return ret

    def mergeHtmlData(self, data:dict[ResultPerson, list[CompetitionResult]], htmlImports: list[html_parser.HtmlImport]):
        """Copy the ``finalist`` flag from the matching HTML import into every result.

        The import is selected by competition group, class and dance; the
        participant inside it by the stringified id of the result. A name
        mismatch between person and participant is only logged. Missing
        imports or participants raise KeyError.
        """
        lut = self._createHtmlLUT(htmlImports)

        for person, competitions in data.items():
            for competition in competitions:
                key = (competition.competitionGroup, competition.competitionClass, competition.dance)
                htmlImport = lut[key]
                participant = htmlImport.participants[str(competition.id)]
                if participant.name != person.name:
                    self.l.error(f'Names for {person} and participant in HTML import ({participant}) do not match. Please check carefully.')
                competition.finalist = participant.finalist

    def getAllDancesInCompetitions(self, data:dict[ResultPerson, list[CompetitionResult]]) -> list[str]:
        """Return the known dances that occur in any result, in the fixed order below."""
        allDances = [
            'Samba', 'Cha Cha', 'Rumba', 'Paso Doble', 'Jive',
            'Langs. Walzer', 'Tango', 'Wiener Walzer', 'Slowfox', 'Quickstep'
        ]

        dancesPresent = set()
        for competitions in data.values():
            for competition in competitions:
                dancesPresent.add(competition.dance)

        # Dances outside of allDances are ignored.
        return [d for d in allDances if d in dancesPresent]

    def collectPersonsInGroups(self, data:dict[ResultPerson, list[CompetitionResult]]) -> dict[str, list[ResultPerson]]:
        """Return the persons per group name.

        The keys are 'Kin.', 'Jun.', 'Jug.' and 'Sonst'; the latter holds
        every person that is in none of the three named groups.
        """
        groups = {
            name: [p for p in data if p.group == name]
            for name in ('Kin.', 'Jun.', 'Jug.')
        }
        found = groups['Kin.'] + groups['Jun.'] + groups['Jug.']
        groups['Sonst'] = [p for p in data if p not in found]
        return groups

    def sortPersonsInGroup(self, persons: list[ResultPerson]) -> tuple[list[ResultPerson], bool]:
        """Return the persons in sorted order and whether ids should be shown.

        If every person has an id, the persons are sorted by id and the flag
        is True. As soon as one id is missing, they are sorted by
        ``'<name> (<club>)'`` and the flag is False.

        The sort is stable and compares only the keys, so persons that share
        an id (or name and club) keep their input order instead of raising
        TypeError for not being orderable.
        """
        def byName(p: ResultPerson):
            return f'{p.name} ({p.club})'

        def byId(p: ResultPerson):
            return p.id

        showIds = all(p.id is not None for p in persons)
        ordered = sorted(persons, key=byId if showIds else byName)

        return (ordered, showIds)

    def mapPersonResultsToDanceList(self, results: list[CompetitionResult], dances: list[str]) -> list[CompetitionResult|None]:
        """Return, for each entry of *dances*, the result of that dance or None.

        Raises Exception if more than one result exists for a dance.
        """
        ret = []
        for dance in dances:
            competitions = [c for c in results if c.dance == dance]
            if len(competitions) > 1:
                raise Exception(f'Multiple competitions with the same dance "{dance}" found.')
            ret.append(competitions[0] if competitions else None)

        return ret
|
2022-11-27 08:10:17 +00:00
|
|
|
|
2022-12-03 20:46:19 +00:00
|
|
|
class Worker:
    """Entry point that drives the preview import and the CSV result import."""

    def __init__(self):
        self.l = logging.getLogger('solo_turnier.worker.Worker')

    def collectAllData(
        self,
        htmlCandidatesPreview: list[str],
        csvFile: str
    ) -> types.State3:
        """Import the preview rounds and load the result CSV file.

        The preview candidates are filtered and imported through a
        PreviewWorker, then the CSV file is read. The imported data is only
        logged so far: the method currently returns None regardless of the
        annotated return type.
        """
        log = self.l
        preview = PreviewWorker()

        log.info('Filtering for pure preview rounds.')
        previewParsers = preview.filterFilesPreview(htmlCandidatesPreview)
        log.debug('Remaining files: %s', previewParsers.keys())

        log.info('Extracting person data from the preview rounds.')
        imported = preview.importAllData(previewParsers)
        log.debug('Total preview import: %s', imported)

        reader = solo_turnier.reader.CSVResultReader(csvFile)
        log.info('Loading the total result CSV file %s', csvFile)
        # The extracted rows are not processed any further yet.
        reader.extractResult()

        return None
|