import logging
from pprint import pformat

import solo_turnier
from solo_turnier import html_parser
from .reader import ResultRow
from .types import HtmlCompetitionResultRow as CompetitionResult
from . import types


class HtmlPerson:
    """A participant identified by name, id and group.

    Equality and hashing are both derived from the string representation,
    i.e. from the combination of name, id and group.
    """

    def __init__(self, name, id, group):
        self.name = name
        self.id = id
        self.group = group

    def __repr__(self):
        return f'{self.name} ({self.id}, {self.group})'

    def __eq__(self, o):
        if not isinstance(o, HtmlPerson):
            return False
        return str(self) == str(o)

    def __hash__(self):
        return hash(str(self))


class ResultPerson:
    """A person as found in the result rows.

    ``id`` and ``group`` are optional at construction time; within this module
    they are filled in later by ``DataWorker.checkUniqueIds`` and
    ``DataWorker.consolidateGroups``.
    """

    def __init__(self, firstName, lastName, club, id=None, group=None):
        self.firstName = firstName
        self.lastName = lastName
        self.name = f'{firstName} {lastName}'
        self.club = club
        self.id = id
        self.group = group

    @staticmethod
    def extractFromResultRow(row: ResultRow):
        """Build a person (without id and group) from a single result row."""
        return ResultPerson(
            firstName=row.firstName,
            lastName=row.lastName,
            club=row.club
        )

    def __eq__(self, o):
        if not isinstance(o, ResultPerson):
            return False

        return (
            self.firstName == o.firstName and
            self.lastName == o.lastName and
            self.club == o.club and
            self.id == o.id
        )

    def __repr__(self):
        if self.id is None:
            return f'{self.name} ({self.club})'
        return f'{self.name} ({self.club}) [{self.id}]'

    def __hash__(self):
        # The hash must stay constant while the object is used as a dict key.
        # ``id`` is assigned by DataWorker.checkUniqueIds *after* the person
        # has been inserted as a key, so it must not take part in the hash.
        # Objects that compare equal still share firstName, lastName and club
        # and therefore still hash equally.
        return hash((self.firstName, self.lastName, self.club))


class ImportNotParsableException(Exception):
    """Raised when a preview file does not have the expected table layout."""
    pass


ParserList_t = dict[str, html_parser.HtmlParser]


class PreviewWorker:
    """Collect the participants listed in the preview round HTML files."""

    def __init__(self):
        self.l = logging.getLogger('solo_turnier.worker.PreviewWorker')
        # Maps participant id -> list of distinct participants seen with that id
        self.participants = {}

    def filterFilesPreview(self, files: list[str]) -> ParserList_t:
        """Return parsers for those files whose title class is 'Sichtung'.

        Every file is read and wrapped in an ``HtmlParser``. Files whose title
        cannot be interpreted are logged and skipped.

        Returns a dict mapping the file name to its parser.
        """
        self.l.debug('Filtering the list of parsers by removing all non preview entries.')
        ret = {}
        for file in files:
            with open(file, 'r') as fp:
                text = fp.read()

            parser = html_parser.HtmlParser(text, file)

            try:
                data = parser.guessDataFromHtmlTitle()
            except Exception:
                # Best effort: a single unreadable file must not abort the
                # scan. Only ordinary errors are swallowed so that Ctrl-C and
                # interpreter shutdown still propagate.
                self.l.error(f'Unable to parse html file in {file}. Please check manually.')
                continue

            if data['class_'] == 'Sichtung':
                self.l.debug(f'Found candidate in {file}. Adding to the list.')
                ret[file] = parser
            else:
                self.l.debug(f'Rejecting file {file} as the name {data["class_"]} did not match.')

        return ret

    def __extractPersonsFromSinglePreview(self, parser: html_parser.HtmlParser):
        """Add all participants of one preview file to ``self.participants``.

        Raises ImportNotParsableException if the first column title of the
        parsed table is not 'Wertungsrichter'.
        """
        imported = parser.parsePreparationRound()
        parser.cleanPreparationRoundImport(imported)
        data = imported['data']

        self.l.log(5, data)

        if data['titles'][0] != 'Wertungsrichter':
            self.l.critical('Cannot parse the parsed content of the preview file.')
            raise ImportNotParsableException('Incompatible export file')

        if data['titles'][-1] == 'Startgruppe':
            self.l.debug('Combined competition found. Extracting group from table required.')
            extractGroup = True
        else:
            self.l.debug('Using group from the title.')
            group = parser.guessDataFromHtmlTitle(imported['title'])['group']
            extractGroup = False

        for index, e in enumerate(data['table'][0]):
            if e['text'] == '':
                # Skip empty columns
                continue

            # Extract data from column
            name = e['meta']
            participantId = int(e['text'])
            if extractGroup:
                # The group is taken from the same column of the last table row
                group = data['table'][-1][index]['text']

            participant = types.HtmlPreviewParticipant(name, participantId, group)

            known = self.participants.get(participantId, [])
            self.l.log(5, 'Checking for existence of %s in %s: %s', participant, known, participant in known)
            if participant not in known:
                known.append(participant)
            self.participants[participantId] = known

    def importAllData(self, parsers: ParserList_t) -> types.HtmlPreviewImport:
        """Reset the participant table and rebuild it from all given parsers."""
        self.participants = {}

        for parser in parsers.values():
            self.__extractPersonsFromSinglePreview(parser)

        return types.HtmlPreviewImport(self.participants)


class DataWorker:
    """Helpers to combine, check and arrange result rows per person."""

    def __init__(self):
        self.l = logging.getLogger('solo_turnier.worker.DataWorker')

    def combineRowsByPerson(self, rows: list[ResultRow]) -> dict[ResultPerson, list[CompetitionResult]]:
        """Group the competition results by the person they belong to.

        Rows whose ``place`` or ``placeTo`` is '-' are ignored.
        """
        ret = {}
        for row in rows:
            result = CompetitionResult.extractFromResultRow(row)

            if result.place == '-' or result.placeTo == '-':
                continue

            person = ResultPerson.extractFromResultRow(row)
            ret.setdefault(person, []).append(result)
        return ret

    def checkUniqueIds(self, data: dict[ResultPerson, list[CompetitionResult]]) -> bool:
        """Assign each person the id used in their results, if it is unique.

        Returns True only if every person has exactly one distinct id across
        all of their results. Persons with no or several ids are left
        untouched.
        """
        unique = True
        for person, results in data.items():
            ids = {c.id for c in results}
            if len(ids) == 1:
                person.id = next(iter(ids))
            else:
                unique = False

        return unique

    def consolidateGroups(self, data: dict[ResultPerson, list[CompetitionResult]]) -> tuple[bool, bool]:
        """Derive a single group for every person from their results.

        Return a tuple.
        The first entry is True if all persons could be unambiguously assigned
        a group.
        The second entry is True if a group had to be overridden but could be
        derived from other data. It can be seen as a warning.

        Raises Exception if a result carries a group name that is not known
        here, or if a person has results in more than one unambiguous group.
        """
        ambiguous = False
        warnChange = False

        unambiguousGroups = {'Kin.', 'Jun.', 'Jug.'}
        combinations = {'Kin./Jun.', 'Jun./Jug.'}

        for person, results in data.items():
            groupsRaw = {c.group for c in results}

            unknown = groupsRaw - unambiguousGroups - combinations
            if unknown:
                raise Exception(f'There were unknown groups found for {person}: {unknown}')

            definite = groupsRaw & unambiguousGroups

            if not definite:
                if len(groupsRaw) == 2:
                    # Only the two combined groups were seen; 'Jun.' is the
                    # one group contained in both of them.
                    warnChange = True
                    person.group = 'Jun.'
                else:
                    ambiguous = True
                    if len(groupsRaw) == 1:
                        person.group = next(iter(groupsRaw))
            elif len(definite) == 1:
                if groupsRaw & combinations:
                    warnChange = True
                person.group = next(iter(definite))
            else:
                raise Exception(f'{person} cannot have different groups.')

        return (not ambiguous, warnChange)

    def _createHtmlLUT(self, htmlImports: list[html_parser.HtmlImport]):
        """Index the HTML imports by (group, class, dance) taken from the title."""
        ret = {}
        parser = html_parser.HtmlParser('')
        for imp in htmlImports:
            parsed = parser.guessDataFromHtmlTitle(imp.title)
            key = (parsed['group'], parsed['class_'], parsed['dance'])
            ret[key] = imp
            self.l.debug('LUT[%s] = %s', key, imp)
        self.l.debug('LUT completed')
        return ret

    def mergeHtmlData(self, data: dict[ResultPerson, list[CompetitionResult]], htmlImports: list[html_parser.HtmlImport]):
        """Copy the ``finalist`` flag from the HTML imports onto each result.

        A name mismatch between the person and the HTML participant is logged
        as an error but does not stop processing. A missing import or
        participant raises KeyError.
        """
        lut = self._createHtmlLUT(htmlImports)

        for person, results in data.items():
            for competition in results:
                key = (competition.competitionGroup, competition.competitionClass, competition.dance)
                htmlImport = lut[key]
                participant = htmlImport.participants[str(competition.id)]
                if participant.name != person.name:
                    self.l.error(f'Names for {person} and participant in HTML import ({participant}) do not match. Please check carefully.')
                competition.finalist = participant.finalist

    def getAllDancesInCompetitions(self, data: dict[ResultPerson, list[CompetitionResult]]) -> list[str]:
        """Return the known dances that occur in ``data``, in canonical order.

        Dances that are not in the list below are ignored.
        """
        allDances = [
            'Samba', 'Cha Cha', 'Rumba', 'Paso Doble', 'Jive',
            'Langs. Walzer', 'Tango', 'Wiener Walzer', 'Slowfox', 'Quickstep'
        ]

        dancesPresent = {
            competition.dance
            for results in data.values()
            for competition in results
        }

        return [d for d in allDances if d in dancesPresent]

    def collectPersonsInGroups(self, data: dict[ResultPerson, list[CompetitionResult]]) -> dict[str, list[ResultPerson]]:
        """Split the persons by their group.

        Returns a dict with the keys 'Kin.', 'Jun.', 'Jug.' and 'Sonst'; the
        last one collects every person whose group is none of the first three.
        """
        knownGroups = ('Kin.', 'Jun.', 'Jug.')
        groups = {
            name: [p for p in data if p.group == name]
            for name in knownGroups
        }
        groups['Sonst'] = [p for p in data if p.group not in knownGroups]
        return groups

    def sortPersonsInGroup(self, persons: list[ResultPerson]) -> tuple[list[ResultPerson], bool]:
        """Sort the persons by id, or by name if any id is missing.

        Returns the sorted list and a flag telling whether the ids were used
        (True) or the fallback ordering by '<name> (<club>)' (False).
        """
        showIds = all(p.id is not None for p in persons)

        # A key function is used instead of sorting (key, person) tuples:
        # on equal keys a tuple comparison would fall through to comparing the
        # persons themselves, which do not define an ordering.
        if showIds:
            ordered = sorted(persons, key=lambda p: p.id)
        else:
            # We need to sort by name
            ordered = sorted(persons, key=lambda p: f'{p.name} ({p.club})')

        return (ordered, showIds)

    def mapPersonResultsToDanceList(self, results: list[CompetitionResult], dances: list[str]) -> list[CompetitionResult|None]:
        """Align the results with ``dances``.

        The returned list has one entry per dance: the matching result, or
        None if the person has no result in that dance.

        Raises Exception if more than one result exists for the same dance.
        """
        ret = []
        for dance in dances:
            competitions = [c for c in results if c.dance == dance]
            if not competitions:
                ret.append(None)
            elif len(competitions) > 1:
                raise Exception(f'Multiple competitions with the same dance "{dance}" found.')
            else:
                ret.append(competitions[0])

        return ret


class Worker:
    """Top level entry point combining the preview import and the CSV results."""

    def __init__(self):
        self.l = logging.getLogger('solo_turnier.worker.Worker')

    def collectAllData(
        self,
        htmlCandidatesPreview: list[str],
        csvFile: str
    ) -> types.State3:
        """Import the preview rounds and load the result CSV file.

        NOTE(review): the imported data is currently only logged and the
        method returns None despite the annotated return type. This looks
        unfinished; confirm the intended return value.
        """
        previewWorker = PreviewWorker()
        self.l.info('Filtering for pure preview rounds.')
        parsers = previewWorker.filterFilesPreview(htmlCandidatesPreview)
        self.l.debug('Remaining files: %s', parsers.keys())

        self.l.info('Extracting person data from the preview rounds.')
        previewImport = previewWorker.importAllData(parsers)
        self.l.debug('Total preview import: %s', previewImport)

        csvReader = solo_turnier.reader.CSVResultReader(csvFile)
        self.l.info('Loading the total result CSV file %s', csvFile)
        csvRows = csvReader.extractResult()

        return None