From 240824e80882644e6cffa0d45f9d8519b37921e5 Mon Sep 17 00:00:00 2001 From: Christian Wolf Date: Sat, 3 Dec 2022 21:46:19 +0100 Subject: [PATCH] Start using newly defined types --- src/solo_turnier/batch.py | 13 +-- src/solo_turnier/tests/test_types.py | 18 ++++ src/solo_turnier/types.py | 54 +++------- src/solo_turnier/worker.py | 141 +++++++++++---------------- 4 files changed, 91 insertions(+), 135 deletions(-) create mode 100644 src/solo_turnier/tests/test_types.py diff --git a/src/solo_turnier/batch.py b/src/solo_turnier/batch.py index b472a00..e54f853 100644 --- a/src/solo_turnier/batch.py +++ b/src/solo_turnier/batch.py @@ -181,17 +181,8 @@ class BatchWorker: htmlCandidatesPreview = locator.findPreviewRoundCandidates(self.config.importHtmlPath()) self.l.debug('Found HTML file candidates for preview rounds: %s', htmlCandidatesPreview) - previewWorker = solo_turnier.worker.PreviewWorker() - self.l.info('Filtering for pure preview rounds.') - parsers = previewWorker.filterFilesPreview(htmlCandidatesPreview) - self.l.debug('Remaining files: %s', parsers.keys()) - - self.l.info('Extracting person data from the preview rounds.') - previewWorker.extractPersonsFromPreview(parsers) - - csvReader = solo_turnier.reader.CSVResultReader(self.config.importCSVPath()) - self.l.info('Loading the total result CSV file %s', self.config.importCSVPath()) - csvRows = csvReader.extractResult() + worker = solo_turnier.worker.Worker() + worker.collectAllData(htmlCandidatesPreview, self.config.importCSVPath()) # csvReader = solo_turnier.reader.AllResultReader(self.config.importCSVPath()) # self.l.info('Loading the total result CSV file %s', self.config.importCSVPath()) diff --git a/src/solo_turnier/tests/test_types.py b/src/solo_turnier/tests/test_types.py new file mode 100644 index 0000000..ca6ffdc --- /dev/null +++ b/src/solo_turnier/tests/test_types.py @@ -0,0 +1,18 @@ +import pytest +import solo_turnier.types as types + +def test_HtmlPreviewParticipant_eq(): + name = 'Max Mustermann' + id = 123 + group = 'Kin' + participant = types.HtmlPreviewParticipant(name, id, group) + + l = [] + assert participant not in l + l.append(participant) + assert participant in l + + assert types.HtmlPreviewParticipant(name, id, group) in l + assert types.HtmlPreviewParticipant('Maxime Musterfrau', id, group) not in l + assert types.HtmlPreviewParticipant(name, 234, group) not in l + assert types.HtmlPreviewParticipant(name, id, 'Jun') not in l diff --git a/src/solo_turnier/types.py b/src/solo_turnier/types.py index 5161f7d..5fd8e92 100644 --- a/src/solo_turnier/types.py +++ b/src/solo_turnier/types.py @@ -21,18 +21,26 @@ class CSVResultRow: return f'{self.name} ({self.id}, {self.club}) is in {self.group} {self.class_} and danced the {self.dance} in {self.competitionGroup} {self.competitionClass} getting place {self.place}-{self.placeTo}' class HtmlPreviewParticipant: - def __init__(self, name, place, finalist): + def __init__(self, name, id, participant_group): self.name = name - self.place = place - self.finalist = finalist + self.id = id + self.group = group.GroupParser().parseClass(participant_group) + def __eq__(self, o): + if type(o) != HtmlPreviewParticipant: + return False + + return all(map(lambda x, y: x == y, (self.name, self.id, self.group), (o.name, o.id, o.group))) + def __repr__(self): - return f'{self.name} (with place {self.place})' + return f'{self.id}: {self.name} ({self.group})' class HtmlPreviewImport: - def __init__(self, title: str, participants: dict[int, HtmlPreviewParticipant]): - self.title = title + def __init__(self, participants: dict[int, HtmlPreviewParticipant]): self.participants = participants + + def __repr__(self): + return str(self.participants) class HtmlCompetitionResultRow: def __init__(self, name, id, dance, group, class_, place, placeTo, finalist): @@ -76,38 +84,6 @@ class HtmlSingleCompetitionResult: self.placeTo = placeTo self.finalist = finalist -class HtmlCompetitionResultForDance: - def __init__(self, dance: str, results: dict[int, HtmlSingleCompetitionResult]): - self.dance = dance - self.results = results - - def get(self, id: int) ->HtmlSingleCompetitionResult: - return self.results[id] - -class HtmlCompetitionResultForClass: - def __init__( - self, - class_: competition_class.Class_t, - results: dict[str, HtmlCompetitionResultForDance] - ): - self.class_ = class_ - self.results = results - - def get(self, dance: str) -> HtmlCompetitionResultForDance: - return self.results[dance] - -class HtmlCompetitionResultForGroup: - def __init__( - self, - group: group.Group_t, - results: dict[competition_class.Class_t, HtmlCompetitionResultForClass] - ): - self.group = group - self.results = results - - def get(self, class_: str) -> HtmlCompetitionResultForClass: - return self.results[class_] - class HtmlCompetitionTotalResults: def __init__(self): self.results = {} @@ -124,7 +100,7 @@ class HtmlCompetitionTotalResults: l.append(result) self.results[tup] = l -class State4: +class State3: def __init__( self, csvRows: list[CSVResultRow], diff --git a/src/solo_turnier/worker.py b/src/solo_turnier/worker.py index b9dfe1c..aa8644a 100644 --- a/src/solo_turnier/worker.py +++ b/src/solo_turnier/worker.py @@ -1,8 +1,11 @@ import logging from pprint import pformat +import solo_turnier from solo_turnier import html_parser from .reader import ResultRow +from .types import HtmlCompetitionResultRow as CompetitionResult +from . import types class HtmlPerson: def __init__(self, name, id, group): @@ -59,65 +62,18 @@ class ResultPerson: text = str(self) return text.__hash__() -class CompetitionResult: - def __init__(self, dance, group, class_, place, placeTo, id, competitionGroup, competitionClass): - self.dance = dance - self.group = group - self.class_ = class_ - self.place = place - self.placeTo = placeTo - self.id = int(id) - self.competitionGroup = competitionGroup - self.competitionClass = competitionClass - self.finalist = None - - @staticmethod - def extractFromResultRow(row: ResultRow): - return CompetitionResult( - dance=row.dance, - group=row.group, - class_=row.class_, - place=row.place, placeTo=row.placeTo, - id=row.id, - competitionGroup=row.competitionGroup, - competitionClass=row.competitionClass - ) - - def __repr__(self): - if self.place == self.placeTo: - result = f'{self.place}.' - else: - result = f'{self.place}.-{self.placeTo}.' - - if self.finalist == True: - finalist = '[F]' - else: - finalist = '' - return f'Result[{self.id}]({self.group} {self.class_} {self.dance} as {result}{finalist})' - - def __eq__(self, o): - if not isinstance(o, CompetitionResult): - return False - - return ( - self.dance == o.dance and - self.competitionClass == o.competitionClass and - self.competitionGroup == o.competitionGroup and - self.place == o.place and self.placeTo == o.placeTo and - self.id == o.id - ) - class ImportNotParsableException(Exception): pass +ParserList_t = dict[str, html_parser.HtmlParser] + class PreviewWorker: def __init__(self): self.l = logging.getLogger('solo_turnier.worker.PreviewWorker') - self.persons = None - self.parsers = None + self.participants = {} - def filterFilesPreview(self, files: list[str]) -> dict[str, html_parser.HtmlParser]: + def filterFilesPreview(self, files: list[str]) -> ParserList_t: self.l.debug('Filtering the list of parsers by removing all non preview entries.') ret = {} for file in files: @@ -151,50 +107,41 @@ class PreviewWorker: self.l.fatal('Cannot parse the parsed content of the preview file.') raise ImportNotParsableException('Incompatible export file') - ids = [] - names = [] - indices = [] - for index, e in enumerate(data['table'][0]): - if e['text'] == '': - continue - indices.append(index) - ids.append(e['text']) - names.append(e['meta']) - - groups = [] - if data['titles'][-1] == 'Startgruppe': - self.l.debug('Combined competition found. Extracting group from table') - groups = [data['table'][-1][idx]['text'] for idx in indices] + self.l.debug('Combined competition found. Extracting group from table required.') + extractGroup = True else: self.l.debug('Using group from the title.') group = parser.guessDataFromHtmlTitle(imported['title'])['group'] - groups = [group for i in indices] + extractGroup = False - def __mappingFcn(id, name, group): - return HtmlPerson(name, id, group) + for index, e in enumerate(data['table'][0]): + if e['text'] == '': + # Skip empty columns + continue + + # Extract data from column + name = e['meta'] + id = int(e['text']) + if extractGroup: + group = data['table'][-1][index]['text'] + + participant = types.HtmlPreviewParticipant(name, id, group) - currentPersons = list(map(__mappingFcn, ids, names, groups)) - self.l.log(5, 'Extracted persons in preview round: %s', currentPersons) - - for p in currentPersons: - current = self.parsers.get(p, []) - current.append(parser) - self.parsers[p] = current + l = self.participants.get(id, []) + self.l.log(5, 'Checking for existence of %s in %s: %s', participant, l, participant in l) + if participant not in l: + l.append(participant) + self.participants[id] = l - def __resetStructures(self): - self.persons = None - self.parsers = {} - - def extractPersonsFromPreview(self, parsers): - self.__resetStructures() + def importAllData(self, parsers: ParserList_t) -> types.HtmlPreviewImport: + self.participants = {} for file in parsers: - self.l.debug('Extracting person data from %s', file) - self.__extractPersonsFromSinglePreview(parsers[file]) + parser = parsers[file] + self.__extractPersonsFromSinglePreview(parser) - self.persons = self.parsers.keys() - self.l.log(5, 'Extracted person data: %s', pformat(self.parsers)) + return types.HtmlPreviewImport(self.participants) class DataWorker: def __init__(self): @@ -346,3 +293,27 @@ class DataWorker: return ret +class Worker: + def __init__(self): + self.l = logging.getLogger('solo_turnier.worker.Worker') + + def collectAllData( + self, + htmlCandidatesPreview: list[str], + csvFile: str + ) -> types.State3: + + previewWorker = PreviewWorker() + self.l.info('Filtering for pure preview rounds.') + parsers = previewWorker.filterFilesPreview(htmlCandidatesPreview) + self.l.debug('Remaining files: %s', parsers.keys()) + + self.l.info('Extracting person data from the preview rounds.') + previewImport = previewWorker.importAllData(parsers) + self.l.debug('Total preview import: %s', previewImport) + + csvReader = solo_turnier.reader.CSVResultReader(csvFile) + self.l.info('Loading the total result CSV file %s', csvFile) + csvRows = csvReader.extractResult() + + return None