From f5132ce8e8a13f6e825935e715b30aa986f459af Mon Sep 17 00:00:00 2001 From: Christian Wolf Date: Sun, 19 Nov 2023 18:07:49 +0100 Subject: [PATCH] Removed preview worker to simplify code base --- src/solo_turnier/batch.py | 10 +- src/solo_turnier/cli.py | 3 - src/solo_turnier/html_locator.py | 4 - src/solo_turnier/worker.py | 206 +------------------------------ 4 files changed, 4 insertions(+), 219 deletions(-) diff --git a/src/solo_turnier/batch.py b/src/solo_turnier/batch.py index ad489e3..c600d30 100644 --- a/src/solo_turnier/batch.py +++ b/src/solo_turnier/batch.py @@ -16,15 +16,9 @@ class BatchWorker: locator = solo_turnier.html_locator.HtmlLocator() self.l.info( - 'Checking for feasible preview HTML export files in "%s"', + 'Checking for feasible HTML export files in "%s"', self.config.importHtmlPath(), ) - htmlCandidatesPreview = locator.findPreviewRoundCandidates( - self.config.importHtmlPath() - ) - self.l.debug( - "Found HTML file candidates for preview rounds: %s", htmlCandidatesPreview - ) htmlResultFiles = locator.findCandidates(self.config.importHtmlPath()) self.l.debug( @@ -32,7 +26,7 @@ class BatchWorker: ) worker = solo_turnier.worker.Worker() - importedData = worker.collectAllData(htmlCandidatesPreview, htmlResultFiles) + importedData = worker.collectAllData(htmlResultFiles) combinedData = worker.combineData(importedData) worker.filterOutFinalists(combinedData, removeFilteredParicipants) diff --git a/src/solo_turnier/cli.py b/src/solo_turnier/cli.py index 2d240c5..d4c256f 100644 --- a/src/solo_turnier/cli.py +++ b/src/solo_turnier/cli.py @@ -75,9 +75,6 @@ class Cli: def importHtmlPath(self): return self.__args.html[0] - def importCSVPath(self): - return self.__args.import_from[0] - def output(self): return self.__args.output[0] diff --git a/src/solo_turnier/html_locator.py b/src/solo_turnier/html_locator.py index 429ab6b..95bd20b 100644 --- a/src/solo_turnier/html_locator.py +++ b/src/solo_turnier/html_locator.py @@ -31,7 +31,3 @@ class HtmlLocator: candidatesErg = self.__findRecursivelyCandidates(path, "erg.htm") candidates = [self.__fingMatchingTabs(x) for x in candidatesErg] return candidates - - def findPreviewRoundCandidates(self, path: str): - candidates = self.__findRecursivelyCandidates(path, "tabges.htm") - return candidates diff --git a/src/solo_turnier/worker.py b/src/solo_turnier/worker.py index c4faa1c..7cda1c1 100644 --- a/src/solo_turnier/worker.py +++ b/src/solo_turnier/worker.py @@ -66,126 +66,9 @@ class ResultPerson: return text.__hash__() -class ImportNotParsableException(Exception): - pass - - ParserList_t = dict[str, html_parser.HtmlParser] -class PreviewWorker: - def __init__(self): - self.l = logging.getLogger("solo_turnier.worker.PreviewWorker") - self.participants = {} - self.previewResults = {} - - def filterFilesPreview(self, files: list[str]) -> ParserList_t: - self.l.debug( - "Filtering the list of parsers by removing all non preview entries." - ) - ret = {} - for file in files: - with open(file, "r") as fp: - text = fp.read() - - parser = html_parser.HtmlParser(text, file) - - try: - data = parser.guessDataFromHtmlTitle() - except: - self.l.error( - f"Unable to parse html file in {file}. Please check manually." - ) - continue - - if data["class_"] == "Sichtung": - self.l.debug(f"Found candidate in {file}. Adding to the list.") - ret[file] = parser - else: - self.l.debug( - f'Rejecting file {file} as the name {data["class_"]} did not match.' - ) - - return ret - - def __extractPersonsFromSinglePreview(self, parser: html_parser.HtmlParser): - imported = parser.parsePreparationRound() - parser.cleanPreparationRoundImport(imported) - data = imported["data"] - - headerData = parser.guessDataFromHtmlTitle() - dance = headerData["dance"] - classParser = solo_turnier.competition_class.CompetitionClassParser() - - def getRowIndexOfClass(): - return data["titles"].index("Platz von\nPlatz bis") - - self.l.log(5, data) - - if data["titles"][0] != "Wertungsrichter": - self.l.fatal("Cannot parse the parsed content of the preview file.") - raise ImportNotParsableException("Incompatible export file") - - if data["titles"][-1] == "Startgruppe": - self.l.debug( - "Combined competition found. Extracting group from table required." - ) - extractGroup = True - else: - self.l.debug("Using group from the title.") - group = parser.guessDataFromHtmlTitle(imported["title"])["group"] - extractGroup = False - - classRowIndex = getRowIndexOfClass() - - for index, e in enumerate(data["table"][0]): - if e["text"] == "": - # Skip empty columns - continue - - # Extract data from column - name = e["meta"] - id = int(e["text"]) - if extractGroup: - group = data["table"][-1][index]["text"] - - # dance = - class_ = classParser.parseClass(data["table"][classRowIndex][index]["text"]) - - participant = types.HtmlPreviewParticipant(name, id, group) - - l = self.participants.get(id, []) - self.l.log( - 5, - "Checking for existence of %s in %s: %s", - participant, - l, - participant in l, - ) - if participant not in l: - l.append(participant) - self.participants[id] = l - - results = self.previewResults.get(participant, {}) - results[dance] = class_ - self.previewResults[participant] = results - - def importAllData(self, parsers: ParserList_t) -> types.HtmlPreviewImport: - self.participants = {} - - for file in parsers: - parser = parsers[file] - try: - self.__extractPersonsFromSinglePreview(parser) - except: - self.l.error( - "Failed to parse preview round in file %s. Skipping this file's content.", - parser.fileName, - ) - - return types.HtmlPreviewImport(self.participants, self.previewResults) - - class ResultExtractor: def __init__(self): self.l = logging.getLogger("solo_turnier.worker.ResultExtractor") @@ -500,28 +383,13 @@ class Worker: "Quickstep", ] - def collectAllData( - self, htmlCandidatesPreview: list[str], htmlResultsFileNames: list[str] - ) -> types.State3: - previewWorker = PreviewWorker() - self.l.info("Filtering for pure preview rounds.") - parsers = previewWorker.filterFilesPreview(htmlCandidatesPreview) - self.l.debug("Remaining files: %s", list(parsers.keys())) - - self.l.info("Extracting person data from the preview rounds.") - previewImport = previewWorker.importAllData(parsers) - self.l.debug( - "Total preview imported participants: %s", - pformat(previewImport.participants), - ) - self.l.log(5, "Total preview results: %s", pformat(previewImport.results)) - + def collectAllData(self, htmlResultsFileNames: list[str]) -> types.State3: resultExtractor = ResultExtractor() resultParsers = resultExtractor.getAllParsers(htmlResultsFileNames) htmlResults = resultExtractor.extractAllData(resultParsers) self.l.info("Overall result data extracted: %s", pformat(htmlResults.results)) - return types.State3(previewImport, htmlResults) + return types.State3(None, htmlResults) def combineData(self, importedData: types.State3): self.l.info("Starting to build data sets.") @@ -546,7 +414,6 @@ class Worker: resultsOfParticipant = self._getResultOfSingleParticipant( participant, group, - importedData.previewImport, importedData.htmlResults, dances, ) @@ -654,7 +521,6 @@ class Worker: self, participant: types.HtmlParticipant, nominalGroup: solo_turnier.group.Group, - previewResults: types.HtmlPreviewImport, totalResults: types.HtmlCompetitionTotalResults, allDances: list[str], ) -> list[types.SingleParticipantResult | None]: @@ -747,74 +613,6 @@ class Worker: pass - def _fixNativePlaces( - self, - dances: list[str], - data: dict[types.HtmlPreviewParticipant, list[types.SingleParticipantResult]], - ): - classParser = solo_turnier.competition_class.CompetitionClassParser() - allClasses = classParser.getAllClasses() - allClasses.reverse() - - for class_ in allClasses: - for danceIdx, dance in enumerate(dances): - self.l.log( - 5, "Fixing native places for class %s in dance %s", class_, dance - ) - - remainingParticipants = [] - - for participant in data.keys(): - results = data[participant] - danceResult = results[danceIdx] - - if danceResult is None: - continue - - # self.l.log(5, 'Result of dance: %s', danceResult) - - if classParser.isABetterThanB(danceResult.nativeClass, class_): - # self.l.log(5, 'Skipping %s as the native class is higher', participant) - continue - - remainingParticipants.append( - (danceResult.place, participant.id, participant) - ) - - remainingParticipants.sort() - # self.l.log(5, 'Remaining participants %s', remainingParticipants) - - def getAllParticipantsWithSamePlace(): - first = remainingParticipants.pop(0) - ret = [first] - while ( - len(remainingParticipants) > 0 - and remainingParticipants[0][0] == first[0] - ): - ret.append(remainingParticipants.pop(0)) - return ret - - def updateNativePlaces(samePlaced, placeStart): - nextPlace = placeStart + len(samePlaced) - if len(samePlaced) == 1: - placeTo = None - else: - placeTo = nextPlace - 1 - - for p in samePlaced: - data[p[2]][danceIdx].placeNative = placeStart - data[p[2]][danceIdx].placeNativeTo = placeTo - - return nextPlace - - places = list(map(lambda x: x[0], remainingParticipants)) - place = 1 - while len(remainingParticipants) > 0: - samePlaced = getAllParticipantsWithSamePlace() - place = updateNativePlaces(samePlaced, place) - - # self.l.log(5, '(Partially) fixed places: %s', (data)) - def filterOutFinalists(self, data: types.State4, filterOut: bool): for group in data.results: self.l.debug("Cleaning up group %s", group.name)