Removed preview worker to simplify code base

This commit is contained in:
Christian Wolf 2023-11-19 18:07:49 +01:00
parent c06c5ed791
commit f5132ce8e8
4 changed files with 4 additions and 219 deletions

View File

@ -16,15 +16,9 @@ class BatchWorker:
locator = solo_turnier.html_locator.HtmlLocator() locator = solo_turnier.html_locator.HtmlLocator()
self.l.info( self.l.info(
'Checking for feasible preview HTML export files in "%s"', 'Checking for feasible HTML export files in "%s"',
self.config.importHtmlPath(), self.config.importHtmlPath(),
) )
htmlCandidatesPreview = locator.findPreviewRoundCandidates(
self.config.importHtmlPath()
)
self.l.debug(
"Found HTML file candidates for preview rounds: %s", htmlCandidatesPreview
)
htmlResultFiles = locator.findCandidates(self.config.importHtmlPath()) htmlResultFiles = locator.findCandidates(self.config.importHtmlPath())
self.l.debug( self.l.debug(
@ -32,7 +26,7 @@ class BatchWorker:
) )
worker = solo_turnier.worker.Worker() worker = solo_turnier.worker.Worker()
importedData = worker.collectAllData(htmlCandidatesPreview, htmlResultFiles) importedData = worker.collectAllData(htmlResultFiles)
combinedData = worker.combineData(importedData) combinedData = worker.combineData(importedData)
worker.filterOutFinalists(combinedData, removeFilteredParicipants) worker.filterOutFinalists(combinedData, removeFilteredParicipants)

View File

@ -75,9 +75,6 @@ class Cli:
def importHtmlPath(self): def importHtmlPath(self):
return self.__args.html[0] return self.__args.html[0]
def importCSVPath(self):
return self.__args.import_from[0]
def output(self): def output(self):
return self.__args.output[0] return self.__args.output[0]

View File

@ -31,7 +31,3 @@ class HtmlLocator:
candidatesErg = self.__findRecursivelyCandidates(path, "erg.htm") candidatesErg = self.__findRecursivelyCandidates(path, "erg.htm")
candidates = [self.__fingMatchingTabs(x) for x in candidatesErg] candidates = [self.__fingMatchingTabs(x) for x in candidatesErg]
return candidates return candidates
def findPreviewRoundCandidates(self, path: str):
candidates = self.__findRecursivelyCandidates(path, "tabges.htm")
return candidates

View File

@ -66,126 +66,9 @@ class ResultPerson:
return text.__hash__() return text.__hash__()
class ImportNotParsableException(Exception):
    """Signal that an imported HTML export file has an unexpected layout."""
ParserList_t = dict[str, html_parser.HtmlParser] ParserList_t = dict[str, html_parser.HtmlParser]
class PreviewWorker:
    """Collect participants and per-dance classes from preview round exports.

    The worker keeps two mappings that are filled by ``importAllData``:

    * ``participants``: participant id -> list of distinct participant
      objects seen with that id.
    * ``previewResults``: participant -> mapping of dance name to the class
      parsed for that dance.
    """

    def __init__(self):
        self.l = logging.getLogger("solo_turnier.worker.PreviewWorker")
        self.participants = {}
        self.previewResults = {}

    def filterFilesPreview(self, files: list[str]) -> ParserList_t:
        """Return parsers for those files whose title marks a preview round.

        Each file is read and wrapped in an ``HtmlParser``. Files whose title
        cannot be interpreted are logged and skipped. Files whose title data
        reports a ``class_`` other than ``"Sichtung"`` are rejected.

        :param files: paths of the HTML files to inspect.
        :return: mapping of file path to the parser created for that file.
        """
        self.l.debug(
            "Filtering the list of parsers by removing all non preview entries."
        )
        ret = {}

        for file in files:
            # NOTE(review): the file is opened with the platform default
            # encoding, as before -- confirm the encoding of the exports.
            with open(file, "r") as fp:
                text = fp.read()

            parser = html_parser.HtmlParser(text, file)

            try:
                data = parser.guessDataFromHtmlTitle()
            except Exception:
                # Only regular errors are treated as "not parsable";
                # KeyboardInterrupt/SystemExit must still propagate.
                self.l.error(
                    f"Unable to parse html file in {file}. Please check manually."
                )
                self.l.debug("Reason for the parse failure:", exc_info=True)
                continue

            if data["class_"] == "Sichtung":
                self.l.debug(f"Found candidate in {file}. Adding to the list.")
                ret[file] = parser
            else:
                self.l.debug(
                    f'Rejecting file {file} as the name {data["class_"]} did not match.'
                )

        return ret

    def __extractPersonsFromSinglePreview(self, parser: html_parser.HtmlParser):
        """Add the participants and classes of one preview file to the state.

        :param parser: parser of a single preview round export.
        :raises ImportNotParsableException: if the first table title is not
            ``"Wertungsrichter"``.
        """
        imported = parser.parsePreparationRound()
        parser.cleanPreparationRoundImport(imported)
        data = imported["data"]

        headerData = parser.guessDataFromHtmlTitle()
        dance = headerData["dance"]
        classParser = solo_turnier.competition_class.CompetitionClassParser()

        self.l.log(5, data)

        if data["titles"][0] != "Wertungsrichter":
            self.l.critical("Cannot parse the parsed content of the preview file.")
            raise ImportNotParsableException("Incompatible export file")

        if data["titles"][-1] == "Startgruppe":
            self.l.debug(
                "Combined competition found. Extracting group from table required."
            )
            extractGroup = True
        else:
            self.l.debug("Using group from the title.")
            group = parser.guessDataFromHtmlTitle(imported["title"])["group"]
            extractGroup = False

        # Row that holds the class text of every participant column.
        classRowIndex = data["titles"].index("Platz von\nPlatz bis")

        for index, cell in enumerate(data["table"][0]):
            if cell["text"] == "":
                # Skip empty columns
                continue

            # Extract data from column
            name = cell["meta"]
            participantId = int(cell["text"])
            if extractGroup:
                # The group is taken from the last table row of this column.
                group = data["table"][-1][index]["text"]

            class_ = classParser.parseClass(data["table"][classRowIndex][index]["text"])

            participant = types.HtmlPreviewParticipant(name, participantId, group)

            known = self.participants.setdefault(participantId, [])
            self.l.log(
                5,
                "Checking for existence of %s in %s: %s",
                participant,
                known,
                participant in known,
            )
            if participant not in known:
                known.append(participant)

            self.previewResults.setdefault(participant, {})[dance] = class_

    def importAllData(self, parsers: ParserList_t) -> types.HtmlPreviewImport:
        """Import every given preview parser and return the combined data.

        Files that fail to import are logged and skipped. Both result
        mappings are reset first so that repeated calls do not mix data of
        different imports.

        :param parsers: mapping of file path to parser, as returned by
            ``filterFilesPreview``.
        :return: the collected participants and preview results.
        """
        self.participants = {}
        self.previewResults = {}

        for parser in parsers.values():
            try:
                self.__extractPersonsFromSinglePreview(parser)
            except Exception:
                self.l.error(
                    "Failed to parse preview round in file %s. Skipping this file's content.",
                    parser.fileName,
                )
                self.l.debug("Reason for the import failure:", exc_info=True)

        return types.HtmlPreviewImport(self.participants, self.previewResults)
class ResultExtractor: class ResultExtractor:
def __init__(self): def __init__(self):
self.l = logging.getLogger("solo_turnier.worker.ResultExtractor") self.l = logging.getLogger("solo_turnier.worker.ResultExtractor")
@ -500,28 +383,13 @@ class Worker:
"Quickstep", "Quickstep",
] ]
def collectAllData( def collectAllData(self, htmlResultsFileNames: list[str]) -> types.State3:
self, htmlCandidatesPreview: list[str], htmlResultsFileNames: list[str]
) -> types.State3:
previewWorker = PreviewWorker()
self.l.info("Filtering for pure preview rounds.")
parsers = previewWorker.filterFilesPreview(htmlCandidatesPreview)
self.l.debug("Remaining files: %s", list(parsers.keys()))
self.l.info("Extracting person data from the preview rounds.")
previewImport = previewWorker.importAllData(parsers)
self.l.debug(
"Total preview imported participants: %s",
pformat(previewImport.participants),
)
self.l.log(5, "Total preview results: %s", pformat(previewImport.results))
resultExtractor = ResultExtractor() resultExtractor = ResultExtractor()
resultParsers = resultExtractor.getAllParsers(htmlResultsFileNames) resultParsers = resultExtractor.getAllParsers(htmlResultsFileNames)
htmlResults = resultExtractor.extractAllData(resultParsers) htmlResults = resultExtractor.extractAllData(resultParsers)
self.l.info("Overall result data extracted: %s", pformat(htmlResults.results)) self.l.info("Overall result data extracted: %s", pformat(htmlResults.results))
return types.State3(previewImport, htmlResults) return types.State3(None, htmlResults)
def combineData(self, importedData: types.State3): def combineData(self, importedData: types.State3):
self.l.info("Starting to build data sets.") self.l.info("Starting to build data sets.")
@ -546,7 +414,6 @@ class Worker:
resultsOfParticipant = self._getResultOfSingleParticipant( resultsOfParticipant = self._getResultOfSingleParticipant(
participant, participant,
group, group,
importedData.previewImport,
importedData.htmlResults, importedData.htmlResults,
dances, dances,
) )
@ -654,7 +521,6 @@ class Worker:
self, self,
participant: types.HtmlParticipant, participant: types.HtmlParticipant,
nominalGroup: solo_turnier.group.Group, nominalGroup: solo_turnier.group.Group,
previewResults: types.HtmlPreviewImport,
totalResults: types.HtmlCompetitionTotalResults, totalResults: types.HtmlCompetitionTotalResults,
allDances: list[str], allDances: list[str],
) -> list[types.SingleParticipantResult | None]: ) -> list[types.SingleParticipantResult | None]:
@ -747,74 +613,6 @@ class Worker:
pass pass
def _fixNativePlaces(
    self,
    dances: list[str],
    data: dict[types.HtmlPreviewParticipant, list[types.SingleParticipantResult]],
):
    """Recompute ``placeNative``/``placeNativeTo`` of all dance results.

    For every class (in reverse order of ``getAllClasses()``) and every
    dance, the participants whose result exists and whose native class is
    not better than the current class are ranked by their ``place``.
    Participants sharing a place share the same native start place, and
    ``placeNativeTo`` is set to the last place of that tie range. It is
    ``None`` for an untied participant. Results are updated in place, so
    later classes overwrite the values written for earlier ones.

    :param dances: dance names; the index of a dance selects the entry in
        each participant's result list.
    :param data: mapping of participant to one result (or ``None``) per
        dance.
    """
    classParser = solo_turnier.competition_class.CompetitionClassParser()
    # Iterate over a reversed copy so the list handed out by the parser is
    # not modified.
    allClasses = list(reversed(classParser.getAllClasses()))

    for class_ in allClasses:
        for danceIdx, dance in enumerate(dances):
            self.l.log(
                5, "Fixing native places for class %s in dance %s", class_, dance
            )

            remainingParticipants = []
            for participant, results in data.items():
                danceResult = results[danceIdx]
                if danceResult is None:
                    continue

                if classParser.isABetterThanB(danceResult.nativeClass, class_):
                    # Native class is higher than the current one: skip.
                    continue

                remainingParticipants.append(
                    (danceResult.place, participant.id, participant)
                )

            # Order by place, then id. The participant object itself is kept
            # out of the sort key as it need not be orderable.
            remainingParticipants.sort(key=lambda entry: entry[:2])

            place = 1
            start = 0
            total = len(remainingParticipants)
            while start < total:
                # Find the end of the run of entries sharing the same place.
                end = start + 1
                while (
                    end < total
                    and remainingParticipants[end][0]
                    == remainingParticipants[start][0]
                ):
                    end += 1

                samePlaced = remainingParticipants[start:end]
                nextPlace = place + len(samePlaced)
                placeTo = None if len(samePlaced) == 1 else nextPlace - 1

                for _, _, participant in samePlaced:
                    result = data[participant][danceIdx]
                    result.placeNative = place
                    result.placeNativeTo = placeTo

                place = nextPlace
                start = end
def filterOutFinalists(self, data: types.State4, filterOut: bool): def filterOutFinalists(self, data: types.State4, filterOut: bool):
for group in data.results: for group in data.results:
self.l.debug("Cleaning up group %s", group.name) self.l.debug("Cleaning up group %s", group.name)