Removed preview worker to simplify code base
This commit is contained in:
parent c06c5ed791
commit f5132ce8e8
@@ -16,15 +16,9 @@ class BatchWorker:

        locator = solo_turnier.html_locator.HtmlLocator()
        self.l.info(
            'Checking for feasible preview HTML export files in "%s"',
            'Checking for feasible HTML export files in "%s"',
            self.config.importHtmlPath(),
        )
        htmlCandidatesPreview = locator.findPreviewRoundCandidates(
            self.config.importHtmlPath()
        )
        self.l.debug(
            "Found HTML file candidates for preview rounds: %s", htmlCandidatesPreview
        )

        htmlResultFiles = locator.findCandidates(self.config.importHtmlPath())
        self.l.debug(
@@ -32,7 +26,7 @@ class BatchWorker:
        )

        worker = solo_turnier.worker.Worker()
        importedData = worker.collectAllData(htmlCandidatesPreview, htmlResultFiles)
        importedData = worker.collectAllData(htmlResultFiles)
        combinedData = worker.combineData(importedData)

        worker.filterOutFinalists(combinedData, removeFilteredParicipants)
@@ -75,9 +75,6 @@ class Cli:
    def importHtmlPath(self):
        return self.__args.html[0]

    def importCSVPath(self):
        return self.__args.import_from[0]

    def output(self):
        return self.__args.output[0]

@@ -31,7 +31,3 @@ class HtmlLocator:
        candidatesErg = self.__findRecursivelyCandidates(path, "erg.htm")
        candidates = [self.__fingMatchingTabs(x) for x in candidatesErg]
        return candidates

    def findPreviewRoundCandidates(self, path: str):
        candidates = self.__findRecursivelyCandidates(path, "tabges.htm")
        return candidates
@@ -66,126 +66,9 @@ class ResultPerson:
        return text.__hash__()


class ImportNotParsableException(Exception):
    pass


ParserList_t = dict[str, html_parser.HtmlParser]


class PreviewWorker:
    def __init__(self):
        self.l = logging.getLogger("solo_turnier.worker.PreviewWorker")
        self.participants = {}
        self.previewResults = {}

    def filterFilesPreview(self, files: list[str]) -> ParserList_t:
        self.l.debug(
            "Filtering the list of parsers by removing all non preview entries."
        )
        ret = {}
        for file in files:
            with open(file, "r") as fp:
                text = fp.read()

            parser = html_parser.HtmlParser(text, file)

            try:
                data = parser.guessDataFromHtmlTitle()
            except:
                self.l.error(
                    f"Unable to parse html file in {file}. Please check manually."
                )
                continue

            if data["class_"] == "Sichtung":
                self.l.debug(f"Found candidate in {file}. Adding to the list.")
                ret[file] = parser
            else:
                self.l.debug(
                    f'Rejecting file {file} as the name {data["class_"]} did not match.'
                )

        return ret

    def __extractPersonsFromSinglePreview(self, parser: html_parser.HtmlParser):
        imported = parser.parsePreparationRound()
        parser.cleanPreparationRoundImport(imported)
        data = imported["data"]

        headerData = parser.guessDataFromHtmlTitle()
        dance = headerData["dance"]
        classParser = solo_turnier.competition_class.CompetitionClassParser()

        def getRowIndexOfClass():
            return data["titles"].index("Platz von\nPlatz bis")

        self.l.log(5, data)

        if data["titles"][0] != "Wertungsrichter":
            self.l.fatal("Cannot parse the parsed content of the preview file.")
            raise ImportNotParsableException("Incompatible export file")

        if data["titles"][-1] == "Startgruppe":
            self.l.debug(
                "Combined competition found. Extracting group from table required."
            )
            extractGroup = True
        else:
            self.l.debug("Using group from the title.")
            group = parser.guessDataFromHtmlTitle(imported["title"])["group"]
            extractGroup = False

        classRowIndex = getRowIndexOfClass()

        for index, e in enumerate(data["table"][0]):
            if e["text"] == "":
                # Skip empty columns
                continue

            # Extract data from column
            name = e["meta"]
            id = int(e["text"])
            if extractGroup:
                group = data["table"][-1][index]["text"]

            # dance =
            class_ = classParser.parseClass(data["table"][classRowIndex][index]["text"])

            participant = types.HtmlPreviewParticipant(name, id, group)

            l = self.participants.get(id, [])
            self.l.log(
                5,
                "Checking for existence of %s in %s: %s",
                participant,
                l,
                participant in l,
            )
            if participant not in l:
                l.append(participant)
                self.participants[id] = l

            results = self.previewResults.get(participant, {})
            results[dance] = class_
            self.previewResults[participant] = results

    def importAllData(self, parsers: ParserList_t) -> types.HtmlPreviewImport:
        self.participants = {}

        for file in parsers:
            parser = parsers[file]
            try:
                self.__extractPersonsFromSinglePreview(parser)
            except:
                self.l.error(
                    "Failed to parse preview round in file %s. Skipping this file's content.",
                    parser.fileName,
                )

        return types.HtmlPreviewImport(self.participants, self.previewResults)


class ResultExtractor:
    def __init__(self):
        self.l = logging.getLogger("solo_turnier.worker.ResultExtractor")
@@ -500,28 +383,13 @@ class Worker:
            "Quickstep",
        ]

    def collectAllData(
        self, htmlCandidatesPreview: list[str], htmlResultsFileNames: list[str]
    ) -> types.State3:
        previewWorker = PreviewWorker()
        self.l.info("Filtering for pure preview rounds.")
        parsers = previewWorker.filterFilesPreview(htmlCandidatesPreview)
        self.l.debug("Remaining files: %s", list(parsers.keys()))

        self.l.info("Extracting person data from the preview rounds.")
        previewImport = previewWorker.importAllData(parsers)
        self.l.debug(
            "Total preview imported participants: %s",
            pformat(previewImport.participants),
        )
        self.l.log(5, "Total preview results: %s", pformat(previewImport.results))

    def collectAllData(self, htmlResultsFileNames: list[str]) -> types.State3:
        resultExtractor = ResultExtractor()
        resultParsers = resultExtractor.getAllParsers(htmlResultsFileNames)
        htmlResults = resultExtractor.extractAllData(resultParsers)
        self.l.info("Overall result data extracted: %s", pformat(htmlResults.results))

        return types.State3(previewImport, htmlResults)
        return types.State3(None, htmlResults)

    def combineData(self, importedData: types.State3):
        self.l.info("Starting to build data sets.")
@@ -546,7 +414,6 @@ class Worker:
            resultsOfParticipant = self._getResultOfSingleParticipant(
                participant,
                group,
                importedData.previewImport,
                importedData.htmlResults,
                dances,
            )
@@ -654,7 +521,6 @@ class Worker:
        self,
        participant: types.HtmlParticipant,
        nominalGroup: solo_turnier.group.Group,
        previewResults: types.HtmlPreviewImport,
        totalResults: types.HtmlCompetitionTotalResults,
        allDances: list[str],
    ) -> list[types.SingleParticipantResult | None]:
@@ -747,74 +613,6 @@ class Worker:

        pass

    def _fixNativePlaces(
        self,
        dances: list[str],
        data: dict[types.HtmlPreviewParticipant, list[types.SingleParticipantResult]],
    ):
        classParser = solo_turnier.competition_class.CompetitionClassParser()
        allClasses = classParser.getAllClasses()
        allClasses.reverse()

        for class_ in allClasses:
            for danceIdx, dance in enumerate(dances):
                self.l.log(
                    5, "Fixing native places for class %s in dance %s", class_, dance
                )

                remainingParticipants = []

                for participant in data.keys():
                    results = data[participant]
                    danceResult = results[danceIdx]

                    if danceResult is None:
                        continue

                    # self.l.log(5, 'Result of dance: %s', danceResult)

                    if classParser.isABetterThanB(danceResult.nativeClass, class_):
                        # self.l.log(5, 'Skipping %s as the native class is higher', participant)
                        continue

                    remainingParticipants.append(
                        (danceResult.place, participant.id, participant)
                    )

                remainingParticipants.sort()
                # self.l.log(5, 'Remaining participants %s', remainingParticipants)

                def getAllParticipantsWithSamePlace():
                    first = remainingParticipants.pop(0)
                    ret = [first]
                    while (
                        len(remainingParticipants) > 0
                        and remainingParticipants[0][0] == first[0]
                    ):
                        ret.append(remainingParticipants.pop(0))
                    return ret

                def updateNativePlaces(samePlaced, placeStart):
                    nextPlace = placeStart + len(samePlaced)
                    if len(samePlaced) == 1:
                        placeTo = None
                    else:
                        placeTo = nextPlace - 1

                    for p in samePlaced:
                        data[p[2]][danceIdx].placeNative = placeStart
                        data[p[2]][danceIdx].placeNativeTo = placeTo

                    return nextPlace

                places = list(map(lambda x: x[0], remainingParticipants))
                place = 1
                while len(remainingParticipants) > 0:
                    samePlaced = getAllParticipantsWithSamePlace()
                    place = updateNativePlaces(samePlaced, place)

                # self.l.log(5, '(Partially) fixed places: %s', (data))

    def filterOutFinalists(self, data: types.State4, filterOut: bool):
        for group in data.results:
            self.l.debug("Cleaning up group %s", group.name)