diff --git a/src/solo_turnier/batch.py b/src/solo_turnier/batch.py index 1f8b567..5cbc44e 100644 --- a/src/solo_turnier/batch.py +++ b/src/solo_turnier/batch.py @@ -13,9 +13,6 @@ class BatchWorker: ): self.l = logging.getLogger('solo_turnier.batch') self.config = config - - - def run(self): self.l.debug(self.config.__dict__) diff --git a/src/solo_turnier/html_locator.py b/src/solo_turnier/html_locator.py index cd5a449..4adbe7d 100644 --- a/src/solo_turnier/html_locator.py +++ b/src/solo_turnier/html_locator.py @@ -19,8 +19,17 @@ class HtmlLocator: return ret + def __fingMatchingTabs(self, ergCandidate): + path = os.path.dirname(ergCandidate) + tabPath = os.path.join(path, 'tabges.htm') + if not os.path.exists(tabPath): + tabPath = None + return (ergCandidate, tabPath) + def findCandidates(self, path: str): - return self.__findRecursivelyCandidates(path, 'erg.htm') + candidatesErg = self.__findRecursivelyCandidates(path, 'erg.htm') + candidates = [self.__fingMatchingTabs(x) for x in candidatesErg] + return candidates def findPreviewRoundCandidates(self, path: str): candidates = self.__findRecursivelyCandidates(path, 'tabges.htm') diff --git a/src/solo_turnier/html_parser.py b/src/solo_turnier/html_parser.py index 98139c1..99046d0 100644 --- a/src/solo_turnier/html_parser.py +++ b/src/solo_turnier/html_parser.py @@ -3,7 +3,7 @@ from bs4 import BeautifulSoup import logging import re -from .types import HtmlPreviewParticipant, HtmlParticipant +from .types import HtmlPreviewParticipant, HtmlParticipant, HtmlResultTotalTable from .types import HtmlPreviewImport as HtmlImport, HtmlResultImport from .group import GroupParser from .competition_class import CompetitionClassParser @@ -178,3 +178,78 @@ class HtmlParser: data['title'] = data['title'].strip() __cleanTable(data['data']['table']) + def parseIndividualResult(self, competitionGroup, competitionClass, dance): + + participants = {} + + def __parseTable(table): + rows = table.find_all('tr') + + def __getIds(): + row = rows[1] + entries = row('td') + entries = entries[1:] + entries = [x for x in entries if len(x.contents[0].strip()) > 0] + return [x.contents[0].strip() for x in entries] + + ids = __getIds() + numIds = len(ids) + self.l.log(5, 'Found ids in dataset: %s', ids) + + def findRowIndex(prefixStr): + def isRowMatchingCriteria(row): + if row.td.contents[0].startswith(prefixStr): + return True + return False + l = list(map(isRowMatchingCriteria, rows)) + + if True not in l: + return None + return l.index(True) + + def getPlaces(): + placeRowIdx = findRowIndex('Platz von') + placeTags = rows[placeRowIdx]('td')[1:(numIds+1)] + def getSinglePlaceStr(tag): + for br in tag('br'): + br.replace_with('-') + tag.smooth() + rawStr = tag.contents[0].strip() + if rawStr.endswith('-'): + rawStr = rawStr[:-1] + return rawStr + places = list(map(getSinglePlaceStr, placeTags)) + return places + places = getPlaces() + self.l.log(5, 'Found places: %s', places) + + def getClass(): + classRow = findRowIndex('Startklasse') + if classRow is not None: + classTags = rows[classRow]('td')[1:(numIds+1)] + return list(map(lambda x: x.contents[0], classTags)) + return None + classes = getClass() + self.l.log(5, 'Classes: %s', classes) + + def getGroups(): + groupRow = findRowIndex('Startgruppe') + if groupRow is not None: + classTags = rows[groupRow]('td')[1:(numIds+1)] + return list(map(lambda x: x.contents[0], classTags)) + return None + groups = getGroups() + self.l.log(5, 'Groups: %s', groups) + + for idx, id in enumerate(ids): + cls = classes[idx] if classes is not None else None + grp = groups[idx] if groups is not None else None + + tup = (competitionGroup, competitionClass, dance, id) + participants[tup] = (places[idx], cls, grp) + + tables = self.soup.find('div', class_='extract').find_all('table') + for table in tables: + __parseTable(table) + + return HtmlResultTotalTable( participants) diff --git a/src/solo_turnier/types.py b/src/solo_turnier/types.py index 367f2e5..3ed57b3 100644 --- a/src/solo_turnier/types.py +++ b/src/solo_turnier/types.py @@ -80,6 +80,13 @@ class HtmlResultImport: def __repr__(self): return str(self.results) + +class HtmlResultTotalTable: + def __init__(self, participants): + self.participants = participants + + def __repr__(self): + return str(self.participants) class HtmlCompetitionResultRow: def __init__(self, name, id, dance, group, class_, place, placeTo, finalist): @@ -137,6 +144,7 @@ class HtmlSingleCompetitionResult: class HtmlCompetitionTotalResults: def __init__(self): self.results = {} + self.tabges = {} def __getTuple(self, group: group.Group_t, class_: competition_class.Class_t, dance: str, id: int): return (group, class_, dance, id) diff --git a/src/solo_turnier/worker.py b/src/solo_turnier/worker.py index 2368a78..8cacbd8 100644 --- a/src/solo_turnier/worker.py +++ b/src/solo_turnier/worker.py @@ -170,29 +170,32 @@ class ResultExtractor: self.rePlaceSingle = re.compile(' *([0-9]+) *') self.rePlaceDouble = re.compile(' *([0-9]+) *- *([0-9]+) *') - def getAllParsers(self, files: list[str]) -> ParserList_t: + def getAllParsers(self, files: list[tuple[str,str]]) -> ParserList_t: ret = {} classParser = competition_class.CompetitionClassParser() - for file in files: - with open(file, 'r') as fp: + for filePair in files: + with open(filePair[0], 'r') as fp: text = fp.read() - parser = html_parser.HtmlParser(text, file) + with open(filePair[1], 'r') as fp: + textTab = fp.read() + parser = html_parser.HtmlParser(text, filePair[0]) + parserTab = html_parser.HtmlParser(textTab, filePair[1]) try: data = parser.guessDataFromHtmlTitle() except: - self.l.error('Cannot parse HTML file %s to check if it is a valid result. Check manually.', file) + self.l.error('Cannot parse HTML file %s to check if it is a valid result. Check manually.', filePair[0]) continue try: guessedClass = classParser.parseClass(data['class_']) except: - self.l.error('Issue parsing class of file %s. Check manually.', file) + self.l.error('Issue parsing class of file %s. Check manually.', filePair[0]) continue self.l.debug('Fetched result data: %s, guessed class %s', data, guessedClass) - ret[file] = parser + ret[filePair] = (parser, parserTab) return ret @@ -226,12 +229,26 @@ class ResultExtractor: results.add(competitionGroup, competitionClass, dance, person.id, competitionResult) # + def _analyzeIndividualResults(self, parser: html_parser.HtmlParser, results: types.HtmlCompetitionTotalResults): + data = parser.guessDataFromHtmlTitle() + competitionClass = data['class_'] + competitionGroup = data['group'] + dance = data['dance'] + + result = parser.parseIndividualResult(competitionGroup, competitionClass, dance) + self.l.log(5, 'Found individual results: %s', result.participants) + results.tabges.update(result.participants) + def extractAllData(self, parsers: ParserList_t) -> types.HtmlCompetitionTotalResults: ret = types.HtmlCompetitionTotalResults() - for fileName in parsers: + for fileNameTuple in parsers: + fileName = fileNameTuple[0] self.l.debug('Extracting data from file %s', fileName) - self._analyzeSingleParser(parsers[fileName], ret) + self._analyzeSingleParser(parsers[fileNameTuple][0], ret) + + self.l.debug('Fetching individual result of combined competitions in %s', fileName) + self._analyzeIndividualResults(parsers[fileNameTuple][1], ret) return ret @@ -444,9 +461,10 @@ class Worker: results[participant] = resultsOfParticipant self.l.log(5, 'Result before native fixing: %s', (results)) - self._fixNativePlaces(dances, results) + # self._fixNativePlaces(dances, results) + self._fixNativePlacesFromTable(dances, results, importedData.htmlResults) # self.l.log(5, 'Result after native fixing: %s', pformat(results)) - + self.l.log(5,'Data %s', results) totalResult[group] = types.TotalGroupResult(dances, results) @@ -525,16 +543,53 @@ class Worker: nativeClass = previewResults.results[participant][dance] # self.l.log(5, 'Result %s => %s', key, rawResult) - return types.SingleParticipantResult( + ret = types.SingleParticipantResult( key[2], nativeClass, dance, rawResult.finalist, rawResult.place, rawResult.placeTo ) + + return ret return None results[danceIdx] = getResult() return results + def _fixNativePlacesFromTable( + self, + dances: list[str], + data: dict[types.HtmlPreviewParticipant, list[types.SingleParticipantResult]], + importedData: types.HtmlCompetitionTotalResults + ): + rePlace = re.compile('([0-9]+)(?:-([0-9]+))?') + for participant in data.keys(): + self.l.log(5, 'fixing participant %s', participant) + results = data[participant] + for result in results: + if result is None: + continue + self.l.log(5, 'Looking at result set %s', result) + + def selectEntry(k): + return k[2] == result.dance and int(k[3]) == participant.id + + keys = list(importedData.tabges.keys()) + selected = list(map(selectEntry, keys)) + selectedIndex = selected.index(True) + + raw = importedData.tabges[keys[selectedIndex]] + self.l.log(5,'Raw %s', raw) + nativePlaceRaw = raw[0] + matcher = rePlace.fullmatch(nativePlaceRaw) + if matcher is None: + self.l.error('Cannot parse place string %s for participant %u (%s) in dance %s', nativePlaceRaw, participant.id, participant, result.dance) + continue + self.l.log(5, 'Found strings by regex: %s', matcher.groups()) + result.placeNative = matcher.group(1) + result.placeNativeTo = matcher.group(2) + + pass + def _fixNativePlaces( self, dances: list[str],