From 03fed1e1e4c4c2e1f7212ff886900cd86c3cffb5 Mon Sep 17 00:00:00 2001 From: Christian Wolf Date: Fri, 6 Oct 2023 17:52:52 +0200 Subject: [PATCH] Handle incomplete competitions as well --- src/solo_turnier/html_parser.py | 17 +++++++++++++---- src/solo_turnier/worker.py | 17 ++++++++++++----- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/src/solo_turnier/html_parser.py b/src/solo_turnier/html_parser.py index 99046d0..7117e4b 100644 --- a/src/solo_turnier/html_parser.py +++ b/src/solo_turnier/html_parser.py @@ -8,6 +8,10 @@ from .types import HtmlPreviewImport as HtmlImport, HtmlResultImport from .group import GroupParser from .competition_class import CompetitionClassParser +class IncompleteRoundException(Exception): + def __init__(self, *args): + super(IncompleteRoundException, self).__init__(*args) + class HtmlParser: def __init__(self, text: str, fileName: str = None): @@ -73,7 +77,8 @@ class HtmlParser: def __parseFirstTable(table): roundName = table.tr.td.contents[0] if roundName != 'Endrunde': - raise Exception('Could not parse HTML file') + self.l.warning('Found table with round name %s.', roundName) + raise IncompleteRoundException('Could not parse HTML file') __parseRows(table.find_all('tr')[2:], True) @@ -82,10 +87,14 @@ class HtmlParser: __parseRows(table.find_all('tr'), False) tables = self.soup.find('div', class_='extract').find_all('table') - if len(tables) > 0: - __parseFirstTable(tables[0]) + + try: + if len(tables) > 0: + __parseFirstTable(tables[0]) - __parseRemainingTables(tables[1:]) + __parseRemainingTables(tables[1:]) + except IncompleteRoundException: + pass # title = self.soup.find('div', class_='eventhead').table.tr.td.contents[0] diff --git a/src/solo_turnier/worker.py b/src/solo_turnier/worker.py index cd6b0a1..9968e61 100644 --- a/src/solo_turnier/worker.py +++ b/src/solo_turnier/worker.py @@ -177,10 +177,14 @@ class ResultExtractor: for filePair in files: with open(filePair[0], 'r') as fp: text = fp.read() - with open(filePair[1], 'r') as fp: - textTab = fp.read() parser = html_parser.HtmlParser(text, filePair[0]) - parserTab = html_parser.HtmlParser(textTab, filePair[1]) + + if filePair[1] is None: + parserTab = None + else: + with open(filePair[1], 'r') as fp: + textTab = fp.read() + parserTab = html_parser.HtmlParser(textTab, filePair[1]) try: data = parser.guessDataFromHtmlTitle() @@ -247,8 +251,11 @@ class ResultExtractor: self.l.debug('Extracting data from file %s', fileName) self._analyzeSingleParser(parsers[fileNameTuple][0], ret) - self.l.debug('Fetching individual result of combined competitions in %s', fileName) - self._analyzeIndividualResults(parsers[fileNameTuple][1], ret) + if parsers[fileNameTuple][1] is None: + self.l.info('Skipping extraction of individual result as class is not yet finished.') + else: + self.l.debug('Fetching individual result of combined competitions in %s', fileName) + self._analyzeIndividualResults(parsers[fileNameTuple][1], ret) return ret