Handle incomplete competitions as well

This commit is contained in:
Christian Wolf 2023-10-06 17:52:52 +02:00
parent be5ac238bc
commit 03fed1e1e4
2 changed files with 25 additions and 9 deletions

View File

@ -8,6 +8,10 @@ from .types import HtmlPreviewImport as HtmlImport, HtmlResultImport
from .group import GroupParser from .group import GroupParser
from .competition_class import CompetitionClassParser from .competition_class import CompetitionClassParser
class IncompleteRoundException(Exception):
    """Signal that a result table does not belong to the final round.

    Raised by the HTML parser when the first table's round name is not
    'Endrunde', i.e. the competition has not been finished yet.

    Any positional arguments (typically a single message string) are
    stored unchanged in ``args`` by the ``Exception`` base class, so no
    custom ``__init__`` is required.
    """
class HtmlParser: class HtmlParser:
def __init__(self, text: str, fileName: str = None): def __init__(self, text: str, fileName: str = None):
@ -73,7 +77,8 @@ class HtmlParser:
def __parseFirstTable(table): def __parseFirstTable(table):
roundName = table.tr.td.contents[0] roundName = table.tr.td.contents[0]
if roundName != 'Endrunde': if roundName != 'Endrunde':
raise Exception('Could not parse HTML file') self.l.warning('Found table with round name %s.', roundName)
raise IncompleteRoundException('Could not parse HTML file')
__parseRows(table.find_all('tr')[2:], True) __parseRows(table.find_all('tr')[2:], True)
@ -82,10 +87,14 @@ class HtmlParser:
__parseRows(table.find_all('tr'), False) __parseRows(table.find_all('tr'), False)
tables = self.soup.find('div', class_='extract').find_all('table') tables = self.soup.find('div', class_='extract').find_all('table')
if len(tables) > 0:
__parseFirstTable(tables[0]) try:
if len(tables) > 0:
__parseFirstTable(tables[0])
__parseRemainingTables(tables[1:]) __parseRemainingTables(tables[1:])
except IncompleteRoundException:
pass
# title = self.soup.find('div', class_='eventhead').table.tr.td.contents[0] # title = self.soup.find('div', class_='eventhead').table.tr.td.contents[0]

View File

@ -177,10 +177,14 @@ class ResultExtractor:
for filePair in files: for filePair in files:
with open(filePair[0], 'r') as fp: with open(filePair[0], 'r') as fp:
text = fp.read() text = fp.read()
with open(filePair[1], 'r') as fp:
textTab = fp.read()
parser = html_parser.HtmlParser(text, filePair[0]) parser = html_parser.HtmlParser(text, filePair[0])
parserTab = html_parser.HtmlParser(textTab, filePair[1])
if filePair[1] is None:
parserTab = None
else:
with open(filePair[1], 'r') as fp:
textTab = fp.read()
parserTab = html_parser.HtmlParser(textTab, filePair[1])
try: try:
data = parser.guessDataFromHtmlTitle() data = parser.guessDataFromHtmlTitle()
@ -247,8 +251,11 @@ class ResultExtractor:
self.l.debug('Extracting data from file %s', fileName) self.l.debug('Extracting data from file %s', fileName)
self._analyzeSingleParser(parsers[fileNameTuple][0], ret) self._analyzeSingleParser(parsers[fileNameTuple][0], ret)
self.l.debug('Fetching individual result of combined competitions in %s', fileName) if parsers[fileNameTuple][1] is None:
self._analyzeIndividualResults(parsers[fileNameTuple][1], ret) self.l.info('Skipping extraction of individual result as class is not yet finished.')
else:
self.l.debug('Fetching individual result of combined competitions in %s', fileName)
self._analyzeIndividualResults(parsers[fileNameTuple][1], ret)
return ret return ret