Handle incomplete competitions as well
This commit is contained in:
parent
be5ac238bc
commit
03fed1e1e4
@ -8,6 +8,10 @@ from .types import HtmlPreviewImport as HtmlImport, HtmlResultImport
|
||||
from .group import GroupParser
|
||||
from .competition_class import CompetitionClassParser
|
||||
|
||||
class IncompleteRoundException(Exception):
    """Signal that a parsed result table does not belong to a finished round.

    Raised by the HTML parser when the first table's round name is not the
    final round ('Endrunde'), so that callers can skip the competition
    instead of aborting on a generic ``Exception``.

    The constructor is inherited from ``Exception`` unchanged: all positional
    arguments are stored in ``args`` and used for the string representation.
    """
|
||||
|
||||
class HtmlParser:
|
||||
|
||||
def __init__(self, text: str, fileName: str = None):
|
||||
@ -73,7 +77,8 @@ class HtmlParser:
|
||||
def __parseFirstTable(table):
|
||||
roundName = table.tr.td.contents[0]
|
||||
if roundName != 'Endrunde':
|
||||
raise Exception('Could not parse HTML file')
|
||||
self.l.warning('Found table with round name %s.', roundName)
|
||||
raise IncompleteRoundException('Could not parse HTML file')
|
||||
|
||||
__parseRows(table.find_all('tr')[2:], True)
|
||||
|
||||
@ -82,10 +87,14 @@ class HtmlParser:
|
||||
__parseRows(table.find_all('tr'), False)
|
||||
|
||||
tables = self.soup.find('div', class_='extract').find_all('table')
|
||||
if len(tables) > 0:
|
||||
__parseFirstTable(tables[0])
|
||||
|
||||
try:
|
||||
if len(tables) > 0:
|
||||
__parseFirstTable(tables[0])
|
||||
|
||||
__parseRemainingTables(tables[1:])
|
||||
__parseRemainingTables(tables[1:])
|
||||
except IncompleteRoundException:
|
||||
pass
|
||||
|
||||
# title = self.soup.find('div', class_='eventhead').table.tr.td.contents[0]
|
||||
|
||||
|
@ -177,10 +177,14 @@ class ResultExtractor:
|
||||
for filePair in files:
|
||||
with open(filePair[0], 'r') as fp:
|
||||
text = fp.read()
|
||||
with open(filePair[1], 'r') as fp:
|
||||
textTab = fp.read()
|
||||
parser = html_parser.HtmlParser(text, filePair[0])
|
||||
parserTab = html_parser.HtmlParser(textTab, filePair[1])
|
||||
|
||||
if filePair[1] is None:
|
||||
parserTab = None
|
||||
else:
|
||||
with open(filePair[1], 'r') as fp:
|
||||
textTab = fp.read()
|
||||
parserTab = html_parser.HtmlParser(textTab, filePair[1])
|
||||
|
||||
try:
|
||||
data = parser.guessDataFromHtmlTitle()
|
||||
@ -247,8 +251,11 @@ class ResultExtractor:
|
||||
self.l.debug('Extracting data from file %s', fileName)
|
||||
self._analyzeSingleParser(parsers[fileNameTuple][0], ret)
|
||||
|
||||
self.l.debug('Fetching individual result of combined competitions in %s', fileName)
|
||||
self._analyzeIndividualResults(parsers[fileNameTuple][1], ret)
|
||||
if parsers[fileNameTuple][1] is None:
|
||||
self.l.info('Skipping extraction of individual result as class is not yet finished.')
|
||||
else:
|
||||
self.l.debug('Fetching individual result of combined competitions in %s', fileName)
|
||||
self._analyzeIndividualResults(parsers[fileNameTuple][1], ret)
|
||||
|
||||
return ret
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user