Handle incomplete competitions as well
This commit is contained in:
parent
be5ac238bc
commit
03fed1e1e4
@ -8,6 +8,10 @@ from .types import HtmlPreviewImport as HtmlImport, HtmlResultImport
|
|||||||
from .group import GroupParser
|
from .group import GroupParser
|
||||||
from .competition_class import CompetitionClassParser
|
from .competition_class import CompetitionClassParser
|
||||||
|
|
||||||
|
class IncompleteRoundException(Exception):
    """Signal that a parsed result table does not belong to a finished round.

    Raised while parsing the first result table when its round name is not
    'Endrunde'; the caller catches it to skip competitions that are still
    in progress instead of aborting the whole import.

    The constructor is inherited unchanged from ``Exception``: any positional
    arguments are stored in ``args`` exactly as before, so a forwarding
    ``__init__`` override is unnecessary.
    """
|
||||||
|
|
||||||
class HtmlParser:
|
class HtmlParser:
|
||||||
|
|
||||||
def __init__(self, text: str, fileName: str = None):
|
def __init__(self, text: str, fileName: str = None):
|
||||||
@ -73,7 +77,8 @@ class HtmlParser:
|
|||||||
def __parseFirstTable(table):
|
def __parseFirstTable(table):
|
||||||
roundName = table.tr.td.contents[0]
|
roundName = table.tr.td.contents[0]
|
||||||
if roundName != 'Endrunde':
|
if roundName != 'Endrunde':
|
||||||
raise Exception('Could not parse HTML file')
|
self.l.warning('Found table with round name %s.', roundName)
|
||||||
|
raise IncompleteRoundException('Could not parse HTML file')
|
||||||
|
|
||||||
__parseRows(table.find_all('tr')[2:], True)
|
__parseRows(table.find_all('tr')[2:], True)
|
||||||
|
|
||||||
@ -82,10 +87,14 @@ class HtmlParser:
|
|||||||
__parseRows(table.find_all('tr'), False)
|
__parseRows(table.find_all('tr'), False)
|
||||||
|
|
||||||
tables = self.soup.find('div', class_='extract').find_all('table')
|
tables = self.soup.find('div', class_='extract').find_all('table')
|
||||||
if len(tables) > 0:
|
|
||||||
__parseFirstTable(tables[0])
|
|
||||||
|
|
||||||
__parseRemainingTables(tables[1:])
|
try:
|
||||||
|
if len(tables) > 0:
|
||||||
|
__parseFirstTable(tables[0])
|
||||||
|
|
||||||
|
__parseRemainingTables(tables[1:])
|
||||||
|
except IncompleteRoundException:
|
||||||
|
pass
|
||||||
|
|
||||||
# title = self.soup.find('div', class_='eventhead').table.tr.td.contents[0]
|
# title = self.soup.find('div', class_='eventhead').table.tr.td.contents[0]
|
||||||
|
|
||||||
|
@ -177,10 +177,14 @@ class ResultExtractor:
|
|||||||
for filePair in files:
|
for filePair in files:
|
||||||
with open(filePair[0], 'r') as fp:
|
with open(filePair[0], 'r') as fp:
|
||||||
text = fp.read()
|
text = fp.read()
|
||||||
with open(filePair[1], 'r') as fp:
|
|
||||||
textTab = fp.read()
|
|
||||||
parser = html_parser.HtmlParser(text, filePair[0])
|
parser = html_parser.HtmlParser(text, filePair[0])
|
||||||
parserTab = html_parser.HtmlParser(textTab, filePair[1])
|
|
||||||
|
if filePair[1] is None:
|
||||||
|
parserTab = None
|
||||||
|
else:
|
||||||
|
with open(filePair[1], 'r') as fp:
|
||||||
|
textTab = fp.read()
|
||||||
|
parserTab = html_parser.HtmlParser(textTab, filePair[1])
|
||||||
|
|
||||||
try:
|
try:
|
||||||
data = parser.guessDataFromHtmlTitle()
|
data = parser.guessDataFromHtmlTitle()
|
||||||
@ -247,8 +251,11 @@ class ResultExtractor:
|
|||||||
self.l.debug('Extracting data from file %s', fileName)
|
self.l.debug('Extracting data from file %s', fileName)
|
||||||
self._analyzeSingleParser(parsers[fileNameTuple][0], ret)
|
self._analyzeSingleParser(parsers[fileNameTuple][0], ret)
|
||||||
|
|
||||||
self.l.debug('Fetching individual result of combined competitions in %s', fileName)
|
if parsers[fileNameTuple][1] is None:
|
||||||
self._analyzeIndividualResults(parsers[fileNameTuple][1], ret)
|
self.l.info('Skipping extraction of individual result as class is not yet finished.')
|
||||||
|
else:
|
||||||
|
self.l.debug('Fetching individual result of combined competitions in %s', fileName)
|
||||||
|
self._analyzeIndividualResults(parsers[fileNameTuple][1], ret)
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user