Parse preview rounds in batch script
This commit is contained in:
parent
ffa1e36b6f
commit
9e59ca79d2
@ -177,46 +177,60 @@ class BatchWorker:
|
|||||||
self.l.debug(self.config.__dict__)
|
self.l.debug(self.config.__dict__)
|
||||||
|
|
||||||
locator = solo_turnier.html_locator.HtmlLocator()
|
locator = solo_turnier.html_locator.HtmlLocator()
|
||||||
self.l.info('Checking for feasible HTML export files in "%s"', self.config.importHtmlPath())
|
self.l.info('Checking for feasible preview HTML export files in "%s"', self.config.importHtmlPath())
|
||||||
htmlCandidates = locator.findCandidates(self.config.importHtmlPath())
|
htmlCandidatesPreview = locator.findPreviewRoundCandidates(self.config.importHtmlPath())
|
||||||
self.l.debug('Found HTML file candidates: %s', htmlCandidates)
|
self.l.debug('Found HTML file candidates for preview rounds: %s', htmlCandidatesPreview)
|
||||||
|
|
||||||
htmlParser = solo_turnier.html_parser.HtmlParser()
|
previewWorker = solo_turnier.worker.PreviewWorker()
|
||||||
htmlImports = []
|
self.l.info('Filtering for pure preview rounds.')
|
||||||
self.l.info('Importing the candidates')
|
parsers = previewWorker.filterFilesPreview(htmlCandidatesPreview)
|
||||||
for candidate in htmlCandidates:
|
self.l.debug('Remaining files: %s', parsers.keys())
|
||||||
self.l.debug('Processing file %s', candidate)
|
|
||||||
with open(candidate, 'r') as fp:
|
self.l.info('Extracting person data from the preview rounds.')
|
||||||
fileContent = fp.read()
|
previewWorker.extractPersonsFromPreview(parsers)
|
||||||
htmlImports.append(htmlParser.parseString(fileContent))
|
|
||||||
self.l.info('All HTML files have been parsed')
|
|
||||||
|
|
||||||
csvReader = solo_turnier.reader.AllResultReader(self.config.importCSVPath())
|
csvReader = solo_turnier.reader.AllResultReader(self.config.importCSVPath())
|
||||||
self.l.info('Importing the total result CSV file %s', self.config.importCSVPath())
|
self.l.info('Importing the total result CSV file %s', self.config.importCSVPath())
|
||||||
csvData = csvReader.readFile()
|
csvData = csvReader.readFile()
|
||||||
self.l.info('CSV file has been read')
|
self.l.info('CSV file has been read')
|
||||||
|
|
||||||
self.l.info('Processing the imported data')
|
|
||||||
|
|
||||||
csvExtractor = solo_turnier.worker.CSVExtractor()
|
|
||||||
self.l.debug('Importing CSV data into internal structures')
|
|
||||||
csvRows = csvExtractor.mapCSVImport(csvData)
|
|
||||||
|
|
||||||
worker = solo_turnier.worker.DataWorker()
|
worker = solo_turnier.worker.DataWorker()
|
||||||
self.l.debug('Combining results from CSV for individual users')
|
|
||||||
data = worker.combineRowsByPerson(csvRows)
|
|
||||||
self.l.debug('Fix the groups for combined competitions')
|
|
||||||
unambiguous, fixedGroups = worker.consolidateGroups(data)
|
|
||||||
if fixedGroups:
|
|
||||||
self.l.info('It was required to fix some group issues.')
|
|
||||||
if not unambiguous:
|
|
||||||
self.warning('There were couples whose group could not be extracted unambiguously.')
|
|
||||||
self.l.debug('Merging HTML and CSV data')
|
|
||||||
worker.mergeHtmlData(data, htmlImports)
|
|
||||||
self.l.info('Data is prepared')
|
|
||||||
|
|
||||||
consoleOutputtter = solo_turnier.output.ConsoleOutputter()
|
# self.l.info('Checking for feasible HTML export files in "%s"', self.config.importHtmlPath())
|
||||||
consoleOutputtter.output(data)
|
# htmlCandidates = locator.findCandidates(self.config.importHtmlPath())
|
||||||
|
# self.l.debug('Found HTML file candidates: %s', htmlCandidates)
|
||||||
|
|
||||||
|
# htmlParser = solo_turnier.html_parser.HtmlParser()
|
||||||
|
# htmlImports = []
|
||||||
|
# self.l.info('Importing the candidates')
|
||||||
|
# for candidate in htmlCandidates:
|
||||||
|
# self.l.debug('Processing file %s', candidate)
|
||||||
|
# with open(candidate, 'r') as fp:
|
||||||
|
# fileContent = fp.read()
|
||||||
|
# htmlImports.append(htmlParser.parseString(fileContent))
|
||||||
|
# self.l.info('All HTML files have been parsed')
|
||||||
|
|
||||||
|
|
||||||
|
# self.l.info('Processing the imported data')
|
||||||
|
|
||||||
|
# csvExtractor = solo_turnier.worker.CSVExtractor()
|
||||||
|
# self.l.debug('Importing CSV data into internal structures')
|
||||||
|
# csvRows = csvExtractor.mapCSVImport(csvData)
|
||||||
|
|
||||||
|
# self.l.debug('Combining results from CSV for individual users')
|
||||||
|
# data = worker.combineRowsByPerson(csvRows)
|
||||||
|
# self.l.debug('Fix the groups for combined competitions')
|
||||||
|
# unambiguous, fixedGroups = worker.consolidateGroups(data)
|
||||||
|
# if fixedGroups:
|
||||||
|
# self.l.info('It was required to fix some group issues.')
|
||||||
|
# if not unambiguous:
|
||||||
|
# self.warning('There were couples whose group could not be extracted unambiguously.')
|
||||||
|
# self.l.debug('Merging HTML and CSV data')
|
||||||
|
# worker.mergeHtmlData(data, htmlImports)
|
||||||
|
# self.l.info('Data is prepared')
|
||||||
|
|
||||||
|
# consoleOutputtter = solo_turnier.output.ConsoleOutputter()
|
||||||
|
# consoleOutputtter.output(data)
|
||||||
|
|
||||||
def run1(self):
|
def run1(self):
|
||||||
allResults, finals = self.__extractDataFromFiles()
|
allResults, finals = self.__extractDataFromFiles()
|
||||||
|
@ -170,7 +170,7 @@ class HtmlParser:
|
|||||||
def cleanPreparationRoundImport(self, data):
|
def cleanPreparationRoundImport(self, data):
|
||||||
def __cleanTable(table):
|
def __cleanTable(table):
|
||||||
def __cleanText(s: str):
|
def __cleanText(s: str):
|
||||||
print("cleaning string ", s)
|
# print("cleaning string ", s)
|
||||||
return s.strip(' \n\xa0')
|
return s.strip(' \n\xa0')
|
||||||
|
|
||||||
def __cleanEntry(entry):
|
def __cleanEntry(entry):
|
||||||
@ -180,7 +180,7 @@ class HtmlParser:
|
|||||||
|
|
||||||
for row in table:
|
for row in table:
|
||||||
for entry in row:
|
for entry in row:
|
||||||
print(entry)
|
# print(entry)
|
||||||
__cleanEntry(entry)
|
__cleanEntry(entry)
|
||||||
|
|
||||||
data['title'] = data['title'].strip()
|
data['title'] = data['title'].strip()
|
||||||
|
@ -4,6 +4,7 @@ import csv
|
|||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
|
from pprint import pformat
|
||||||
|
|
||||||
class AllResultReader:
|
class AllResultReader:
|
||||||
def __init__(self, fileName: str):
|
def __init__(self, fileName: str):
|
||||||
@ -25,5 +26,6 @@ class AllResultReader:
|
|||||||
'data': rows[1:]
|
'data': rows[1:]
|
||||||
}
|
}
|
||||||
|
|
||||||
logging.getLogger('solo_turnier.reader.all_results').debug('Imported results from allresults.csv file: %s', ret)
|
l = logging.getLogger('solo_turnier.reader.all_results')
|
||||||
|
l.log(5, 'Imported results from allresults.csv file: %s', (ret))
|
||||||
return ret
|
return ret
|
||||||
|
@ -20,6 +20,15 @@ class ResultRow:
|
|||||||
def __str__(self):
|
def __str__(self):
|
||||||
return f'{self.name} ({self.id}, {self.club}) is in {self.group} {self.class_} and danced the {self.dance} in {self.competitionGroup} {self.competitionClass} getting place {self.place}-{self.placeTo}'
|
return f'{self.name} ({self.id}, {self.club}) is in {self.group} {self.class_} and danced the {self.dance} in {self.competitionGroup} {self.competitionClass} getting place {self.place}-{self.placeTo}'
|
||||||
|
|
||||||
|
class HtmlPerson:
    """A participant as listed in an HTML export: name, id and group."""

    def __init__(self, name, id, group):
        """Store the three identifying values unchanged on the instance."""
        self.name, self.id, self.group = name, id, group

    def __repr__(self):
        """Return the compact form ``<name> (<id>, <group>)`` used in log output."""
        return f'{self.name} ({self.id}, {self.group})'
|
||||||
|
|
||||||
class ResultPerson:
|
class ResultPerson:
|
||||||
def __init__(self, firstName, lastName, club, id = None, group = None):
|
def __init__(self, firstName, lastName, club, id = None, group = None):
|
||||||
self.firstName = firstName
|
self.firstName = firstName
|
||||||
@ -148,6 +157,76 @@ class CSVExtractor:
|
|||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
class PreviewWorker:
    """Select preview-round HTML exports and extract the persons listed in them."""

    def __init__(self):
        self.l = logging.getLogger('solo_turnier.worker.PreviewWorker')

    def filterFilesPreview(self, files: list[str]) -> dict[str, html_parser.HtmlParser]:
        """Keep only those files whose HTML title identifies a preview round.

        Each file is read and wrapped in an ``HtmlParser``. Files whose title
        cannot be interpreted are reported via the logger and skipped; files
        whose ``class_`` is not ``'Sichtung'`` are rejected.

        :param files: paths of the candidate HTML files.
        :return: mapping of file path to the parser built from that file,
            containing only the accepted files.
        """
        self.l.debug('Filtering the list of parsers by removing all non preview entries.')
        ret = {}
        for file in files:
            # NOTE(review): the file is decoded with the platform default
            # encoding - confirm which encoding the exports actually use.
            with open(file, 'r') as fp:
                text = fp.read()

            parser = html_parser.HtmlParser(text)

            try:
                data = parser.guessDataFromHtmlTitle()
            except Exception:
                # Deliberately best-effort: one broken file must not abort the
                # batch. Only ``Exception`` is caught so that Ctrl-C and
                # interpreter shutdown still propagate.
                self.l.error('Unable to parse html file in %s. Please check manually.', file)
                self.l.debug('Parsing the title of %s failed.', file, exc_info=True)
                continue

            if data['class_'] == 'Sichtung':
                self.l.debug('Found candidate in %s. Adding to the list.', file)
                ret[file] = parser
            else:
                self.l.debug('Rejecting file %s as the name %s did not match.', file, data['class_'])

        return ret

    def __extractPersonsFromSinglePreview(self, parser: html_parser.HtmlParser):
        """Build the list of ``HtmlPerson`` entries found in one preview file.

        The first table row supplies the id (``'text'``) and the name
        (``'meta'``) of every non-empty column. The group is read from the last
        table row when the last title is ``'Startgruppe'``; otherwise the group
        guessed from the HTML title is used for all persons.

        :param parser: parser holding the preview round to evaluate.
        :return: list of ``HtmlPerson`` objects, one per non-empty column.
        :raises Exception: if the first title is not ``'Wertungsrichter'``
            (or there are no titles at all).
        """
        imported = parser.parsePreparationRound()
        # The return value is not used; ``imported`` is read afterwards.
        parser.cleanPreparationRoundImport(imported)
        data = imported['data']

        self.l.log(5, data)

        titles = data['titles']
        # An empty title list is just as unusable as an unexpected first title.
        if not titles or titles[0] != 'Wertungsrichter':
            self.l.critical('Cannot parse the parsed content of the preview file.')
            raise Exception('Incompatible export file')

        # Remember the column index alongside each cell so the group row can be
        # addressed with the very same columns.
        occupied = [
            (index, cell)
            for index, cell in enumerate(data['table'][0])
            if cell['text'] != ''
        ]

        if titles[-1] == 'Startgruppe':
            self.l.debug('Combined competition found. Extracting group from table')
            groupRow = data['table'][-1]
            groups = [groupRow[index]['text'] for index, _ in occupied]
        else:
            self.l.debug('Using group from the title.')
            group = parser.guessDataFromHtmlTitle(imported['title'])['group']
            groups = [group] * len(occupied)

        ret = [
            HtmlPerson(cell['meta'], cell['text'], group)
            for (_, cell), group in zip(occupied, groups)
        ]

        self.l.log(5, ret)
        return ret

    def extractPersonsFromPreview(self, parsers):
        """Extract the persons of every preview file in ``parsers``.

        :param parsers: mapping of file path to parser, as returned by
            ``filterFilesPreview``.
        :return: mapping of file path to the list of persons extracted from
            that file.
        """
        ret = {}
        for file, parser in parsers.items():
            self.l.debug('Extracting person data from %s', file)
            ret[file] = self.__extractPersonsFromSinglePreview(parser)
        return ret
|
||||||
|
|
||||||
class DataWorker:
|
class DataWorker:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.l = logging.getLogger('solo_turnier.worker')
|
self.l = logging.getLogger('solo_turnier.worker')
|
||||||
@ -297,3 +376,5 @@ class DataWorker:
|
|||||||
ret.append(competitions[0])
|
ret.append(competitions[0])
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user