Parse preview rounds in batch script
This commit is contained in:
parent
ffa1e36b6f
commit
9e59ca79d2
@ -177,46 +177,60 @@ class BatchWorker:
|
||||
# --- Interior of a BatchWorker method (its "def" lies outside this hunk). ---
# Orchestrates the batch pipeline: locate HTML exports, parse them, filter
# preview ("Sichtung") rounds, import the result CSV, merge, and print.
self.l.debug(self.config.__dict__)

# Locate all exported HTML result files below the configured import path.
locator = solo_turnier.html_locator.HtmlLocator()
self.l.info('Checking for feasible HTML export files in "%s"', self.config.importHtmlPath())
htmlCandidates = locator.findCandidates(self.config.importHtmlPath())
self.l.debug('Found HTML file candidates: %s', htmlCandidates)
# Preview rounds are located separately from the regular exports.
self.l.info('Checking for feasible preview HTML export files in "%s"', self.config.importHtmlPath())
htmlCandidatesPreview = locator.findPreviewRoundCandidates(self.config.importHtmlPath())
self.l.debug('Found HTML file candidates for preview rounds: %s', htmlCandidatesPreview)

# Parse every regular HTML candidate into an in-memory import structure.
htmlParser = solo_turnier.html_parser.HtmlParser()
htmlImports = []
self.l.info('Importing the candidates')
for candidate in htmlCandidates:
    self.l.debug('Processing file %s', candidate)
    with open(candidate, 'r') as fp:
        fileContent = fp.read()
    htmlImports.append(htmlParser.parseString(fileContent))
self.l.info('All HTML files have been parsed')

# Keep only true preview rounds and extract the person data from them.
previewWorker = solo_turnier.worker.PreviewWorker()
self.l.info('Filtering for pure preview rounds.')
parsers = previewWorker.filterFilesPreview(htmlCandidatesPreview)
self.l.debug('Remaining files: %s', parsers.keys())

self.l.info('Extracting person data from the preview rounds.')
previewWorker.extractPersonsFromPreview(parsers)

# Import the overall result CSV file.
csvReader = solo_turnier.reader.AllResultReader(self.config.importCSVPath())
self.l.info('Importing the total result CSV file %s', self.config.importCSVPath())
csvData = csvReader.readFile()
self.l.info('CSV file has been read')

self.l.info('Processing the imported data')

# Map the raw CSV rows into the internal row representation.
csvExtractor = solo_turnier.worker.CSVExtractor()
self.l.debug('Importing CSV data into internal structures')
csvRows = csvExtractor.mapCSVImport(csvData)

# Combine CSV rows per person and reconcile group assignments.
worker = solo_turnier.worker.DataWorker()
self.l.debug('Combining results from CSV for individual users')
data = worker.combineRowsByPerson(csvRows)
self.l.debug('Fix the groups for combined competitions')
unambiguous, fixedGroups = worker.consolidateGroups(data)
if fixedGroups:
    self.l.info('It was required to fix some group issues.')
if not unambiguous:
    # NOTE(review): `self.warning` looks like a bug — presumably this should
    # be `self.l.warning`; confirm against the enclosing class definition.
    self.warning('There were couples whose group could not be extracted unambiguously.')
self.l.debug('Merging HTML and CSV data')
worker.mergeHtmlData(data, htmlImports)
self.l.info('Data is prepared')

# Emit the merged results to the console.
# NOTE(review): "consoleOutputtter" is misspelled (extra "t"); local name only.
consoleOutputtter = solo_turnier.output.ConsoleOutputter()
consoleOutputtter.output(data)
|
||||
# self.l.info('Checking for feasible HTML export files in "%s"', self.config.importHtmlPath())
|
||||
# htmlCandidates = locator.findCandidates(self.config.importHtmlPath())
|
||||
# self.l.debug('Found HTML file candidates: %s', htmlCandidates)
|
||||
|
||||
# htmlParser = solo_turnier.html_parser.HtmlParser()
|
||||
# htmlImports = []
|
||||
# self.l.info('Importing the candidates')
|
||||
# for candidate in htmlCandidates:
|
||||
# self.l.debug('Processing file %s', candidate)
|
||||
# with open(candidate, 'r') as fp:
|
||||
# fileContent = fp.read()
|
||||
# htmlImports.append(htmlParser.parseString(fileContent))
|
||||
# self.l.info('All HTML files have been parsed')
|
||||
|
||||
|
||||
# self.l.info('Processing the imported data')
|
||||
|
||||
# csvExtractor = solo_turnier.worker.CSVExtractor()
|
||||
# self.l.debug('Importing CSV data into internal structures')
|
||||
# csvRows = csvExtractor.mapCSVImport(csvData)
|
||||
|
||||
# self.l.debug('Combining results from CSV for individual users')
|
||||
# data = worker.combineRowsByPerson(csvRows)
|
||||
# self.l.debug('Fix the groups for combined competitions')
|
||||
# unambiguous, fixedGroups = worker.consolidateGroups(data)
|
||||
# if fixedGroups:
|
||||
# self.l.info('It was required to fix some group issues.')
|
||||
# if not unambiguous:
|
||||
# self.warning('There were couples whose group could not be extracted unambiguously.')
|
||||
# self.l.debug('Merging HTML and CSV data')
|
||||
# worker.mergeHtmlData(data, htmlImports)
|
||||
# self.l.info('Data is prepared')
|
||||
|
||||
# consoleOutputtter = solo_turnier.output.ConsoleOutputter()
|
||||
# consoleOutputtter.output(data)
|
||||
|
||||
def run1(self):
|
||||
allResults, finals = self.__extractDataFromFiles()
|
||||
|
@ -170,7 +170,7 @@ class HtmlParser:
|
||||
def cleanPreparationRoundImport(self, data):
|
||||
def __cleanTable(table):
|
||||
def __cleanText(s: str):
|
||||
print("cleaning string ", s)
|
||||
# print("cleaning string ", s)
|
||||
return s.strip(' \n\xa0')
|
||||
|
||||
def __cleanEntry(entry):
|
||||
@ -180,7 +180,7 @@ class HtmlParser:
|
||||
|
||||
for row in table:
|
||||
for entry in row:
|
||||
print(entry)
|
||||
# print(entry)
|
||||
__cleanEntry(entry)
|
||||
|
||||
data['title'] = data['title'].strip()
|
||||
|
@ -4,6 +4,7 @@ import csv
|
||||
import os
|
||||
import logging
|
||||
import re
|
||||
from pprint import pformat
|
||||
|
||||
class AllResultReader:
|
||||
def __init__(self, fileName: str):
|
||||
@ -25,5 +26,6 @@ class AllResultReader:
|
||||
'data': rows[1:]
|
||||
}
|
||||
|
||||
logging.getLogger('solo_turnier.reader.all_results').debug('Imported results from allresults.csv file: %s', ret)
|
||||
l = logging.getLogger('solo_turnier.reader.all_results')
|
||||
l.log(5, 'Imported results from allresults.csv file: %s', (ret))
|
||||
return ret
|
||||
|
@ -20,6 +20,15 @@ class ResultRow:
|
||||
def __str__(self):
|
||||
return f'{self.name} ({self.id}, {self.club}) is in {self.group} {self.class_} and danced the {self.dance} in {self.competitionGroup} {self.competitionClass} getting place {self.place}-{self.placeTo}'
|
||||
|
||||
class HtmlPerson:
    """Identity record for a dancer extracted from an HTML preview round.

    Plain value holder: a display name, a start number (id) and the
    group the person competes in.
    """

    def __init__(self, name, id, group):
        # Store the values exactly as parsed from the HTML export.
        self.name = name
        self.id = id
        self.group = group

    def __repr__(self):
        # Rendered as "<name> (<id>, <group>)", e.g. in debug log output.
        return '{} ({}, {})'.format(self.name, self.id, self.group)
|
||||
|
||||
class ResultPerson:
|
||||
def __init__(self, firstName, lastName, club, id = None, group = None):
|
||||
self.firstName = firstName
|
||||
@ -148,6 +157,76 @@ class CSVExtractor:
|
||||
|
||||
return ret
|
||||
|
||||
class PreviewWorker:
    """Selects preview-round ("Sichtung") HTML exports and extracts the
    participating persons from them."""

    def __init__(self):
        self.l = logging.getLogger('solo_turnier.worker.PreviewWorker')

    def filterFilesPreview(self, files: list[str]) -> dict[str, html_parser.HtmlParser]:
        """Keep only the files whose HTML title marks a preview round.

        :param files: paths of candidate HTML export files
        :return: mapping of file path -> parser for every preview-round file
        """
        self.l.debug('Filtering the list of parsers by removing all non preview entries.')
        ret = {}
        for file in files:
            with open(file, 'r') as fp:
                text = fp.read()

            parser = html_parser.HtmlParser(text)

            try:
                data = parser.guessDataFromHtmlTitle()
            except Exception:
                # Fixed: was a bare "except:", which also swallowed
                # KeyboardInterrupt/SystemExit.
                self.l.error(f'Unable to parse html file in {file}. Please check manually.')
                continue

            if data['class_'] == 'Sichtung':
                self.l.debug(f"Found candidate in {file}. Adding to the list.")
                ret[file] = parser
            else:
                self.l.debug(f'Rejecting file {file} as the name {data["class_"]} did not match.')

        return ret

    def __extractPersonsFromSinglePreview(self, parser: html_parser.HtmlParser):
        """Extract the persons listed in one preview-round table.

        :param parser: parser already loaded with the preview file's HTML
        :return: list of HtmlPerson entries (name, start id, group)
        :raises Exception: if the table layout is not the expected one
        """
        imported = parser.parsePreparationRound()
        parser.cleanPreparationRoundImport(imported)
        data = imported['data']

        # Level 5 is a custom "trace" level below DEBUG; pass the payload as a
        # lazy %-argument instead of using a non-string as the message itself.
        self.l.log(5, '%s', data)

        if data['titles'][0] != 'Wertungsrichter':
            self.l.fatal('Cannot parse the parsed content of the preview file.')
            raise Exception('Incompatible export file')

        # The first table row holds the start numbers; empty cells are padding
        # and are skipped. `indices` remembers the occupied column positions.
        ids = []
        names = []
        indices = []
        for index, e in enumerate(data['table'][0]):
            if e['text'] == '':
                continue
            indices.append(index)
            ids.append(e['text'])
            names.append(e['meta'])

        if data['titles'][-1] == 'Startgruppe':
            # Combined competition: the group is listed per person in the last row.
            self.l.debug('Combined competition found. Extracting group from table')
            groups = [data['table'][-1][idx]['text'] for idx in indices]
        else:
            # Single-group competition: one group for everybody, from the title.
            self.l.debug('Using group from the title.')
            group = parser.guessDataFromHtmlTitle(imported['title'])['group']
            groups = [group] * len(indices)

        ret = [HtmlPerson(name, id_, group) for name, id_, group in zip(names, ids, groups)]

        self.l.log(5, '%s', ret)
        return ret

    def extractPersonsFromPreview(self, parsers):
        """Run person extraction for every filtered preview file.

        NOTE(review): the extracted person lists are currently discarded;
        presumably a later change will collect them — confirm intent.
        """
        for file in parsers:
            self.l.debug('Extracting person data from %s', file)
            self.__extractPersonsFromSinglePreview(parsers[file])
|
||||
|
||||
class DataWorker:
    """Combines the imported CSV rows and HTML data into per-person results.

    (Only the beginning of this class is visible in this diff hunk.)
    """

    def __init__(self):
        # Shared logger for the worker classes.
        self.l = logging.getLogger('solo_turnier.worker')
|
||||
@ -297,3 +376,5 @@ class DataWorker:
|
||||
ret.append(competitions[0])
|
||||
|
||||
return ret
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user