Written basic code to parse existing exports successfully
This commit is contained in:
parent
d39a8d590e
commit
eafca2e9aa
24
.vscode/launch.json
vendored
Normal file
24
.vscode/launch.json
vendored
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Python: Remote Attach",
|
||||||
|
"type": "python",
|
||||||
|
"request": "attach",
|
||||||
|
"connect": {
|
||||||
|
"host": "localhost",
|
||||||
|
"port": 5678
|
||||||
|
},
|
||||||
|
"pathMappings": [
|
||||||
|
{
|
||||||
|
"localRoot": "${workspaceFolder:code}",
|
||||||
|
"remoteRoot": "."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"justMyCode": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
@ -5,7 +5,8 @@
|
|||||||
"name": "code"
|
"name": "code"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"path": "../../../../../nextcloud/Documents/Projekte/SLT/Auswertungsskript Solo"
|
"path": "../files",
|
||||||
|
"name": "files"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"settings": {
|
"settings": {
|
||||||
|
@ -3,6 +3,7 @@ beautifulsoup4==4.11.1
|
|||||||
colorama==0.4.6
|
colorama==0.4.6
|
||||||
coloredlogs==15.0.1
|
coloredlogs==15.0.1
|
||||||
coverage==6.5.0
|
coverage==6.5.0
|
||||||
|
debugpy==1.6.7
|
||||||
exceptiongroup==1.0.1
|
exceptiongroup==1.0.1
|
||||||
humanfriendly==10.0
|
humanfriendly==10.0
|
||||||
iniconfig==1.1.1
|
iniconfig==1.1.1
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
_dir="$(dirname "$0")"
|
_dir="$(dirname "$0")"
|
||||||
|
|
||||||
if [ -n "$PTHONPATH" ]; then
|
if [ -n "$PYTHONPATH" ]; then
|
||||||
PYTHONPATH="$PYTHONPATH:$_dir/src"
|
PYTHONPATH="$PYTHONPATH:$_dir/src"
|
||||||
else
|
else
|
||||||
PYTHONPATH="$_dir/src"
|
PYTHONPATH="$_dir/src"
|
||||||
|
@ -181,21 +181,25 @@ class BatchWorker:
|
|||||||
htmlCandidatesPreview = locator.findPreviewRoundCandidates(self.config.importHtmlPath())
|
htmlCandidatesPreview = locator.findPreviewRoundCandidates(self.config.importHtmlPath())
|
||||||
self.l.debug('Found HTML file candidates for preview rounds: %s', htmlCandidatesPreview)
|
self.l.debug('Found HTML file candidates for preview rounds: %s', htmlCandidatesPreview)
|
||||||
|
|
||||||
worker = solo_turnier.worker.Worker()
|
htmlResultFiles = locator.findCandidates(self.config.importHtmlPath())
|
||||||
worker.collectAllData(htmlCandidatesPreview, self.config.importCSVPath())
|
self.l.debug('Using HTML result files for result extraction: %s', htmlResultFiles)
|
||||||
|
|
||||||
# csvReader = solo_turnier.reader.AllResultReader(self.config.importCSVPath())
|
worker = solo_turnier.worker.Worker()
|
||||||
|
importedData = worker.collectAllData(htmlCandidatesPreview, self.config.importCSVPath(), htmlResultFiles)
|
||||||
|
worker.combineData(importedData)
|
||||||
|
|
||||||
|
# csvReader = solo_turnier.reader.CSVResultReader(self.config.importCSVPath())
|
||||||
# self.l.info('Loading the total result CSV file %s', self.config.importCSVPath())
|
# self.l.info('Loading the total result CSV file %s', self.config.importCSVPath())
|
||||||
# csvData = csvReader.readFile()
|
# csvData = csvReader.readFile()
|
||||||
# self.l.info('CSV file has been read')
|
# self.l.info('CSV file has been read')
|
||||||
|
|
||||||
# csvExtractor = solo_turnier.worker.CSVExtractor()
|
# csvExtractor = solo_turnier.reader.CSVExtractor()
|
||||||
# self.l.info('Importing CSV data into internal structures')
|
# self.l.info('Importing CSV data into internal structures')
|
||||||
# csvRows = csvExtractor.mapCSVImport(csvData)
|
# csvRows = csvExtractor.mapCSVImport(csvData)
|
||||||
|
|
||||||
# worker = solo_turnier.worker.DataWorker()
|
# worker = solo_turnier.worker.DataWorker()
|
||||||
|
|
||||||
|
# /////
|
||||||
|
|
||||||
# self.l.info('Checking for feasible HTML export files in "%s"', self.config.importHtmlPath())
|
# self.l.info('Checking for feasible HTML export files in "%s"', self.config.importHtmlPath())
|
||||||
# htmlCandidates = locator.findCandidates(self.config.importHtmlPath())
|
# htmlCandidates = locator.findCandidates(self.config.importHtmlPath())
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
import debugpy
|
||||||
|
|
||||||
class Cli:
|
class Cli:
|
||||||
def __init__(self, l: logging.Logger):
|
def __init__(self, l: logging.Logger):
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
@ -11,8 +13,13 @@ class Cli:
|
|||||||
parser.add_argument('-o', '--output', help='Set the output path of the script', nargs=1, default=[None])
|
parser.add_argument('-o', '--output', help='Set the output path of the script', nargs=1, default=[None])
|
||||||
|
|
||||||
parser.add_argument('-v', '--verbose', help='Increase verbosity', action='count', default=0)
|
parser.add_argument('-v', '--verbose', help='Increase verbosity', action='count', default=0)
|
||||||
|
parser.add_argument('-d', '--debug', action='store_true', help='Activate debugging during startup')
|
||||||
self.__args = parser.parse_args()
|
self.__args = parser.parse_args()
|
||||||
|
|
||||||
|
if self.__args.debug:
|
||||||
|
debugpy.listen(5678)
|
||||||
|
debugpy.wait_for_client()
|
||||||
|
|
||||||
map = {
|
map = {
|
||||||
0: logging.ERROR,
|
0: logging.ERROR,
|
||||||
1: logging.WARN,
|
1: logging.WARN,
|
||||||
|
@ -78,3 +78,19 @@ class GroupParser:
|
|||||||
def isPureClass(self, cls: str) -> bool:
|
def isPureClass(self, cls: str) -> bool:
|
||||||
parsedClass = self.parseClass(cls)
|
parsedClass = self.parseClass(cls)
|
||||||
return isinstance(parsedClass, Group)
|
return isinstance(parsedClass, Group)
|
||||||
|
|
||||||
|
def getGroups(self) -> list[Group]:
    """Return the group constants of GroupParser in a fixed order.

    The order is KIN, JUN, JUG, HGR, MAS1, MAS2, MAS3, MAS4, MAS5.
    """
    orderedNames = ('KIN', 'JUN', 'JUG', 'HGR', 'MAS1', 'MAS2', 'MAS3', 'MAS4', 'MAS5')
    return [getattr(GroupParser, name) for name in orderedNames]
|
||||||
|
|
||||||
|
def getGroupsAsSortedList(self, groups) -> list[Group]:
    """Return the members of ``groups`` arranged in the order of ``getGroups()``.

    Entries of ``groups`` that ``getGroups()`` does not list are dropped.
    """
    ordered = []
    for candidate in self.getGroups():
        if candidate in groups:
            ordered.append(candidate)
    return ordered
|
||||||
|
@ -3,8 +3,10 @@ from bs4 import BeautifulSoup
|
|||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .types import HtmlPreviewParticipant as HtmlParticipant
|
from .types import HtmlPreviewParticipant, HtmlParticipant
|
||||||
from .types import HtmlPreviewImport as HtmlImport
|
from .types import HtmlPreviewImport as HtmlImport, HtmlResultImport
|
||||||
|
from .group import GroupParser
|
||||||
|
from .competition_class import CompetitionClassParser
|
||||||
|
|
||||||
class HtmlParser:
|
class HtmlParser:
|
||||||
|
|
||||||
@ -12,6 +14,8 @@ class HtmlParser:
|
|||||||
self.l = logging.getLogger('solo_turnier.html_parser')
|
self.l = logging.getLogger('solo_turnier.html_parser')
|
||||||
self.soup = BeautifulSoup(text, 'html.parser')
|
self.soup = BeautifulSoup(text, 'html.parser')
|
||||||
self.fileName = fileName
|
self.fileName = fileName
|
||||||
|
self.groupParser = GroupParser()
|
||||||
|
self.classParser = CompetitionClassParser()
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
if self.fileName is None:
|
if self.fileName is None:
|
||||||
@ -33,32 +37,22 @@ class HtmlParser:
|
|||||||
rest = match.group(1)
|
rest = match.group(1)
|
||||||
rawGroup, rawClass, dance = rest.split(' ', 2)
|
rawGroup, rawClass, dance = rest.split(' ', 2)
|
||||||
|
|
||||||
classMap = {
|
|
||||||
'Newcomer': 'Newc.',
|
|
||||||
'Beginner': 'Beg.',
|
|
||||||
'Advanced': 'Adv.'
|
|
||||||
}
|
|
||||||
|
|
||||||
groupMap = {
|
|
||||||
'Kinder': 'Kin.',
|
|
||||||
'Junioren': 'Jun.',
|
|
||||||
'Jugend': 'Jug.',
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'dance': dance.strip(),
|
'dance': dance.strip(),
|
||||||
'class_': classMap.get(rawClass, rawClass),
|
'class_': str(self.classParser.parseClass(rawClass, True)),
|
||||||
'group': groupMap.get(rawGroup, rawGroup)
|
'group': str(self.groupParser.parseClass(rawGroup))
|
||||||
}
|
}
|
||||||
|
|
||||||
def parseString(self, text: str):
|
def parseResult(self):
|
||||||
soup = BeautifulSoup(text, 'html.parser')
|
|
||||||
|
|
||||||
participants = {}
|
participants = {}
|
||||||
|
|
||||||
def __parseRows(rows, finalist: bool):
|
def __parseRows(rows, finalist: bool):
|
||||||
def __parseRow(row):
|
def __parseRow(row):
|
||||||
tds = row.find_all('td')
|
tds = row.find_all('td')
|
||||||
|
|
||||||
|
if len(tds) != 2:
|
||||||
|
return
|
||||||
|
|
||||||
regex = re.compile('(.*) \\(([0-9]+)\\)')
|
regex = re.compile('(.*) \\(([0-9]+)\\)')
|
||||||
|
|
||||||
place = tds[0].contents[0]
|
place = tds[0].contents[0]
|
||||||
@ -69,8 +63,9 @@ class HtmlParser:
|
|||||||
name = match.group(1)
|
name = match.group(1)
|
||||||
number = match.group(2)
|
number = match.group(2)
|
||||||
|
|
||||||
participant = HtmlParticipant(name, place, finalist)
|
participant = HtmlParticipant(name, number)
|
||||||
participants[number] = participant
|
participant.finalist = finalist
|
||||||
|
participants[participant] = place
|
||||||
|
|
||||||
for row in rows:
|
for row in rows:
|
||||||
__parseRow(row)
|
__parseRow(row)
|
||||||
@ -84,17 +79,18 @@ class HtmlParser:
|
|||||||
|
|
||||||
def __parseRemainingTables(tables):
|
def __parseRemainingTables(tables):
|
||||||
for table in tables:
|
for table in tables:
|
||||||
__parseRows(table.find_all('tr')[2:], False)
|
__parseRows(table.find_all('tr'), False)
|
||||||
|
|
||||||
tables = soup.find('div', class_='extract').find_all('table')
|
tables = self.soup.find('div', class_='extract').find_all('table')
|
||||||
if len(tables) > 0:
|
if len(tables) > 0:
|
||||||
__parseFirstTable(tables[0])
|
__parseFirstTable(tables[0])
|
||||||
|
|
||||||
__parseRemainingTables(tables[1:])
|
__parseRemainingTables(tables[1:])
|
||||||
|
|
||||||
title = soup.find('div', class_='eventhead').table.tr.td.contents[0]
|
# title = self.soup.find('div', class_='eventhead').table.tr.td.contents[0]
|
||||||
|
|
||||||
ret = HtmlImport(title, participants)
|
# ret = HtmlImport(title, participants)
|
||||||
|
ret = HtmlResultImport(participants)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def parsePreparationRound(self):
|
def parsePreparationRound(self):
|
||||||
|
@ -21,10 +21,11 @@ class CSVResultRow:
|
|||||||
return f'{self.name} ({self.id}, {self.club}) is in {self.group} {self.class_} and danced the {self.dance} in {self.competitionGroup} {self.competitionClass} getting place {self.place}-{self.placeTo}'
|
return f'{self.name} ({self.id}, {self.club}) is in {self.group} {self.class_} and danced the {self.dance} in {self.competitionGroup} {self.competitionClass} getting place {self.place}-{self.placeTo}'
|
||||||
|
|
||||||
class HtmlPreviewParticipant:
|
class HtmlPreviewParticipant:
|
||||||
def __init__(self, name, id, participant_group):
|
def __init__(self, name, id, group_):
|
||||||
self.name = name
|
self.name = name
|
||||||
self.id = id
|
self.id = id
|
||||||
self.group = group.GroupParser().parseClass(participant_group)
|
groupParser = group.GroupParser()
|
||||||
|
self.group = groupParser.parseClass(group_)
|
||||||
|
|
||||||
def __eq__(self, o):
|
def __eq__(self, o):
|
||||||
if type(o) != HtmlPreviewParticipant:
|
if type(o) != HtmlPreviewParticipant:
|
||||||
@ -33,14 +34,52 @@ class HtmlPreviewParticipant:
|
|||||||
return all(map(lambda x, y: x == y, (self.name, self.id, self.group), (o.name, o.id, o.group)))
|
return all(map(lambda x, y: x == y, (self.name, self.id, self.group), (o.name, o.id, o.group)))
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return f'{self.id}: {self.name} ({self.group})'
|
return f'{self.id} ({self.name}, {self.group})'
|
||||||
|
|
||||||
class HtmlPreviewImport:
|
def __hash__(self):
|
||||||
def __init__(self, participants: dict[int, HtmlPreviewParticipant]):
|
return hash((self.id, self.name, self.group))
|
||||||
self.participants = participants
|
|
||||||
|
class HtmlParticipant:
|
||||||
|
def __init__(self, name, id):
|
||||||
|
self.name = name
|
||||||
|
self.id = id
|
||||||
|
self.finalist = None
|
||||||
|
|
||||||
|
def __eq__(self, o):
    """Return True when ``o`` is an HtmlParticipant with the same name and id.

    The comparison uses the same fields as ``__hash__`` (id and name), so
    equal objects always hash equally; ``finalist`` is not compared.
    """
    # Compare against this class: HtmlParticipant has no ``group``
    # attribute, so only name and id can take part in the comparison.
    if type(o) != HtmlParticipant:
        return False

    return (self.name, self.id) == (o.name, o.id)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return str(self.participants)
|
return f'{self.id}: {self.name}'
|
||||||
|
|
||||||
|
def __hash__(self):
|
||||||
|
return hash((self.id, self.name))
|
||||||
|
|
||||||
|
# class PreviewParticipationData:
|
||||||
|
# def __init__(self, dance: str, class_: competition_class.CompetitionClass):
|
||||||
|
# self.class_ = class_
|
||||||
|
# self.dance = dance
|
||||||
|
|
||||||
|
class HtmlPreviewImport:
    """Container for the data collected from the preview-round HTML files.

    ``participants`` is annotated as a mapping id -> list of participants,
    ``results`` as a mapping participant -> {dance: competition class}.
    """

    def __init__(
        self,
        participants: "dict[int, list[HtmlPreviewParticipant]]",
        results: "dict[HtmlPreviewParticipant, dict[str, competition_class.CompetitionClass]]"
    ):
        self.participants = participants
        self.results = results

    def __repr__(self):
        # __repr__ has to return a str; returning a tuple makes repr()
        # (and therefore %s logging of this object) raise a TypeError.
        return f'({self.participants}, {self.results})'
|
||||||
|
|
||||||
|
class HtmlResultImport:
    """Holds the mapping participant -> place string read from one result page."""

    def __init__(self, results: dict[HtmlParticipant, str]):
        self.results = results

    def __repr__(self):
        # The textual form is simply that of the wrapped mapping.
        text = str(self.results)
        return text
|
||||||
|
|
||||||
class HtmlCompetitionResultRow:
|
class HtmlCompetitionResultRow:
|
||||||
def __init__(self, name, id, dance, group, class_, place, placeTo, finalist):
|
def __init__(self, name, id, dance, group, class_, place, placeTo, finalist):
|
||||||
@ -84,6 +123,17 @@ class HtmlSingleCompetitionResult:
|
|||||||
self.placeTo = placeTo
|
self.placeTo = placeTo
|
||||||
self.finalist = finalist
|
self.finalist = finalist
|
||||||
|
|
||||||
|
def __repr__(self):
    """Render as ``Res(<name>[ [F]], placed <place>[-<placeTo>])``.

    The ``[F]`` marker is added for finalists; the place is shown as a range
    only when ``placeTo`` is set.
    """
    place = self.place if self.placeTo is None else f'{self.place}-{self.placeTo}'
    marker = ' [F]' if self.finalist else ''
    return f'Res({self.name}{marker}, placed {place})'
|
||||||
|
|
||||||
class HtmlCompetitionTotalResults:
|
class HtmlCompetitionTotalResults:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.results = {}
|
self.results = {}
|
||||||
@ -94,12 +144,65 @@ class HtmlCompetitionTotalResults:
|
|||||||
def get(self, group: group.Group_t, class_: competition_class.Class_t, dance: str, id: int) -> list[HtmlSingleCompetitionResult]:
|
def get(self, group: group.Group_t, class_: competition_class.Class_t, dance: str, id: int) -> list[HtmlSingleCompetitionResult]:
|
||||||
return self.results[self.__getTuple(group, class_, dance, id)]
|
return self.results[self.__getTuple(group, class_, dance, id)]
|
||||||
|
|
||||||
|
def getById(self, id: int) -> dict[tuple[str, group.Group_t, competition_class.Class_t], HtmlSingleCompetitionResult]:
    """Collect all stored results whose key carries the given participant id.

    The stored keys are read as (group, class, dance, id); the returned
    mapping is re-keyed as (dance, group, class) and keeps the stored values.
    """
    return {
        (key[2], key[0], key[1]): entries
        for key, entries in self.results.items()
        if int(key[3]) == id
    }
|
||||||
|
|
||||||
def add(self, group, class_, dance, id, result: HtmlSingleCompetitionResult):
|
def add(self, group, class_, dance, id, result: HtmlSingleCompetitionResult):
|
||||||
tup = self.__getTuple(group, class_, dance, id)
|
tup = self.__getTuple(group, class_, dance, id)
|
||||||
l = self.results.get(tup, [])
|
l = self.results.get(tup, [])
|
||||||
l.append(result)
|
l.append(result)
|
||||||
self.results[tup] = l
|
self.results[tup] = l
|
||||||
|
|
||||||
|
class SingleParticipantResult:
    """Result of one participant in one dance of one competition class."""

    def __init__(
        self,
        competitionClass: competition_class.Class_t,
        dance: str,
        finalist: bool,
        place: int,
        placeTo: int|None
    ):
        self.competitionClass = competitionClass
        self.dance = dance
        self.finalist = finalist
        self.place = place
        # A range that starts and ends on the same place is kept as a single place.
        self.placeTo = None if placeTo == place else placeTo

    def __repr__(self):
        suffix = ' as finalist' if self.finalist else ''
        if self.placeTo is None:
            span = f'{self.place}'
        else:
            span = f'{self.place}-{self.placeTo}'

        return f'SR[{span} in {self.dance} {self.competitionClass}{suffix}]'
|
||||||
|
|
||||||
|
class TotalGroupResult:
    """Pairs the list of dances of a group with the per-participant results."""

    def __init__(self, dances: list[str], results: dict[HtmlPreviewParticipant, list[SingleParticipantResult]]):
        # Both arguments are stored unchanged.
        self.results = results
        self.dances = dances
|
||||||
|
|
||||||
|
class State4:
    """Final per-group results together with the groups in display order."""

    def __init__(
        self,
        resultPerGroup: dict[group.Group, TotalGroupResult]
    ):
        self.results = resultPerGroup
        # Arrange the groups that have results in the order defined by GroupParser.
        self.groups = group.GroupParser().getGroupsAsSortedList(resultPerGroup.keys())
|
||||||
|
|
||||||
class State3:
|
class State3:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
@ -1,11 +1,14 @@
|
|||||||
import logging
|
import logging
|
||||||
from pprint import pformat
|
from pprint import pformat
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
import solo_turnier
|
import solo_turnier
|
||||||
from solo_turnier import html_parser
|
from solo_turnier import html_parser
|
||||||
from .reader import ResultRow
|
from .reader import ResultRow
|
||||||
from .types import HtmlCompetitionResultRow as CompetitionResult
|
from .types import HtmlCompetitionResultRow as CompetitionResult
|
||||||
from . import types
|
from . import types
|
||||||
|
from . import competition_class
|
||||||
|
|
||||||
class HtmlPerson:
|
class HtmlPerson:
|
||||||
def __init__(self, name, id, group):
|
def __init__(self, name, id, group):
|
||||||
@ -72,6 +75,7 @@ class PreviewWorker:
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.l = logging.getLogger('solo_turnier.worker.PreviewWorker')
|
self.l = logging.getLogger('solo_turnier.worker.PreviewWorker')
|
||||||
self.participants = {}
|
self.participants = {}
|
||||||
|
self.previewResults = {}
|
||||||
|
|
||||||
def filterFilesPreview(self, files: list[str]) -> ParserList_t:
|
def filterFilesPreview(self, files: list[str]) -> ParserList_t:
|
||||||
self.l.debug('Filtering the list of parsers by removing all non preview entries.')
|
self.l.debug('Filtering the list of parsers by removing all non preview entries.')
|
||||||
@ -101,6 +105,12 @@ class PreviewWorker:
|
|||||||
parser.cleanPreparationRoundImport(imported)
|
parser.cleanPreparationRoundImport(imported)
|
||||||
data = imported['data']
|
data = imported['data']
|
||||||
|
|
||||||
|
headerData = parser.guessDataFromHtmlTitle()
|
||||||
|
dance = headerData['dance']
|
||||||
|
|
||||||
|
def getRowIndexOfClass():
|
||||||
|
return data['titles'].index('Platz von\nPlatz bis')
|
||||||
|
|
||||||
self.l.log(5, data)
|
self.l.log(5, data)
|
||||||
|
|
||||||
if data['titles'][0] != 'Wertungsrichter':
|
if data['titles'][0] != 'Wertungsrichter':
|
||||||
@ -115,6 +125,8 @@ class PreviewWorker:
|
|||||||
group = parser.guessDataFromHtmlTitle(imported['title'])['group']
|
group = parser.guessDataFromHtmlTitle(imported['title'])['group']
|
||||||
extractGroup = False
|
extractGroup = False
|
||||||
|
|
||||||
|
classRowIndex = getRowIndexOfClass()
|
||||||
|
|
||||||
for index, e in enumerate(data['table'][0]):
|
for index, e in enumerate(data['table'][0]):
|
||||||
if e['text'] == '':
|
if e['text'] == '':
|
||||||
# Skip empty columns
|
# Skip empty columns
|
||||||
@ -126,6 +138,9 @@ class PreviewWorker:
|
|||||||
if extractGroup:
|
if extractGroup:
|
||||||
group = data['table'][-1][index]['text']
|
group = data['table'][-1][index]['text']
|
||||||
|
|
||||||
|
# dance =
|
||||||
|
class_ = data['table'][classRowIndex][index]['text']
|
||||||
|
|
||||||
participant = types.HtmlPreviewParticipant(name, id, group)
|
participant = types.HtmlPreviewParticipant(name, id, group)
|
||||||
|
|
||||||
l = self.participants.get(id, [])
|
l = self.participants.get(id, [])
|
||||||
@ -134,6 +149,10 @@ class PreviewWorker:
|
|||||||
l.append(participant)
|
l.append(participant)
|
||||||
self.participants[id] = l
|
self.participants[id] = l
|
||||||
|
|
||||||
|
results = self.previewResults.get(participant, {})
|
||||||
|
results[dance] = class_
|
||||||
|
self.previewResults[participant] = results
|
||||||
|
|
||||||
def importAllData(self, parsers: ParserList_t) -> types.HtmlPreviewImport:
|
def importAllData(self, parsers: ParserList_t) -> types.HtmlPreviewImport:
|
||||||
self.participants = {}
|
self.participants = {}
|
||||||
|
|
||||||
@ -141,7 +160,79 @@ class PreviewWorker:
|
|||||||
parser = parsers[file]
|
parser = parsers[file]
|
||||||
self.__extractPersonsFromSinglePreview(parser)
|
self.__extractPersonsFromSinglePreview(parser)
|
||||||
|
|
||||||
return types.HtmlPreviewImport(self.participants)
|
return types.HtmlPreviewImport(self.participants, self.previewResults)
|
||||||
|
|
||||||
|
class ResultExtractor:
    """Reads result HTML files and collects the places of all participants."""

    def __init__(self):
        self.l = logging.getLogger('solo_turnier.worker.ResultExtractor')
        # A single place ("3") and a place range ("3-5"); spaces are tolerated.
        self.rePlaceSingle = re.compile(' *([0-9]+) *')
        self.rePlaceDouble = re.compile(' *([0-9]+) *- *([0-9]+) *')

    def getAllParsers(self, files: list[str]) -> "ParserList_t":
        """Create a parser for every file whose title and class can be parsed.

        Files that fail one of the two checks are reported via the logger and
        left out. Returns a mapping file name -> HtmlParser.
        """
        ret = {}
        classParser = competition_class.CompetitionClassParser()

        for file in files:
            with open(file, 'r') as fp:
                text = fp.read()

            parser = html_parser.HtmlParser(text, file)

            # Catch Exception rather than everything, so that
            # KeyboardInterrupt and SystemExit are not swallowed.
            try:
                data = parser.guessDataFromHtmlTitle()
            except Exception:
                self.l.error('Cannot parse HTML file %s to check if it is a valid result. Check manually.', file)
                continue

            try:
                guessedClass = classParser.parseClass(data['class_'])
            except Exception:
                self.l.error('Issue parsing class of file %s. Check manually.', file)
                continue

            self.l.debug('Fetched result data: %s, guessed class %s', data, guessedClass)
            ret[file] = parser

        return ret

    def _extractPlace(self, placeStr: str):
        """Parse a place string into ``(place, placeTo)``.

        All dots are removed first. A single number yields ``(place, None)``,
        a range ``a-b`` yields ``(a, b)``.

        Raises:
            Exception: if the string matches neither form.
        """
        s = placeStr.replace('.', '')

        matches = self.rePlaceSingle.fullmatch(s)
        if matches is not None:
            return (int(matches.group(1)), None)

        matches = self.rePlaceDouble.fullmatch(s)
        if matches is not None:
            return (int(matches.group(1)), int(matches.group(2)))

        self.l.error('Could not parse place string "%s"', placeStr)
        raise Exception('Place cannot be parsed')

    def _analyzeSingleParser(self, parser: "html_parser.HtmlParser", results: "types.HtmlCompetitionTotalResults"):
        """Add every participant result of one parsed file to ``results``."""
        data = parser.guessDataFromHtmlTitle()
        competitionClass = data['class_']
        competitionGroup = data['group']
        dance = data['dance']

        result = parser.parseResult()
        self.l.log(5, 'Raw data extracted: %s', result)

        for person, placeStr in result.results.items():
            place, placeTo = self._extractPlace(placeStr)
            competitionResult = types.HtmlSingleCompetitionResult(person.name, place, placeTo, person.finalist)
            results.add(competitionGroup, competitionClass, dance, person.id, competitionResult)

    def extractAllData(self, parsers: "ParserList_t") -> "types.HtmlCompetitionTotalResults":
        """Run ``_analyzeSingleParser`` over all parsers and return the combined results."""
        ret = types.HtmlCompetitionTotalResults()

        for fileName in parsers:
            self.l.debug('Extracting data from file %s', fileName)
            self._analyzeSingleParser(parsers[fileName], ret)

        return ret
|
||||||
|
|
||||||
class DataWorker:
|
class DataWorker:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@ -296,24 +387,146 @@ class DataWorker:
|
|||||||
class Worker:
|
class Worker:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.l = logging.getLogger('solo_turnier.worker.Worker')
|
self.l = logging.getLogger('solo_turnier.worker.Worker')
|
||||||
|
self._allDances = (
|
||||||
|
['Samba', 'Cha Cha', 'Rumba', 'Paso Doble', 'Jive'] +
|
||||||
|
['Langs. Walzer', 'Tango', 'Wiener Walzer', 'Slowfox', 'Quickstep']
|
||||||
|
)
|
||||||
|
|
||||||
def collectAllData(
|
def collectAllData(
|
||||||
self,
|
self,
|
||||||
htmlCandidatesPreview: list[str],
|
htmlCandidatesPreview: list[str],
|
||||||
csvFile: str
|
csvFile: str,
|
||||||
|
htmlResultsFileNames: list[str]
|
||||||
) -> types.State3:
|
) -> types.State3:
|
||||||
|
|
||||||
previewWorker = PreviewWorker()
|
previewWorker = PreviewWorker()
|
||||||
self.l.info('Filtering for pure preview rounds.')
|
self.l.info('Filtering for pure preview rounds.')
|
||||||
parsers = previewWorker.filterFilesPreview(htmlCandidatesPreview)
|
parsers = previewWorker.filterFilesPreview(htmlCandidatesPreview)
|
||||||
self.l.debug('Remaining files: %s', parsers.keys())
|
self.l.debug('Remaining files: %s', list(parsers.keys()))
|
||||||
|
|
||||||
self.l.info('Extracting person data from the preview rounds.')
|
self.l.info('Extracting person data from the preview rounds.')
|
||||||
previewImport = previewWorker.importAllData(parsers)
|
previewImport = previewWorker.importAllData(parsers)
|
||||||
self.l.debug('Total preview import: %s', previewImport)
|
self.l.debug('Total preview imported participants: %s', pformat(previewImport.participants))
|
||||||
|
self.l.log(5, 'Total preview results: %s', pformat(previewImport.results))
|
||||||
|
|
||||||
csvReader = solo_turnier.reader.CSVResultReader(csvFile)
|
csvReader = solo_turnier.reader.CSVResultReader(csvFile)
|
||||||
self.l.info('Loading the total result CSV file %s', csvFile)
|
self.l.info('Loading the total result CSV file %s', csvFile)
|
||||||
csvRows = csvReader.extractResult()
|
csvRows = csvReader.extractResult()
|
||||||
|
|
||||||
return None
|
resultExtractor = ResultExtractor()
|
||||||
|
resultParsers = resultExtractor.getAllParsers(htmlResultsFileNames)
|
||||||
|
htmlResults = resultExtractor.extractAllData(resultParsers)
|
||||||
|
self.l.info('Overall result data extracted: %s', pformat(htmlResults.results))
|
||||||
|
|
||||||
|
return types.State3(csvRows, previewImport, htmlResults)
|
||||||
|
|
||||||
|
def combineData(self, importedData: types.State3):
    """Build the per-group result tables from the imported data.

    For each group found in the preview import, the dances and participants
    are determined, the results of every participant are looked up, and
    everything is wrapped in a ``types.TotalGroupResult``. Returns a
    ``types.State4`` built from the mapping group -> total result.
    """
    self.l.info('Starting to build data sets.')
    foundGroups = self._extractGroups(importedData)
    self.l.debug('Found groups in the dataset: %s', foundGroups)

    perGroup = {}

    for currentGroup in foundGroups:
        self.l.debug('Collecting data for total result of group %s', currentGroup)

        groupDances = self._extractDancesPerGroup(importedData, currentGroup)
        self.l.log(5, 'Found dances in group %s: %s', currentGroup, groupDances)

        members = self._extractParticipantsPerGroup(importedData.previewImport, currentGroup)
        self.l.log(5, 'Related participants %s', members)

        memberResults = {}
        for member in members:
            self.l.log(5, 'Collecting data for %s', member)
            singleResult = self._getResultOfSingleParticipant(
                member, currentGroup, importedData.previewImport, importedData.htmlResults, groupDances
            )
            self.l.log(5, 'Obtained result %s', singleResult)
            memberResults[member] = singleResult

        perGroup[currentGroup] = types.TotalGroupResult(groupDances, memberResults)

    self.l.log(5, 'Total result of all groups: %s', pformat(perGroup))

    return types.State4(perGroup)
|
||||||
|
|
||||||
|
|
||||||
|
def _extractGroups(self, data: types.State3):
    """Return the groups of all preview participants, ordered by GroupParser."""
    groupSet = set()
    for participantList in data.previewImport.participants.values():
        groupSet.update(participant.group for participant in participantList)

    self.l.log(5, 'Set of active groups: %s', groupSet)
    return solo_turnier.group.GroupParser().getGroupsAsSortedList(groupSet)
|
||||||
|
|
||||||
|
def _extractDancesPerGroup(self, data: types.State3, group: solo_turnier.group.Group):
    """Return the dances found in the preview results as an ordered list.

    Dances listed in ``self._allDances`` come first, in that order; any other
    dance names follow in sorted order, and a warning is logged for them.
    """
    # NOTE(review): ``group`` is not consulted here - the dances of all
    # participants are collected. Confirm whether filtering by group is intended.
    registered = set()
    unregistered = set()
    for perDance in data.previewImport.results.values():
        names = set(perDance.keys())
        registered |= names.intersection(self._allDances)
        unregistered |= names.difference(self._allDances)

    if unregistered:
        self.l.warning('There were dances found, that are not registered. A bug? The dances were: %s', unregistered)

    known = [dance for dance in self._allDances if dance in registered]
    return known + sorted(unregistered)
|
||||||
|
|
||||||
|
def _extractParticipantsPerGroup(
    self,
    previewData: types.HtmlPreviewImport,
    group: solo_turnier.group.Group
) -> list[types.HtmlPreviewParticipant]:
    """Return all preview participants whose ``group`` equals the given group."""
    return [
        participant
        for participantList in previewData.participants.values()
        for participant in participantList
        if participant.group == group
    ]
|
||||||
|
|
||||||
|
def _getResultOfSingleParticipant(
    self,
    participant: types.HtmlPreviewParticipant,
    nominalGroup: solo_turnier.group.Group,
    previewResults: types.HtmlPreviewImport,
    totalResults: types.HtmlCompetitionTotalResults,
    allDances: list[str]
) -> list[types.SingleParticipantResult|None]:
    """Return one entry per dance in ``allDances`` for the given participant.

    An entry is a ``types.SingleParticipantResult`` built from the first raw
    result found for that dance, or ``None`` when there is none. An exception
    is raised when the matching raw entry does not hold exactly one result.
    ``nominalGroup`` and ``previewResults`` are not read by this method.
    """
    rawResults = totalResults.getById(participant.id)
    self.l.log(5, 'Found result data (raw): %s', rawResults)

    def lookup(dance: str):
        # Keys of rawResults are (dance, group, class) tuples.
        for key, entries in rawResults.items():
            if key[0] != dance:
                continue

            if len(entries) != 1:
                raise Exception('Multiple results found with same key')
            single = entries[0]

            return types.SingleParticipantResult(
                key[2], dance, single.finalist,
                single.place, single.placeTo
            )
        return None

    return [lookup(dance) for dance in allDances]
|
||||||
|
Loading…
Reference in New Issue
Block a user