Create class to look for possible result sets

This commit is contained in:
Christian Wolf 2022-11-13 18:04:49 +01:00
parent 0d978221f1
commit 9d88d09a97
21 changed files with 124 additions and 132 deletions

View File

@ -0,0 +1,24 @@
import os
import logging
class HtmlLocator:
    """Locate result files named ``erg.htm`` inside a directory tree."""

    def __init__(self):
        # Logger for this component (kept under the attribute name ``l``).
        self.l = logging.getLogger('solo_turnier.html_locator')
        # Base name a file must have to count as a candidate.
        self.fileName = 'erg.htm'

    def __findRecursivelyCandidates(self, path: str) -> list:
        """Return the paths of all candidate files in ``path`` and below.

        A candidate is a regular file called ``self.fileName``. The returned
        paths are built with ``os.path.join`` starting from ``path``, so they
        are relative or absolute exactly as ``path`` is.

        The candidate of a directory is listed before the candidates of its
        sub-directories, and sub-directories are visited in sorted order.

        Raises:
            OSError: propagated from ``os.listdir`` if a directory cannot be
                listed (for example when ``path`` does not exist).
        """
        ret = []

        # os.listdir yields entries in arbitrary order; sorting makes the
        # result reproducible across platforms and file systems.
        entries = sorted(os.listdir(path))

        candidate = os.path.join(path, self.fileName)
        # The isfile check rejects a directory that happens to carry the name.
        if self.fileName in entries and os.path.isfile(candidate):
            ret.append(candidate)

        for entry in entries:
            subPath = os.path.join(path, entry)
            if os.path.isdir(subPath):
                ret.extend(self.__findRecursivelyCandidates(subPath))

        return ret

    def findCandidates(self, path: str) -> list:
        """Return all candidate result files found in ``path`` and below.

        Args:
            path: Directory at which the recursive search starts.

        Returns:
            A list of path strings, one per ``erg.htm`` file found.
        """
        return self.__findRecursivelyCandidates(path)

View File

@ -1,5 +1,4 @@
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import bs4
import logging import logging
import re import re
@ -12,7 +11,7 @@ class HtmlParser:
def parseString(self, text: str): def parseString(self, text: str):
soup = BeautifulSoup(text, 'html.parser') soup = BeautifulSoup(text, 'html.parser')
ret = {} participants = {}
def __parseRows(rows, finalist: bool): def __parseRows(rows, finalist: bool):
def __parseRow(row): def __parseRow(row):
@ -32,7 +31,7 @@ class HtmlParser:
'place': place, 'place': place,
'finalist': finalist 'finalist': finalist
} }
ret[number] = participant participants[number] = participant
for row in rows: for row in rows:
__parseRow(row) __parseRow(row)
@ -54,4 +53,10 @@ class HtmlParser:
__parseRemainingTables(tables[1:]) __parseRemainingTables(tables[1:])
title = soup.find('div', class_='eventhead').table.tr.td.contents[0]
ret = {
'participants': participants,
'title': title
}
return ret return ret

View File

@ -27,64 +27,3 @@ class AllResultReader:
logging.getLogger('solo_turnier.reader.all_results').debug('Imported results from allresults.csv file: %s', ret) logging.getLogger('solo_turnier.reader.all_results').debug('Imported results from allresults.csv file: %s', ret)
return ret return ret
class ERReader:
    """Read the finalists of one dance from a plain-text result file."""

    def __init__(self, fileName: str):
        """Store the path of the file to read and set up the logger.

        Args:
            fileName: Path of the result file that ``readFile`` opens.
        """
        self.fileName = fileName
        self.l = logging.getLogger('solo_turnier.reader.wert_er')

    def __parseFileContent(self, lines):
        """Extract group, class, dance and finalist numbers from ``lines``.

        Args:
            lines: Non-empty lines of the file. Indices 2, 3 and 4 must hold
                the ``Startgruppe:``, ``Startklasse:`` and ``Turnierart:``
                header entries.

        Returns:
            A dict with the keys ``gruppe``, ``klasse``, ``tanz`` and
            ``finalisten`` (a list of ints; empty if the dance is not found).

        Raises:
            IndexError: if fewer than five lines are given.
            AttributeError: if a header line does not match its pattern.
            Exception: if a line of the result table does not start with a
                number.
        """
        # Raw strings keep ``\W`` a regex class instead of an invalid escape.
        gruppe = re.search(r'Startgruppe:\W(.+)', lines[2]).group(1).strip()
        klasse = re.search(r'Startklasse:\W(.+)', lines[3]).group(1).strip()
        tanz = re.search(r'Turnierart:\W(.+)', lines[4]).group(1).strip()

        restLines = lines[5:]

        # Search for the first line starting with the name of the dance.
        found = next(
            (i for i, line in enumerate(restLines) if line.startswith(tanz)),
            -1,
        )

        if found == -1:
            self.l.warning(f'Could not find the dance {tanz} in the result file for {gruppe} ({klasse}). This might mean that the competition was not carried out.')
            return {
                'gruppe': gruppe,
                'klasse': klasse,
                'tanz': tanz,
                'finalisten': []
            }

        # Extract the finalists: every line after the dance line up to the
        # first line starting with 'TopTurnier' must begin with a number.
        numberPattern = re.compile('[0-9]+')
        finalists = []
        for line in restLines[found + 1:]:
            if line.startswith('TopTurnier'):
                break

            match = numberPattern.match(line.strip())
            if match is None:
                raise Exception('Could not parse starter number for end result table.')
            finalists.append(int(match.group(0)))

        ret = {
            'gruppe': gruppe,
            'klasse': klasse,
            'tanz': tanz,
            'finalisten': finalists
        }
        self.l.debug('Extracted data for final: %s', ret)
        return ret

    def readFile(self):
        """Read ``self.fileName`` and return the parsed result dict.

        Trailing newlines are stripped and empty lines are dropped before
        parsing, so the header positions refer to non-empty lines only.
        """
        with open(self.fileName, 'r') as fp:
            lines = [line.strip('\n') for line in fp]

        lines = [line for line in lines if line != '']
        self.l.debug('Read lines for final: %s', lines)

        return self.__parseFileContent(lines)

View File

@ -1,4 +1,5 @@
{ {
"participants": {
"14": { "14": {
"name": "Max Mustermann 1", "name": "Max Mustermann 1",
"place": "1.", "place": "1.",
@ -44,4 +45,6 @@
"place": "9.", "place": "9.",
"finalist": false "finalist": false
} }
},
"title": "09.07.2022 - ETW, Solos Jun. Newc./Beg. Rumba"
} }

View File

@ -1,4 +1,6 @@
{ {
"participants": {
"14": { "14": {
"name": "Maxime Musterfrau 1", "name": "Maxime Musterfrau 1",
"place": "1.", "place": "1.",
@ -24,4 +26,6 @@
"place": "5.", "place": "5.",
"finalist": true "finalist": true
} }
},
"title": "09.07.2022 - ETW, Solos Jun. Beginner Jive"
} }

View File

@ -0,0 +1,17 @@
import os
import solo_turnier.html_locator
def test_fetchLocationCandidates():
    """HtmlLocator reports every ``erg.htm`` below the fixture export folder."""
    folder = os.path.join(os.path.dirname(__file__), 'html_locator', 'export')
    relFolder = os.path.relpath(folder)

    locator = solo_turnier.html_locator.HtmlLocator()
    candidates = locator.findCandidates(relFolder)

    # Derive the expected paths from relFolder so the test does not depend on
    # the working directory pytest runs from or on the path separator.
    expected = [
        os.path.join(relFolder, '2-bar', 'erg.htm'),
        os.path.join(relFolder, '3-baz', 'erg.htm'),
        os.path.join(relFolder, '3-baz', 'subfolder', '4-baz', 'erg.htm'),
    ]

    # Compared as sets: the order of the candidates is not part of the check.
    assert set(candidates) == set(expected)