diff --git a/src/solo_turnier/html_parser.py b/src/solo_turnier/html_parser.py index 5db8900..5bd35a4 100644 --- a/src/solo_turnier/html_parser.py +++ b/src/solo_turnier/html_parser.py @@ -68,6 +68,93 @@ class HtmlParser: ret = HtmlImport(title, participants) return ret + def parsePreparationRoundString(self, text: str): + soup = BeautifulSoup(text, 'html.parser') + + title = soup.find('div', class_='eventhead').table.tr.td.contents[0] + tableData = [] + rowTitles = [] + + def __mapBr(td): + for br in td.find_all('br'): + br.replace_with('\n') + td.smooth() + return td + + def __extractTitles(table): + for row in table.find_all('tr')[1:]: + rowTitles.append(__mapBr(row.td).string) + + def __extractColumns(table): + content = [] + + def __extractContent(td): + for br in td.find_all('br'): + br.replace_with('\n') + + span = td.span + if span is not None: + span = span.extract() + meta = span.string + else: + meta = None + + td.smooth() + + return { + 'text': td.string, + 'meta': meta + } + + def __extractRow(row): + entries = [] + for entry in row.find_all('td')[1:]: + entries.append(__extractContent(entry)) + return entries + + for row in table.find_all('tr')[1:]: + content.append(__extractRow(row)) + + return content + + def __mergeColumns(columns1, columns2): + return list(map(lambda x, y: x + y, columns1, columns2)) + + extract = soup.find('div', class_='extract') + tables = extract.find_all('table', class_='tab1') + + __extractTitles(tables[0]) + tableData = __extractColumns(tables[0]) + + for table in tables[1:]: + tableData = __mergeColumns(tableData, __extractColumns(table)) + + data = { + 'titles': rowTitles, + 'table': tableData + } + + return {'title': title, 'data': data} + + def cleanPreparationRoundImport(self, data): + def __cleanTable(table): + def __cleanText(s: str): + print("cleaning string ", s) + return s.strip(' \n\xa0') + + def __cleanEntry(entry): + entry['text'] = __cleanText(entry['text']) + if entry['meta'] is not None: + entry['meta'] = __cleanText(entry['meta']) + + for row in table: + for entry in row: + print(entry) + __cleanEntry(entry) + + data['title'] = data['title'].strip() + __cleanTable(data['data']['table']) + def guessDataFromHtmlTitle(self, title): match = re.compile('.*?ETW, Solos (.*)').match(title) if match is None: diff --git a/src/solo_turnier/tests/html_parser/1/erg.htm b/src/solo_turnier/tests/html_parser/erg/1/erg.htm similarity index 100% rename from src/solo_turnier/tests/html_parser/1/erg.htm rename to src/solo_turnier/tests/html_parser/erg/1/erg.htm diff --git a/src/solo_turnier/tests/html_parser/1/expected.json b/src/solo_turnier/tests/html_parser/erg/1/expected.json similarity index 100% rename from src/solo_turnier/tests/html_parser/1/expected.json rename to src/solo_turnier/tests/html_parser/erg/1/expected.json diff --git a/src/solo_turnier/tests/html_parser/2/erg.htm b/src/solo_turnier/tests/html_parser/erg/2/erg.htm similarity index 100% rename from src/solo_turnier/tests/html_parser/2/erg.htm rename to src/solo_turnier/tests/html_parser/erg/2/erg.htm diff --git a/src/solo_turnier/tests/html_parser/2/expected.json b/src/solo_turnier/tests/html_parser/erg/2/expected.json similarity index 100% rename from src/solo_turnier/tests/html_parser/2/expected.json rename to src/solo_turnier/tests/html_parser/erg/2/expected.json diff --git a/src/solo_turnier/tests/html_parser/tabges/1/cleaned.json b/src/solo_turnier/tests/html_parser/tabges/1/cleaned.json new file mode 100644 index 0000000..d4e51d5 --- /dev/null +++ b/src/solo_turnier/tests/html_parser/tabges/1/cleaned.json @@ -0,0 +1,198 @@ +{ + "title": "19.11.2022 - ETW, Solos Kin./Jun. Sichtung Rumba", + "data": { + "titles": [ + "Wertungsrichter", + "A) WRa\nB) WRb\nC) WRc", + "Ergebnis der Sichtung", + "Startnummer", + "Platz von\nPlatz bis", + "Aufstiegspunkte", + "Startgruppe" + ], + "table": [ + [ + { + "meta": "Max Mustermann 20", + "text": "1" + }, + { + "meta": "Max Mustermann 2", + "text": "2" + }, + { + "meta": "Max Mustermann 3", + "text": "3" + }, + { + "meta": "Max Mustermann 16", + "text": "16" + }, + { + "meta": "Max Mustermann 17", + "text": "17" + }, + { + "meta": null, + "text": "" + } + ], + [ + { + "meta": null, + "text": "1,0\n1,0\n1,0" + }, + { + "meta": null, + "text": "1,0\n1,0\n1,0" + }, + { + "meta": null, + "text": "1,0\n1,0\n1,0" + }, + { + "meta": null, + "text": "1,0\n2,0\n3,0" + }, + { + "meta": null, + "text": "1,0\n1,5\n3,0" + }, + { + "meta": null, + "text": "" + } + ], + [ + { + "meta": null, + "text": "3,0" + }, + { + "meta": null, + "text": "3,0" + }, + { + "meta": null, + "text": "3,0" + }, + { + "meta": null, + "text": "6,0" + }, + { + "meta": null, + "text": "5,5" + }, + { + "meta": null, + "text": "" + } + ], + [ + { + "meta": "Max Mustermann 20", + "text": "1" + }, + { + "meta": "Max Mustermann 2", + "text": "2" + }, + { + "meta": "Max Mustermann 3", + "text": "3" + }, + { + "meta": "Max Mustermann 16", + "text": "16" + }, + { + "meta": "Max Mustermann 17", + "text": "17" + }, + { + "meta": null, + "text": "" + } + ], + [ + { + "meta": null, + "text": "Adv" + }, + { + "meta": null, + "text": "Beg" + }, + { + "meta": null, + "text": "Beg" + }, + { + "meta": null, + "text": "Beg" + }, + { + "meta": null, + "text": "Beg" + }, + { + "meta": null, + "text": "" + } + ], + [ + { + "meta": null, + "text": "-" + }, + { + "meta": null, + "text": "-" + }, + { + "meta": null, + "text": "-" + }, + { + "meta": null, + "text": "-" + }, + { + "meta": null, + "text": "-" + }, + { + "meta": null, + "text": "" + } + ], + [ + { + "meta": null, + "text": "Jun" + }, + { + "meta": null, + "text": "Kin" + }, + { + "meta": null, + "text": "Kin" + }, + { + "meta": null, + "text": "Kin" + }, + { + "meta": null, + "text": "Kin" + }, + { + "meta": null, + "text": "" + } + ] + ] + } +} diff --git a/src/solo_turnier/tests/html_parser/tabges/1/expected.json b/src/solo_turnier/tests/html_parser/tabges/1/expected.json new file mode 100644 index 0000000..cffc6b3 --- /dev/null +++ b/src/solo_turnier/tests/html_parser/tabges/1/expected.json @@ -0,0 +1,198 @@ +{ + "title": "19.11.2022 - ETW, Solos Kin./Jun. Sichtung Rumba", + "data": { + "titles": [ + "Wertungsrichter", + "A) WRa\nB) WRb\nC) WRc", + "Ergebnis der Sichtung", + "Startnummer", + "Platz von\nPlatz bis", + "Aufstiegspunkte", + "Startgruppe" + ], + "table": [ + [ + { + "meta": "Max Mustermann 20", + "text": "1" + }, + { + "meta": "Max Mustermann 2", + "text": "2" + }, + { + "meta": "Max Mustermann 3", + "text": "3" + }, + { + "meta": "Max Mustermann 16", + "text": "16" + }, + { + "meta": "Max Mustermann 17", + "text": "17" + }, + { + "meta": null, + "text": "\u00a0" + } + ], + [ + { + "meta": null, + "text": "1,0\n1,0\n1,0" + }, + { + "meta": null, + "text": "1,0\n1,0\n1,0" + }, + { + "meta": null, + "text": "1,0\n1,0\n1,0" + }, + { + "meta": null, + "text": "1,0\n2,0\n3,0" + }, + { + "meta": null, + "text": "1,0\n1,5\n3,0" + }, + { + "meta": null, + "text": "\u00a0" + } + ], + [ + { + "meta": null, + "text": "3,0" + }, + { + "meta": null, + "text": "3,0" + }, + { + "meta": null, + "text": "3,0" + }, + { + "meta": null, + "text": "6,0" + }, + { + "meta": null, + "text": "5,5" + }, + { + "meta": null, + "text": "\u00a0" + } + ], + [ + { + "meta": "Max Mustermann 20", + "text": "1" + }, + { + "meta": "Max Mustermann 2", + "text": "2" + }, + { + "meta": "Max Mustermann 3", + "text": "3" + }, + { + "meta": "Max Mustermann 16", + "text": "16" + }, + { + "meta": "Max Mustermann 17", + "text": "17" + }, + { + "meta": null, + "text": "\u00a0" + } + ], + [ + { + "meta": null, + "text": "Adv\n\u00a0" + }, + { + "meta": null, + "text": "Beg\n\u00a0" + }, + { + "meta": null, + "text": "Beg\n\u00a0" + }, + { + "meta": null, + "text": "Beg\n\u00a0" + }, + { + "meta": null, + "text": "Beg\n\u00a0" + }, + { + "meta": null, + "text": "\u00a0\n\u00a0" + } + ], + [ + { + "meta": null, + "text": "-" + }, + { + "meta": null, + "text": "-" + }, + { + "meta": null, + "text": "-" + }, + { + "meta": null, + "text": "-" + }, + { + "meta": null, + "text": "-" + }, + { + "meta": null, + "text": "\u00a0" + } + ], + [ + { + "meta": null, + "text": "Jun" + }, + { + "meta": null, + "text": "Kin" + }, + { + "meta": null, + "text": "Kin" + }, + { + "meta": null, + "text": "Kin" + }, + { + "meta": null, + "text": "Kin" + }, + { + "meta": null, + "text": "\u00a0" + } + ] + ] + } +} diff --git a/src/solo_turnier/tests/html_parser/tabges/1/tabges.htm b/src/solo_turnier/tests/html_parser/tabges/1/tabges.htm new file mode 100644 index 0000000..95e2695 --- /dev/null +++ b/src/solo_turnier/tests/html_parser/tabges/1/tabges.htm @@ -0,0 +1,130 @@ + + + + + + + + + + 19.11.2022 Kin./Jun. Sichtung Rumba + + + +
+
+ + +
19.11.2022 - ETW, Solos Kin./Jun. Sichtung Rumba + +
+
+
+
Wertungstabelle Gesamt
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Anzahl Teilnehmer: 18Startnummer
Wertungsrichter1Max Mustermann 202Max Mustermann 23Max Mustermann 3
A) WRa
B) WRb
C) WRc
1,0
1,0
1,0
1,0
1,0
1,0
1,0
1,0
1,0
Ergebnis der Sichtung3,03,03,0
Startnummer1Max Mustermann 202Max Mustermann 23Max Mustermann 3
Platz von
Platz bis
Adv
 
Beg
 
Beg
 
Aufstiegspunkte---
StartgruppeJunKinKin
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Anzahl Teilnehmer: 18Startnummer
Wertungsrichter16Max Mustermann 1617Max Mustermann 17 
A) WRa
B) WRb
C) WRc
1,0
2,0
3,0
1,0
1,5
3,0
 
Ergebnis der Sichtung6,05,5 
Startnummer16Max Mustermann 1617Max Mustermann 17 
Platz von
Platz bis
Beg
 
Beg
 
 
 
Aufstiegspunkte-- 
StartgruppeKinKin 
+
+
+
+
+
+

Diese Liste wurde mit TopTurnier für Windows V9.3 erstellt.
+ + diff --git a/src/solo_turnier/tests/test_html_parser.py b/src/solo_turnier/tests/test_html_parser.py index 6a70bf1..0bc1bc2 100644 --- a/src/solo_turnier/tests/test_html_parser.py +++ b/src/solo_turnier/tests/test_html_parser.py @@ -4,10 +4,10 @@ import json import solo_turnier.html_parser -@pytest.fixture(scope='module', params=["1", '2']) +@pytest.fixture(scope='module', params=range(2)) def dataProviderHtmlParser(request): - variant = request.param - dir = os.path.join(os.path.dirname(__file__), 'html_parser', variant) + variant = str(request.param+1) + dir = os.path.join(os.path.dirname(__file__), 'html_parser', 'erg', variant) htmlFile = os.path.join(dir, 'erg.htm') jsonFile = os.path.join(dir, 'expected.json') @@ -70,3 +70,49 @@ def test_guessDataFromTitle(fixture_guessDataFromTitle): ret = parser.guessDataFromHtmlTitle(fixture_guessDataFromTitle[0]) assert ret == fixture_guessDataFromTitle[1] + +@pytest.fixture(params=range(1)) +def fixture_parsePreparationResult(request): + variant = str(request.param+1) + dir = os.path.join(os.path.dirname(__file__), 'html_parser', 'tabges', variant) + htmlFile = os.path.join(dir, 'tabges.htm') + jsonFile = os.path.join(dir, 'expected.json') + + with open(htmlFile, 'r') as fp: + html = fp.read() + with open(jsonFile, 'r') as fp: + jsonContent = json.load(fp) + + return (html, jsonContent) + +def test_parsePreparationResult(fixture_parsePreparationResult): + html = fixture_parsePreparationResult[0] + jsonContent = fixture_parsePreparationResult[1] + + parser = solo_turnier.html_parser.HtmlParser() + ret = parser.parsePreparationRoundString(html) + + assert ret == jsonContent + +@pytest.fixture(params=range(1)) +def fixture_cleanPreparationImport(request): + variant = str(request.param+1) + dir = os.path.join(os.path.dirname(__file__), 'html_parser', 'tabges', variant) + srcFile = os.path.join(dir, 'expected.json') + expectedFile = os.path.join(dir, 'cleaned.json') + + with open(srcFile, 'r') as fp: + source = json.load(fp) + with open(expectedFile, 'r') as fp: + expected = json.load(fp) + + return (source, expected) + +def test_cleanPreparationImport(fixture_cleanPreparationImport): + src = fixture_cleanPreparationImport[0] + expected = fixture_cleanPreparationImport[1] + + parser = solo_turnier.html_parser.HtmlParser() + parser.cleanPreparationRoundImport(src) + + assert src == expected diff --git a/src/solo_turnier/worker.py b/src/solo_turnier/worker.py index e65314b..5cfc0d9 100644 --- a/src/solo_turnier/worker.py +++ b/src/solo_turnier/worker.py @@ -18,7 +18,7 @@ class ResultRow: self.competitionClass = competitionClass def __str__(self): - return f'{self.name} ({self.id}, {self.club}) are in {self.group} {self.class_} and danced the {self.dance} in {self.competitionGroup} {self.competitionClass} getting place {self.place}-{self.placeTo}' + return f'{self.name} ({self.id}, {self.club}) is in {self.group} {self.class_} and danced the {self.dance} in {self.competitionGroup} {self.competitionClass} getting place {self.place}-{self.placeTo}' class ResultPerson: def __init__(self, firstName, lastName, club, id = None, group = None):