From 9d88d09a971e1facb22ce4038efadb24d0837c4e Mon Sep 17 00:00:00 2001 From: Christian Wolf Date: Sun, 13 Nov 2022 18:04:49 +0100 Subject: [PATCH] Create class to look for possible result sets --- src/solo_turnier/html_locator.py | 24 +++++ src/solo_turnier/html_parser.py | 11 ++- src/solo_turnier/reader.py | 61 ------------- .../tests/html_locator/export/1-foo/deck.html | 0 .../tests/html_locator/export/1-foo/index.htm | 0 .../tests/html_locator/export/1-foo/menu.html | 0 .../tests/html_locator/export/2-bar/deck.html | 0 .../tests/html_locator/export/2-bar/erg.htm | 0 .../tests/html_locator/export/2-bar/index.htm | 0 .../tests/html_locator/export/2-bar/menu.html | 0 .../tests/html_locator/export/3-baz/deck.html | 0 .../tests/html_locator/export/3-baz/erg.htm | 0 .../tests/html_locator/export/3-baz/index.htm | 0 .../tests/html_locator/export/3-baz/menu.html | 0 .../export/3-baz/subfolder/4-baz/deck.html | 0 .../export/3-baz/subfolder/4-baz/erg.htm | 0 .../export/3-baz/subfolder/4-baz/index.htm | 0 .../export/3-baz/subfolder/4-baz/menu.html | 0 .../tests/html_parser/1/expected.json | 91 ++++++++++--------- .../tests/html_parser/2/expected.json | 52 ++++++----- src/solo_turnier/tests/test_html_locator.py | 17 ++++ 21 files changed, 124 insertions(+), 132 deletions(-) create mode 100644 src/solo_turnier/html_locator.py create mode 100644 src/solo_turnier/tests/html_locator/export/1-foo/deck.html create mode 100644 src/solo_turnier/tests/html_locator/export/1-foo/index.htm create mode 100644 src/solo_turnier/tests/html_locator/export/1-foo/menu.html create mode 100644 src/solo_turnier/tests/html_locator/export/2-bar/deck.html create mode 100644 src/solo_turnier/tests/html_locator/export/2-bar/erg.htm create mode 100644 src/solo_turnier/tests/html_locator/export/2-bar/index.htm create mode 100644 src/solo_turnier/tests/html_locator/export/2-bar/menu.html create mode 100644 src/solo_turnier/tests/html_locator/export/3-baz/deck.html create mode 100644 src/solo_turnier/tests/html_locator/export/3-baz/erg.htm create mode 100644 src/solo_turnier/tests/html_locator/export/3-baz/index.htm create mode 100644 src/solo_turnier/tests/html_locator/export/3-baz/menu.html create mode 100644 src/solo_turnier/tests/html_locator/export/3-baz/subfolder/4-baz/deck.html create mode 100644 src/solo_turnier/tests/html_locator/export/3-baz/subfolder/4-baz/erg.htm create mode 100644 src/solo_turnier/tests/html_locator/export/3-baz/subfolder/4-baz/index.htm create mode 100644 src/solo_turnier/tests/html_locator/export/3-baz/subfolder/4-baz/menu.html create mode 100644 src/solo_turnier/tests/test_html_locator.py diff --git a/src/solo_turnier/html_locator.py b/src/solo_turnier/html_locator.py new file mode 100644 index 0000000..e4be68f --- /dev/null +++ b/src/solo_turnier/html_locator.py @@ -0,0 +1,24 @@ +import os +import logging + +class HtmlLocator: + def __init__(self): + self.l = logging.getLogger('solo_turnier.html_locator') + self.fileName = 'erg.htm' + + def __findRecursivelyCandidates(self, path: str): + ret = [] + ls = os.listdir(path) + + if self.fileName in ls and os.path.isfile(os.path.join(path, self.fileName)): + ret.append(os.path.join(path, self.fileName)) + + for p in ls: + subPath = os.path.join(path, p) + if os.path.isdir(subPath): + ret = ret + self.__findRecursivelyCandidates(subPath) + + return ret + + def findCandidates(self, path: str): + return self.__findRecursivelyCandidates(path) diff --git a/src/solo_turnier/html_parser.py b/src/solo_turnier/html_parser.py index ce25f2e..7d628ae 100644 --- a/src/solo_turnier/html_parser.py +++ b/src/solo_turnier/html_parser.py @@ -1,5 +1,4 @@ from bs4 import BeautifulSoup -import bs4 import logging import re @@ -12,7 +11,7 @@ class HtmlParser: def parseString(self, text: str): soup = BeautifulSoup(text, 'html.parser') - ret = {} + participants = {} def __parseRows(rows, finalist: bool): def __parseRow(row): @@ -32,7 +31,7 @@ class HtmlParser: 'place': place, 'finalist': finalist } - ret[number] = participant + participants[number] = participant for row in rows: __parseRow(row) @@ -54,4 +53,10 @@ class HtmlParser: __parseRemainingTables(tables[1:]) + title = soup.find('div', class_='eventhead').table.tr.td.contents[0] + + ret = { + 'participants': participants, + 'title': title + } return ret diff --git a/src/solo_turnier/reader.py b/src/solo_turnier/reader.py index 8bef531..dac7720 100644 --- a/src/solo_turnier/reader.py +++ b/src/solo_turnier/reader.py @@ -27,64 +27,3 @@ class AllResultReader: logging.getLogger('solo_turnier.reader.all_results').debug('Imported results from allresults.csv file: %s', ret) return ret - -class ERReader: - def __init__(self, fileName: str): - self.fileName = fileName - self.l = logging.getLogger('solo_turnier.reader.wert_er') - - def __parseFileContent(self, lines): - gruppe = re.compile('Startgruppe:\W(.+)').search(lines[2]).group(1).strip() - klasse = re.compile('Startklasse:\W(.+)').search(lines[3]).group(1).strip() - tanz = re.compile('Turnierart:\W(.+)').search(lines[4]).group(1).strip() - - restLines = lines[5:] - - # Search for first line with the name of the dance - found = -1 - for i in range(len(restLines)): - if restLines[i].startswith(tanz): - found = i - break - - if found == -1: - self.l.warn(f'Could not find the dance {tanz} in the result file for {gruppe} ({klasse}). This might mean that the competition was not carried out.') - return { - 'gruppe': gruppe, - 'klasse': klasse, - 'tanz': tanz, - 'finalisten': [] - } - pass - - # Extract the finalists - finalists = [] - for i in range(found + 1, len(restLines)): - if restLines[i].startswith('TopTurnier'): - break - - # self.l.debug('Parsing line "%s"', restLines[i].strip()) - - match = re.compile('[0-9]+').match(restLines[i].strip()) - if match is None: - raise Exception('Could not parse starter number for end result table.') - else: - finalists.append(int(match.group(0))) - - ret = { - 'gruppe': gruppe, - 'klasse': klasse, - 'tanz': tanz, - 'finalisten': finalists - } - self.l.debug('Extracted data for final: %s', ret) - return ret - - def readFile(self): - with open(self.fileName, 'r') as fp: - lines = fp.readlines() - lines = [l.strip('\n') for l in lines] - lines = [l for l in lines if l != ''] - self.l.debug('Read lines for final: %s', lines) - - return self.__parseFileContent(lines) diff --git a/src/solo_turnier/tests/html_locator/export/1-foo/deck.html b/src/solo_turnier/tests/html_locator/export/1-foo/deck.html new file mode 100644 index 0000000..e69de29 diff --git a/src/solo_turnier/tests/html_locator/export/1-foo/index.htm b/src/solo_turnier/tests/html_locator/export/1-foo/index.htm new file mode 100644 index 0000000..e69de29 diff --git a/src/solo_turnier/tests/html_locator/export/1-foo/menu.html b/src/solo_turnier/tests/html_locator/export/1-foo/menu.html new file mode 100644 index 0000000..e69de29 diff --git a/src/solo_turnier/tests/html_locator/export/2-bar/deck.html b/src/solo_turnier/tests/html_locator/export/2-bar/deck.html new file mode 100644 index 0000000..e69de29 diff --git a/src/solo_turnier/tests/html_locator/export/2-bar/erg.htm b/src/solo_turnier/tests/html_locator/export/2-bar/erg.htm new file mode 100644 index 0000000..e69de29 diff --git a/src/solo_turnier/tests/html_locator/export/2-bar/index.htm b/src/solo_turnier/tests/html_locator/export/2-bar/index.htm new file mode 100644 index 0000000..e69de29 diff --git a/src/solo_turnier/tests/html_locator/export/2-bar/menu.html b/src/solo_turnier/tests/html_locator/export/2-bar/menu.html new file mode 100644 index 0000000..e69de29 diff --git a/src/solo_turnier/tests/html_locator/export/3-baz/deck.html b/src/solo_turnier/tests/html_locator/export/3-baz/deck.html new file mode 100644 index 0000000..e69de29 diff --git a/src/solo_turnier/tests/html_locator/export/3-baz/erg.htm b/src/solo_turnier/tests/html_locator/export/3-baz/erg.htm new file mode 100644 index 0000000..e69de29 diff --git a/src/solo_turnier/tests/html_locator/export/3-baz/index.htm b/src/solo_turnier/tests/html_locator/export/3-baz/index.htm new file mode 100644 index 0000000..e69de29 diff --git a/src/solo_turnier/tests/html_locator/export/3-baz/menu.html b/src/solo_turnier/tests/html_locator/export/3-baz/menu.html new file mode 100644 index 0000000..e69de29 diff --git a/src/solo_turnier/tests/html_locator/export/3-baz/subfolder/4-baz/deck.html b/src/solo_turnier/tests/html_locator/export/3-baz/subfolder/4-baz/deck.html new file mode 100644 index 0000000..e69de29 diff --git a/src/solo_turnier/tests/html_locator/export/3-baz/subfolder/4-baz/erg.htm b/src/solo_turnier/tests/html_locator/export/3-baz/subfolder/4-baz/erg.htm new file mode 100644 index 0000000..e69de29 diff --git a/src/solo_turnier/tests/html_locator/export/3-baz/subfolder/4-baz/index.htm b/src/solo_turnier/tests/html_locator/export/3-baz/subfolder/4-baz/index.htm new file mode 100644 index 0000000..e69de29 diff --git a/src/solo_turnier/tests/html_locator/export/3-baz/subfolder/4-baz/menu.html b/src/solo_turnier/tests/html_locator/export/3-baz/subfolder/4-baz/menu.html new file mode 100644 index 0000000..e69de29 diff --git a/src/solo_turnier/tests/html_parser/1/expected.json b/src/solo_turnier/tests/html_parser/1/expected.json index 7438fa7..8b8b39b 100644 --- a/src/solo_turnier/tests/html_parser/1/expected.json +++ b/src/solo_turnier/tests/html_parser/1/expected.json @@ -1,47 +1,50 @@ { - "14": { - "name": "Max Mustermann 1", - "place": "1.", - "finalist": true + "participants": { + "14": { + "name": "Max Mustermann 1", + "place": "1.", + "finalist": true + }, + "9": { + "name": "Max Mustermann 2", + "place": "2.", + "finalist": true + }, + "13": { + "name": "Max Mustermann 3", + "place": "3.", + "finalist": true + }, + "17": { + "name": "Max Mustermann 4", + "place": "4.", + "finalist": true + }, + "6": { + "name": "Max Mustermann 5", + "place": "5.", + "finalist": true + }, + "27": { + "name": "Max Mustermann 6", + "place": "6.", + "finalist": true + }, + "22": { + "name": "Max Mustermann 7", + "place": "7.", + "finalist": true + }, + "26": { + "name": "Max Mustermann 8", + "place": "8.", + "finalist": false + }, + "25": { + "name": "Max Mustermann 9", + "place": "9.", + "finalist": false + } }, - "9": { - "name": "Max Mustermann 2", - "place": "2.", - "finalist": true - }, - "13": { - "name": "Max Mustermann 3", - "place": "3.", - "finalist": true - }, - "17": { - "name": "Max Mustermann 4", - "place": "4.", - "finalist": true - }, - "6": { - "name": "Max Mustermann 5", - "place": "5.", - "finalist": true - }, - "27": { - "name": "Max Mustermann 6", - "place": "6.", - "finalist": true - }, - "22": { - "name": "Max Mustermann 7", - "place": "7.", - "finalist": true - }, - "26": { - "name": "Max Mustermann 8", - "place": "8.", - "finalist": false - }, - "25": { - "name": "Max Mustermann 9", - "place": "9.", - "finalist": false - } + "title": "09.07.2022 - ETW, Solos Jun. Newc./Beg. Rumba" } diff --git a/src/solo_turnier/tests/html_parser/2/expected.json b/src/solo_turnier/tests/html_parser/2/expected.json index 4f7a89c..0032e44 100644 --- a/src/solo_turnier/tests/html_parser/2/expected.json +++ b/src/solo_turnier/tests/html_parser/2/expected.json @@ -1,27 +1,31 @@ { - "14": { - "name": "Maxime Musterfrau 1", - "place": "1.", - "finalist": true + "participants": { + + "14": { + "name": "Maxime Musterfrau 1", + "place": "1.", + "finalist": true + }, + "13": { + "name": "Maxime Musterfrau 2", + "place": "2.", + "finalist": true + }, + "17": { + "name": "Maxime Musterfrau 3", + "place": "3.", + "finalist": true + }, + "6": { + "name": "Maxime Musterfrau 4", + "place": "4.", + "finalist": true + }, + "22": { + "name": "Maxime Musterfrau 5", + "place": "5.", + "finalist": true + } }, - "13": { - "name": "Maxime Musterfrau 2", - "place": "2.", - "finalist": true - }, - "17": { - "name": "Maxime Musterfrau 3", - "place": "3.", - "finalist": true - }, - "6": { - "name": "Maxime Musterfrau 4", - "place": "4.", - "finalist": true - }, - "22": { - "name": "Maxime Musterfrau 5", - "place": "5.", - "finalist": true - } + "title": "09.07.2022 - ETW, Solos Jun. Beginner Jive" } diff --git a/src/solo_turnier/tests/test_html_locator.py b/src/solo_turnier/tests/test_html_locator.py new file mode 100644 index 0000000..a24a514 --- /dev/null +++ b/src/solo_turnier/tests/test_html_locator.py @@ -0,0 +1,17 @@ + +import os +import solo_turnier.html_locator + +def test_fetchLocationCandidates(): + folder = os.path.join(os.path.dirname(__file__), 'html_locator', 'export') + relFolder = os.path.relpath(folder) + + locator = solo_turnier.html_locator.HtmlLocator() + candidates = locator.findCandidates(relFolder) + + expected = [ + 'solo_turnier/tests/html_locator/export/2-bar/erg.htm', + 'solo_turnier/tests/html_locator/export/3-baz/erg.htm', + 'solo_turnier/tests/html_locator/export/3-baz/subfolder/4-baz/erg.htm' + ] + assert set(candidates) == set(expected)