diff --git a/auswertung.code-workspace b/auswertung.code-workspace
index 57d88ef..207029b 100644
--- a/auswertung.code-workspace
+++ b/auswertung.code-workspace
@@ -1,11 +1,17 @@
{
"folders": [
{
- "path": "."
+ "path": ".",
+ "name": "code"
},
{
"path": "../../../../../nextcloud/Documents/Projekte/SLT/Auswertungsskript Solo"
}
],
- "settings": {}
+ "settings": {
+ "python.testing.pytestEnabled": false,
+ "python.autoComplete.extraPaths": [
+ "${workspaceFolder:code}/venv/lib"
+ ]
+ }
}
diff --git a/requiremnts.txt b/requiremnts.txt
index a00a1bb..af69cc1 100644
--- a/requiremnts.txt
+++ b/requiremnts.txt
@@ -1 +1,3 @@
+beautifulsoup4==4.11.1
+soupsieve==2.3.2.post1
tabulate==0.9.0
diff --git a/src/.gitignore b/src/.gitignore
new file mode 100644
index 0000000..1bc5aa1
--- /dev/null
+++ b/src/.gitignore
@@ -0,0 +1 @@
+/.coverage
diff --git a/src/solo_turnier/html_parser.py b/src/solo_turnier/html_parser.py
new file mode 100644
index 0000000..ce25f2e
--- /dev/null
+++ b/src/solo_turnier/html_parser.py
@@ -0,0 +1,57 @@
+from bs4 import BeautifulSoup
+import bs4
+
+import logging
+import re
+
+class HtmlParser:
+    """Extract participant placements from an HTML result page."""
+
+    # Matches 'name (number)'; compiled once instead of once per row.
+    _PARTICIPANT = re.compile(r'(.*) \(([0-9]+)\)')
+
+    def __init__(self):
+        self.l = logging.getLogger('solo_turnier.html_parser')
+
+    def parseString(self, text: str):
+        """Parse the result tables of the HTML document ``text``.
+
+        Returns a dict that maps each number found in parentheses (as str)
+        to a dict with the keys 'name', 'place' and 'finalist'. Only rows
+        of the first table, which must be titled 'Endrunde', are finalists.
+
+        Raises Exception if the 'extract' div is missing, the first table
+        is not the 'Endrunde' or a row does not match 'name (number)'.
+        """
+        soup = BeautifulSoup(text, 'html.parser')
+        container = soup.find('div', class_='extract')
+        if container is None:
+            # Fail with the parse error instead of an AttributeError.
+            raise Exception('Could not parse HTML file')
+
+        tables = container.find_all('table')
+        if tables and tables[0].tr.td.contents[0] != 'Endrunde':
+            raise Exception('Could not parse HTML file')
+
+        ret = {}
+        for index, table in enumerate(tables):
+            # Data starts at the third row; the first two are skipped.
+            for row in table.find_all('tr')[2:]:
+                number, participant = self._parseRow(row, index == 0)
+                ret[number] = participant
+        return ret
+
+    def _parseRow(self, row, finalist: bool):
+        """Return (number, participant dict) for a single table row."""
+        tds = row.find_all('td')
+        # str() stores plain text rather than a node tied to the parse tree.
+        place = str(tds[0].contents[0])
+        match = self._PARTICIPANT.fullmatch(tds[1].contents[0])
+        if match is None:
+            raise Exception(f'Could not match {tds} to regex search pattern')
+        participant = {
+            'name': match.group(1),
+            'place': place,
+            'finalist': finalist,
+        }
+        return match.group(2), participant
diff --git a/src/solo_turnier/tests/__init__.py b/src/solo_turnier/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/solo_turnier/tests/html_parser/1/erg.htm b/src/solo_turnier/tests/html_parser/1/erg.htm
new file mode 100644
index 0000000..e9d4a3c
--- /dev/null
+++ b/src/solo_turnier/tests/html_parser/1/erg.htm
@@ -0,0 +1,85 @@
+
+
+
+
+
+
+
+
+
+ 09.07.2022 Jun. Newc./Beg. Rumba
+
+
+
+
+
+
+09.07.2022 - ETW, Solos Jun. Newc./Beg. Rumba |
+≡
+ |
+
+
+
+
+ Diese Liste wurde mit TopTurnier für Windows V9.3 erstellt.
+
+
diff --git a/src/solo_turnier/tests/html_parser/1/expected.json b/src/solo_turnier/tests/html_parser/1/expected.json
new file mode 100644
index 0000000..7438fa7
--- /dev/null
+++ b/src/solo_turnier/tests/html_parser/1/expected.json
@@ -0,0 +1,47 @@
+{
+ "14": {
+ "name": "Max Mustermann 1",
+ "place": "1.",
+ "finalist": true
+ },
+ "9": {
+ "name": "Max Mustermann 2",
+ "place": "2.",
+ "finalist": true
+ },
+ "13": {
+ "name": "Max Mustermann 3",
+ "place": "3.",
+ "finalist": true
+ },
+ "17": {
+ "name": "Max Mustermann 4",
+ "place": "4.",
+ "finalist": true
+ },
+ "6": {
+ "name": "Max Mustermann 5",
+ "place": "5.",
+ "finalist": true
+ },
+ "27": {
+ "name": "Max Mustermann 6",
+ "place": "6.",
+ "finalist": true
+ },
+ "22": {
+ "name": "Max Mustermann 7",
+ "place": "7.",
+ "finalist": true
+ },
+ "26": {
+ "name": "Max Mustermann 8",
+ "place": "8.",
+ "finalist": false
+ },
+ "25": {
+ "name": "Max Mustermann 9",
+ "place": "9.",
+ "finalist": false
+ }
+}
diff --git a/src/solo_turnier/tests/html_parser/2/erg.htm b/src/solo_turnier/tests/html_parser/2/erg.htm
new file mode 100644
index 0000000..97402ec
--- /dev/null
+++ b/src/solo_turnier/tests/html_parser/2/erg.htm
@@ -0,0 +1,63 @@
+
+
+
+
+
+
+
+
+
+ 09.07.2022 Jun. Beginner Jive
+
+
+
+
+
+
+09.07.2022 - ETW, Solos Jun. Beginner Jive |
+≡
+ |
+
+
+
+
+ Diese Liste wurde mit TopTurnier für Windows V9.3 erstellt.
+
+
diff --git a/src/solo_turnier/tests/html_parser/2/expected.json b/src/solo_turnier/tests/html_parser/2/expected.json
new file mode 100644
index 0000000..4f7a89c
--- /dev/null
+++ b/src/solo_turnier/tests/html_parser/2/expected.json
@@ -0,0 +1,27 @@
+{
+ "14": {
+ "name": "Maxime Musterfrau 1",
+ "place": "1.",
+ "finalist": true
+ },
+ "13": {
+ "name": "Maxime Musterfrau 2",
+ "place": "2.",
+ "finalist": true
+ },
+ "17": {
+ "name": "Maxime Musterfrau 3",
+ "place": "3.",
+ "finalist": true
+ },
+ "6": {
+ "name": "Maxime Musterfrau 4",
+ "place": "4.",
+ "finalist": true
+ },
+ "22": {
+ "name": "Maxime Musterfrau 5",
+ "place": "5.",
+ "finalist": true
+ }
+}
diff --git a/src/solo_turnier/tests/test_html_parser.py b/src/solo_turnier/tests/test_html_parser.py
new file mode 100644
index 0000000..3680e86
--- /dev/null
+++ b/src/solo_turnier/tests/test_html_parser.py
@@ -0,0 +1,28 @@
+import pytest
+import os
+import json
+
+import solo_turnier.html_parser
+
+@pytest.fixture(scope='module', params=["1", '2'])
+def dataProviderHtmlParser(request):
+    """Provide (HTML text, expected dict) for one case directory."""
+    here = os.path.dirname(__file__)
+    caseDir = os.path.join(here, 'html_parser', request.param)
+
+    # Every case directory holds the input page and its expected result.
+    with open(os.path.join(caseDir, 'erg.htm'), 'r') as htmlStream:
+        htmlText = htmlStream.read()
+    with open(os.path.join(caseDir, 'expected.json'), 'r') as jsonStream:
+        expected = json.load(jsonStream)
+
+    return (htmlText, expected)
+
+def test_extractDataFromHtml(dataProviderHtmlParser):
+    """Parsing a case's HTML must yield exactly its expected dict."""
+    # The fixture supplies the page text and the expected result as a pair.
+    pageText, wanted = dataProviderHtmlParser
+
+    parsed = solo_turnier.html_parser.HtmlParser().parseString(pageText)
+
+    assert parsed == wanted