diff --git a/src/solo_turnier/html_parser.py b/src/solo_turnier/html_parser.py
index 5bd35a4..9224d31 100644
--- a/src/solo_turnier/html_parser.py
+++ b/src/solo_turnier/html_parser.py
@@ -19,9 +19,42 @@ class HtmlImport:
class HtmlParser:
- def __init__(self):
+ def __init__(self, text: str):
self.l = logging.getLogger('solo_turnier.html_parser')
+ self.soup = BeautifulSoup(text, 'html.parser')
+    def getEventTitle(self):
+        """Return the first content node of the event header's table cell.
+
+        Looks up the div with class 'eventhead' in the parsed document and
+        follows its table -> tr -> td chain.
+        """
+        header = self.soup.find('div', class_='eventhead')
+        titleCell = header.table.tr.td
+        return titleCell.contents[0]
+
+    def guessDataFromHtmlTitle(self, title = None):
+        """Derive group, class and dance from an event title.
+
+        title: the title text to analyse; when None, the title is taken
+        from the parsed document via getEventTitle().
+
+        Returns a dict with the keys 'dance', 'class_' and 'group'. Known
+        class and group names are replaced by their abbreviations, unknown
+        ones are passed through unchanged.
+
+        Raises Exception if the title does not match the expected
+        'ETW, Solos ...' pattern, and ValueError if fewer than three
+        space-separated fields follow that prefix.
+        """
+        if title is None:
+            title = self.getEventTitle()
+
+        match = re.match('.*?ETW, Solos (.*)', title)
+        if match is None:
+            raise Exception(f'Cannot parse title "{title}"')
+
+        # Expected layout after the prefix: "<group> <class> <dance ...>".
+        # The dance may itself contain spaces, hence at most two splits.
+        fields = match.group(1).split(' ', 2)
+        if len(fields) != 3:
+            # Unpacking directly would raise the same exception type, but
+            # with a message that does not mention the offending title.
+            raise ValueError(f'Cannot parse title "{title}"')
+        rawGroup, rawClass, dance = fields
+
+        classMap = {
+            'Newcomer': 'Newc.',
+            'Beginner': 'Beg.',
+            'Advanced': 'Adv.'
+        }
+
+        groupMap = {
+            'Kinder': 'Kin.',
+            'Junioren': 'Jun.',
+            'Jugend': 'Jug.',
+        }
+
+        return {
+            'dance': dance.strip(),
+            'class_': classMap.get(rawClass, rawClass),
+            'group': groupMap.get(rawGroup, rawGroup)
+        }
+
def parseString(self, text: str):
soup = BeautifulSoup(text, 'html.parser')
@@ -68,10 +101,8 @@ class HtmlParser:
ret = HtmlImport(title, participants)
return ret
- def parsePreparationRoundString(self, text: str):
- soup = BeautifulSoup(text, 'html.parser')
-
- title = soup.find('div', class_='eventhead').table.tr.td.contents[0]
+ def parsePreparationRound(self):
+ title = self.soup.find('div', class_='eventhead').table.tr.td.contents[0]
tableData = []
rowTitles = []
@@ -120,7 +151,7 @@ class HtmlParser:
def __mergeColumns(columns1, columns2):
return list(map(lambda x, y: x + y, columns1, columns2))
- extract = soup.find('div', class_='extract')
+ extract = self.soup.find('div', class_='extract')
tables = extract.find_all('table', class_='tab1')
__extractTitles(tables[0])
@@ -155,28 +186,3 @@ class HtmlParser:
data['title'] = data['title'].strip()
__cleanTable(data['data']['table'])
- def guessDataFromHtmlTitle(self, title):
- match = re.compile('.*?ETW, Solos (.*)').match(title)
- if match is None:
- raise Exception(f'Cannot parse title "{title}"')
-
- rest = match.group(1)
- rawGroup, rawClass, dance = rest.split(' ', 2)
-
- classMap = {
- 'Newcomer': 'Newc.',
- 'Beginner': 'Beg.',
- 'Advanced': 'Adv.'
- }
-
- groupMap = {
- 'Kinder': 'Kin.',
- 'Junioren': 'Jun.',
- 'Jugend': 'Jug.',
- }
-
- return {
- 'dance': dance.strip(),
- 'class_': classMap.get(rawClass, rawClass),
- 'group': groupMap.get(rawGroup, rawGroup)
- }
diff --git a/src/solo_turnier/tests/test_html_parser.py b/src/solo_turnier/tests/test_html_parser.py
index 0bc1bc2..2f79882 100644
--- a/src/solo_turnier/tests/test_html_parser.py
+++ b/src/solo_turnier/tests/test_html_parser.py
@@ -22,7 +22,7 @@ def test_extractDataFromHtml(dataProviderHtmlParser):
htmlString = dataProviderHtmlParser[0]
expected = dataProviderHtmlParser[1]
- parser = solo_turnier.html_parser.HtmlParser()
+ parser = solo_turnier.html_parser.HtmlParser(htmlString)
actualResult = parser.parseString(htmlString)
participants = {}
@@ -66,7 +66,7 @@ def fixture_guessDataFromTitle(request):
return (key, cases[key])
def test_guessDataFromTitle(fixture_guessDataFromTitle):
- parser = solo_turnier.html_parser.HtmlParser()
+ parser = solo_turnier.html_parser.HtmlParser('')
ret = parser.guessDataFromHtmlTitle(fixture_guessDataFromTitle[0])
assert ret == fixture_guessDataFromTitle[1]
@@ -89,8 +89,8 @@ def test_parsePreparationResult(fixture_parsePreparationResult):
html = fixture_parsePreparationResult[0]
jsonContent = fixture_parsePreparationResult[1]
- parser = solo_turnier.html_parser.HtmlParser()
- ret = parser.parsePreparationRoundString(html)
+ parser = solo_turnier.html_parser.HtmlParser(html)
+ ret = parser.parsePreparationRound()
assert ret == jsonContent
@@ -112,7 +112,7 @@ def test_cleanPreparationImport(fixture_cleanPreparationImport):
src = fixture_cleanPreparationImport[0]
expected = fixture_cleanPreparationImport[1]
- parser = solo_turnier.html_parser.HtmlParser()
+ parser = solo_turnier.html_parser.HtmlParser('')
parser.cleanPreparationRoundImport(src)
assert src == expected
diff --git a/src/solo_turnier/worker.py b/src/solo_turnier/worker.py
index 5cfc0d9..ab7e9e0 100644
--- a/src/solo_turnier/worker.py
+++ b/src/solo_turnier/worker.py
@@ -221,7 +221,7 @@ class DataWorker:
def _createHtmlLUT(self, htmlImports: list[html_parser.HtmlImport]):
ret = {}
- parser = html_parser.HtmlParser()
+ parser = html_parser.HtmlParser('')
for imp in htmlImports:
parsed = parser.guessDataFromHtmlTitle(imp.title)
key = (parsed['group'], parsed['class_'], parsed['dance'])