From 0ff24494dc798658a492ba6bc4a6658c499a5740 Mon Sep 17 00:00:00 2001 From: Christian Wolf Date: Tue, 15 Nov 2022 14:19:10 +0100 Subject: [PATCH] Extract meta data from title in HTML --- src/solo_turnier/html_parser.py | 26 +++++++++++++++ src/solo_turnier/tests/test_html_parser.py | 39 ++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/src/solo_turnier/html_parser.py b/src/solo_turnier/html_parser.py index 7d628ae..11df844 100644 --- a/src/solo_turnier/html_parser.py +++ b/src/solo_turnier/html_parser.py @@ -60,3 +60,29 @@ class HtmlParser: 'title': title } return ret + + def guessDataFromHtmlTitle(self, title): + match = re.compile('.*?ETW, Solos (.*)').match(title) + if match is None: + raise Exception(f'Cannot parse title "{title}"') + + rest = match.group(1) + rawGroup, rawClass, dance = rest.split(' ', 2) + + classMap = { + 'Newcomer': 'Newc.', + 'Beginner': 'Beg.', + 'Advanced': 'Adv.' + } + + groupMap = { + 'Kinder': 'Kin.', + 'Junioren': 'Jun.', + 'Jugend': 'Jug.', + } + + return { + 'dance': dance, + 'class_': classMap.get(rawClass, rawClass), + 'group': groupMap.get(rawGroup, rawGroup) + } diff --git a/src/solo_turnier/tests/test_html_parser.py b/src/solo_turnier/tests/test_html_parser.py index 3680e86..c24c6c5 100644 --- a/src/solo_turnier/tests/test_html_parser.py +++ b/src/solo_turnier/tests/test_html_parser.py @@ -26,3 +26,42 @@ def test_extractDataFromHtml(dataProviderHtmlParser): actualResult = parser.parseString(htmlString) assert actualResult == expected + +@pytest.fixture(params=range(5)) +def fixture_guessDataFromTitle(request): + cases = { + '09.07.2022 - ETW, Solos Jun. Beginner Jive': { + 'class_': 'Beg.', + 'dance': 'Jive', + 'group': 'Jun.' + }, + '09.07.2022 - ETW, Solos Jun. Newc./Beg. Rumba': { + 'class_': 'Newc./Beg.', + 'dance': 'Rumba', + 'group': 'Jun.' + }, + '09.07.2022 - ETW, Solos Kin./Jun. Beginner Cha Cha': { + 'class_': 'Beg.', + 'dance': 'Cha Cha', + 'group': 'Kin./Jun.' + }, + '09.07.2022 - ETW, Solos Kin. Newcomer Samba': { + 'class_': 'Newc.', + 'dance': 'Samba', + 'group': 'Kin.' + }, + '09.07.2022 - ETW, Solos Jugend Beg./Adv. Wiener Walzer': { + 'class_': 'Beg./Adv.', + 'dance': 'Wiener Walzer', + 'group': 'Jug.' + }, + } + keys = list(cases.keys()) + key = keys[request.param] + return (key, cases[key]) + +def test_guessDataFromTitle(fixture_guessDataFromTitle): + parser = solo_turnier.html_parser.HtmlParser() + ret = parser.guessDataFromHtmlTitle(fixture_guessDataFromTitle[0]) + + assert ret == fixture_guessDataFromTitle[1]