Extract metadata from the HTML title

This commit is contained in:
Christian Wolf 2022-11-15 14:19:10 +01:00
parent 80d0269cb2
commit 0ff24494dc
2 changed files with 65 additions and 0 deletions

View File

@ -60,3 +60,29 @@ class HtmlParser:
'title': title
}
return ret
def guessDataFromHtmlTitle(self, title):
    """Derive dance, class and group from the title of an HTML result page.

    The title must contain ``ETW, Solos `` followed by three
    whitespace-separated fields: group, class and dance. The dance is the
    remainder of the title and may itself contain spaces.

    Full-word groups and classes are replaced by their abbreviations.
    Combined entries joined by ``/`` are abbreviated part by part; parts
    without a known abbreviation are kept unchanged.

    Args:
        title: The raw title string of the HTML page.

    Returns:
        A dict with the keys ``'dance'``, ``'class_'`` and ``'group'``.

    Raises:
        Exception: If the title does not contain the ``ETW, Solos `` marker.
        ValueError: If fewer than three fields follow the marker.
    """
    match = re.match(r'.*?ETW, Solos (.*)', title)
    if match is None:
        raise Exception(f'Cannot parse title "{title}"')

    # Splitting on arbitrary whitespace tolerates repeated blanks between
    # the fields; maxsplit=2 keeps multi-word dances in one piece.
    fields = match.group(1).split(None, 2)
    if len(fields) != 3:
        raise ValueError(f'Cannot parse title "{title}"')
    rawGroup, rawClass, dance = fields

    classMap = {
        'Newcomer': 'Newc.',
        'Beginner': 'Beg.',
        'Advanced': 'Adv.',
    }
    groupMap = {
        'Kinder': 'Kin.',
        'Junioren': 'Jun.',
        'Jugend': 'Jug.',
    }

    def abbreviate(raw, mapping):
        # Abbreviate every '/'-separated part on its own so that combined
        # entries such as 'Kinder/Junioren' are normalized as well.
        return '/'.join(mapping.get(part, part) for part in raw.split('/'))

    return {
        # The remainder keeps trailing whitespace after a limited split.
        'dance': dance.strip(),
        'class_': abbreviate(rawClass, classMap),
        'group': abbreviate(rawGroup, groupMap),
    }

View File

@ -26,3 +26,42 @@ def test_extractDataFromHtml(dataProviderHtmlParser):
actualResult = parser.parseString(htmlString)
assert actualResult == expected
# Maps each HTML title to the data expected to be extracted from it.
# The fixture below is parametrized directly from this table, so adding or
# removing a case needs no separate counter to be kept in sync.
_GUESS_DATA_FROM_TITLE_CASES = {
    '09.07.2022 - ETW, Solos Jun. Beginner Jive': {
        'class_': 'Beg.',
        'dance': 'Jive',
        'group': 'Jun.'
    },
    '09.07.2022 - ETW, Solos Jun. Newc./Beg. Rumba': {
        'class_': 'Newc./Beg.',
        'dance': 'Rumba',
        'group': 'Jun.'
    },
    '09.07.2022 - ETW, Solos Kin./Jun. Beginner Cha Cha': {
        'class_': 'Beg.',
        'dance': 'Cha Cha',
        'group': 'Kin./Jun.'
    },
    '09.07.2022 - ETW, Solos Kin. Newcomer Samba': {
        'class_': 'Newc.',
        'dance': 'Samba',
        'group': 'Kin.'
    },
    '09.07.2022 - ETW, Solos Jugend Beg./Adv. Wiener Walzer': {
        'class_': 'Beg./Adv.',
        'dance': 'Wiener Walzer',
        'group': 'Jug.'
    },
}


@pytest.fixture(
    params=list(_GUESS_DATA_FROM_TITLE_CASES.items()),
    ids=list(_GUESS_DATA_FROM_TITLE_CASES),
)
def fixture_guessDataFromTitle(request):
    """Provide one ``(title, expected)`` tuple per parametrized test run."""
    return request.param
def test_guessDataFromTitle(fixture_guessDataFromTitle):
    """Check that the data guessed from a title equals the expected dict."""
    title, expected = fixture_guessDataFromTitle
    htmlParser = solo_turnier.html_parser.HtmlParser()
    assert htmlParser.guessDataFromHtmlTitle(title) == expected