Extract metadata from title in HTML
This commit is contained in:
parent
80d0269cb2
commit
0ff24494dc
@ -60,3 +60,29 @@ class HtmlParser:
|
||||
'title': title
|
||||
}
|
||||
return ret
|
||||
|
||||
def guessDataFromHtmlTitle(self, title):
    """Derive dance, class and group from the title of an HTML page.

    The title must contain the marker ``ETW, Solos `` followed by three
    whitespace-separated fields: group, class and dance. The dance is the
    remainder of the title and may itself contain spaces (for example
    ``Wiener Walzer``). Long group and class names are replaced by their
    abbreviations. Combined entries joined with ``/`` are abbreviated part
    by part, and names without a known abbreviation are passed through
    unchanged.

    Args:
        title: Text of the HTML title, e.g.
            ``'09.07.2022 - ETW, Solos Jun. Beginner Jive'``.

    Returns:
        A dict with the keys ``'dance'``, ``'class_'`` and ``'group'``.

    Raises:
        Exception: If the title does not contain the ``ETW, Solos `` marker.
        ValueError: If fewer than three fields follow the marker.
    """
    match = re.compile('.*?ETW, Solos (.*)').match(title)
    if match is None:
        raise Exception(f'Cannot parse title "{title}"')

    # Splitting on arbitrary whitespace (instead of exactly one space) keeps
    # doubled or trailing blanks from producing empty or padded fields.
    rest = match.group(1).strip()
    parts = rest.split(None, 2)
    if len(parts) != 3:
        # Fail with a message naming the offending title instead of an
        # opaque tuple-unpacking error.
        raise ValueError(
            f'Cannot parse title "{title}": '
            f'expected group, class and dance but got "{rest}"'
        )
    rawGroup, rawClass, dance = parts

    classMap = {
        'Newcomer': 'Newc.',
        'Beginner': 'Beg.',
        'Advanced': 'Adv.',
    }

    groupMap = {
        'Kinder': 'Kin.',
        'Junioren': 'Jun.',
        'Jugend': 'Jug.',
    }

    def abbreviate(raw, mapping):
        # Handle combined entries such as "Kinder/Junioren" part by part;
        # parts that are already abbreviated are left as they are.
        return '/'.join(mapping.get(part, part) for part in raw.split('/'))

    return {
        'dance': dance,
        'class_': abbreviate(rawClass, classMap),
        'group': abbreviate(rawGroup, groupMap),
    }
|
||||
|
@ -26,3 +26,42 @@ def test_extractDataFromHtml(dataProviderHtmlParser):
|
||||
actualResult = parser.parseString(htmlString)
|
||||
|
||||
assert actualResult == expected
|
||||
|
||||
# Title -> expected parse result for HtmlParser.guessDataFromHtmlTitle.
# The fixture below is parametrized over these entries directly, so adding a
# case here is enough; there is no separate case count to keep in sync.
_GUESS_DATA_FROM_TITLE_CASES = {
    '09.07.2022 - ETW, Solos Jun. Beginner Jive': {
        'class_': 'Beg.',
        'dance': 'Jive',
        'group': 'Jun.'
    },
    '09.07.2022 - ETW, Solos Jun. Newc./Beg. Rumba': {
        'class_': 'Newc./Beg.',
        'dance': 'Rumba',
        'group': 'Jun.'
    },
    '09.07.2022 - ETW, Solos Kin./Jun. Beginner Cha Cha': {
        'class_': 'Beg.',
        'dance': 'Cha Cha',
        'group': 'Kin./Jun.'
    },
    '09.07.2022 - ETW, Solos Kin. Newcomer Samba': {
        'class_': 'Newc.',
        'dance': 'Samba',
        'group': 'Kin.'
    },
    '09.07.2022 - ETW, Solos Jugend Beg./Adv. Wiener Walzer': {
        'class_': 'Beg./Adv.',
        'dance': 'Wiener Walzer',
        'group': 'Jug.'
    },
}


@pytest.fixture(
    params=list(_GUESS_DATA_FROM_TITLE_CASES.items()),
    # Use the title itself as test id so a failing case is easy to spot.
    ids=list(_GUESS_DATA_FROM_TITLE_CASES),
)
def fixture_guessDataFromTitle(request):
    """Provide one ``(title, expected_result)`` tuple per parametrized run."""
    return request.param
|
||||
|
||||
def test_guessDataFromTitle(fixture_guessDataFromTitle):
    """Check that a title is parsed into the expected data dict.

    The fixture supplies a ``(title, expected)`` pair.
    """
    htmlParser = solo_turnier.html_parser.HtmlParser()
    title, expected = fixture_guessDataFromTitle

    actual = htmlParser.guessDataFromHtmlTitle(title)

    assert actual == expected
|
||||
|
Loading…
Reference in New Issue
Block a user