Create script to read setup rounds

This commit is contained in:
Christian Wolf 2022-11-19 07:38:22 +01:00
parent ffba4087e7
commit 4837e54081
10 changed files with 663 additions and 4 deletions

View File

@ -68,6 +68,93 @@ class HtmlParser:
ret = HtmlImport(title, participants) ret = HtmlImport(title, participants)
return ret return ret
def parsePreparationRoundString(self, text: str):
    """Parse the HTML of a preparation-round score sheet into plain data.

    The page title is read from the first cell of the table inside the
    ``div.eventhead`` element.  The score data is read from every
    ``table.tab1`` inside the ``div.extract`` element.  In each table the
    first row is skipped and the first cell of every remaining row is
    treated as the row title; the other cells are the data columns.

    Row titles are taken from the first table only.  The columns of all
    following tables are appended row by row to those of the first one.

    Returns a dict of the form
    ``{'title': ..., 'data': {'titles': [...], 'table': [[cell, ...], ...]}}``
    where each cell is ``{'text': ..., 'meta': ...}``.  ``meta`` is the
    string of the cell's first ``<span>`` (removed from the cell before the
    text is read) or ``None`` if the cell has no ``<span>``.
    """
    document = BeautifulSoup(text, 'html.parser')

    title = document.find('div', class_='eventhead').table.tr.td.contents[0]

    def _brToNewline(cell):
        # Replace each <br> by a newline and merge the adjacent strings so
        # that ``cell.string`` afterwards sees one single text node.
        for lineBreak in cell.find_all('br'):
            lineBreak.replace_with('\n')
        cell.smooth()
        return cell

    def _rowTitles(table):
        # First cell of every row except the first row of the table.
        return [_brToNewline(row.td).string for row in table.find_all('tr')[1:]]

    def _cellContent(cell):
        for lineBreak in cell.find_all('br'):
            lineBreak.replace_with('\n')
        tooltip = cell.span
        # The span is detached from the cell before its string is read, so
        # that it does not end up in the cell's own text.
        meta = None if tooltip is None else tooltip.extract().string
        cell.smooth()
        return {'text': cell.string, 'meta': meta}

    def _columns(table):
        # All cells but the first of every row except the first row.
        return [
            [_cellContent(cell) for cell in row.find_all('td')[1:]]
            for row in table.find_all('tr')[1:]
        ]

    tables = document.find('div', class_='extract').find_all('table', class_='tab1')

    rowTitles = _rowTitles(tables[0])
    tableData = _columns(tables[0])
    for furtherTable in tables[1:]:
        # Concatenate rows of equal index; surplus rows on either side are dropped.
        tableData = [
            left + right
            for left, right in zip(tableData, _columns(furtherTable))
        ]

    return {
        'title': title,
        'data': {
            'titles': rowTitles,
            'table': tableData,
        },
    }
def cleanPreparationRoundImport(self, data):
    """Strip surrounding whitespace from a parsed preparation round, in place.

    ``data`` is expected to have the structure
    ``{'title': str, 'data': {'table': [[{'text': ..., 'meta': ...}, ...], ...]}}``.
    The title is stripped of surrounding whitespace.  For every table entry
    the ``text`` and ``meta`` values are stripped of leading and trailing
    spaces, newlines and non-breaking spaces (``\\xa0``).  Values that are
    ``None`` are left untouched.  Other keys of ``data`` are not modified.

    The structure is modified in place; nothing is returned.
    """
    strippedChars = ' \n\xa0'

    def _cleanText(s):
        # A missing value (None) carries no text to strip; pass it through
        # instead of failing on ``None.strip``.
        if s is None:
            return None
        return s.strip(strippedChars)

    data['title'] = data['title'].strip()

    for row in data['data']['table']:
        for entry in row:
            entry['text'] = _cleanText(entry['text'])
            entry['meta'] = _cleanText(entry['meta'])
def guessDataFromHtmlTitle(self, title): def guessDataFromHtmlTitle(self, title):
match = re.compile('.*?ETW, Solos (.*)').match(title) match = re.compile('.*?ETW, Solos (.*)').match(title)
if match is None: if match is None:

View File

@ -0,0 +1,198 @@
{
"title": "19.11.2022 - ETW, Solos Kin./Jun. Sichtung Rumba",
"data": {
"titles": [
"Wertungsrichter",
"A) WRa\nB) WRb\nC) WRc",
"Ergebnis der Sichtung",
"Startnummer",
"Platz von\nPlatz bis",
"Aufstiegspunkte",
"Startgruppe"
],
"table": [
[
{
"meta": "Max Mustermann 20",
"text": "1"
},
{
"meta": "Max Mustermann 2",
"text": "2"
},
{
"meta": "Max Mustermann 3",
"text": "3"
},
{
"meta": "Max Mustermann 16",
"text": "16"
},
{
"meta": "Max Mustermann 17",
"text": "17"
},
{
"meta": null,
"text": ""
}
],
[
{
"meta": null,
"text": "1,0\n1,0\n1,0"
},
{
"meta": null,
"text": "1,0\n1,0\n1,0"
},
{
"meta": null,
"text": "1,0\n1,0\n1,0"
},
{
"meta": null,
"text": "1,0\n2,0\n3,0"
},
{
"meta": null,
"text": "1,0\n1,5\n3,0"
},
{
"meta": null,
"text": ""
}
],
[
{
"meta": null,
"text": "3,0"
},
{
"meta": null,
"text": "3,0"
},
{
"meta": null,
"text": "3,0"
},
{
"meta": null,
"text": "6,0"
},
{
"meta": null,
"text": "5,5"
},
{
"meta": null,
"text": ""
}
],
[
{
"meta": "Max Mustermann 20",
"text": "1"
},
{
"meta": "Max Mustermann 2",
"text": "2"
},
{
"meta": "Max Mustermann 3",
"text": "3"
},
{
"meta": "Max Mustermann 16",
"text": "16"
},
{
"meta": "Max Mustermann 17",
"text": "17"
},
{
"meta": null,
"text": ""
}
],
[
{
"meta": null,
"text": "Adv"
},
{
"meta": null,
"text": "Beg"
},
{
"meta": null,
"text": "Beg"
},
{
"meta": null,
"text": "Beg"
},
{
"meta": null,
"text": "Beg"
},
{
"meta": null,
"text": ""
}
],
[
{
"meta": null,
"text": "-"
},
{
"meta": null,
"text": "-"
},
{
"meta": null,
"text": "-"
},
{
"meta": null,
"text": "-"
},
{
"meta": null,
"text": "-"
},
{
"meta": null,
"text": ""
}
],
[
{
"meta": null,
"text": "Jun"
},
{
"meta": null,
"text": "Kin"
},
{
"meta": null,
"text": "Kin"
},
{
"meta": null,
"text": "Kin"
},
{
"meta": null,
"text": "Kin"
},
{
"meta": null,
"text": ""
}
]
]
}
}

View File

@ -0,0 +1,198 @@
{
"title": "19.11.2022 - ETW, Solos Kin./Jun. Sichtung Rumba",
"data": {
"titles": [
"Wertungsrichter",
"A) WRa\nB) WRb\nC) WRc",
"Ergebnis der Sichtung",
"Startnummer",
"Platz von\nPlatz bis",
"Aufstiegspunkte",
"Startgruppe"
],
"table": [
[
{
"meta": "Max Mustermann 20",
"text": "1"
},
{
"meta": "Max Mustermann 2",
"text": "2"
},
{
"meta": "Max Mustermann 3",
"text": "3"
},
{
"meta": "Max Mustermann 16",
"text": "16"
},
{
"meta": "Max Mustermann 17",
"text": "17"
},
{
"meta": null,
"text": "\u00a0"
}
],
[
{
"meta": null,
"text": "1,0\n1,0\n1,0"
},
{
"meta": null,
"text": "1,0\n1,0\n1,0"
},
{
"meta": null,
"text": "1,0\n1,0\n1,0"
},
{
"meta": null,
"text": "1,0\n2,0\n3,0"
},
{
"meta": null,
"text": "1,0\n1,5\n3,0"
},
{
"meta": null,
"text": "\u00a0"
}
],
[
{
"meta": null,
"text": "3,0"
},
{
"meta": null,
"text": "3,0"
},
{
"meta": null,
"text": "3,0"
},
{
"meta": null,
"text": "6,0"
},
{
"meta": null,
"text": "5,5"
},
{
"meta": null,
"text": "\u00a0"
}
],
[
{
"meta": "Max Mustermann 20",
"text": "1"
},
{
"meta": "Max Mustermann 2",
"text": "2"
},
{
"meta": "Max Mustermann 3",
"text": "3"
},
{
"meta": "Max Mustermann 16",
"text": "16"
},
{
"meta": "Max Mustermann 17",
"text": "17"
},
{
"meta": null,
"text": "\u00a0"
}
],
[
{
"meta": null,
"text": "Adv\n\u00a0"
},
{
"meta": null,
"text": "Beg\n\u00a0"
},
{
"meta": null,
"text": "Beg\n\u00a0"
},
{
"meta": null,
"text": "Beg\n\u00a0"
},
{
"meta": null,
"text": "Beg\n\u00a0"
},
{
"meta": null,
"text": "\u00a0\n\u00a0"
}
],
[
{
"meta": null,
"text": "-"
},
{
"meta": null,
"text": "-"
},
{
"meta": null,
"text": "-"
},
{
"meta": null,
"text": "-"
},
{
"meta": null,
"text": "-"
},
{
"meta": null,
"text": "\u00a0"
}
],
[
{
"meta": null,
"text": "Jun"
},
{
"meta": null,
"text": "Kin"
},
{
"meta": null,
"text": "Kin"
},
{
"meta": null,
"text": "Kin"
},
{
"meta": null,
"text": "Kin"
},
{
"meta": null,
"text": "\u00a0"
}
]
]
}
}

View File

@ -0,0 +1,130 @@
<!DOCTYPE html PUBLIC "-//WAPFORUM//DTD XHTML Mobile 1.2//EN" "http://www.openmobilealliance.org/tech/DTD/xhtml-mobile12.dtd">
<HTML>
<HEAD>
<META http-equiv="Content-Type" content="text/html; charset=utf-8">
<META name="Author" content="Saarländischer Landesverband für Tanzsport">
<META name="GENERATOR" content="TopTurnier">
<meta http-equiv="Cache-Control" content="no-cache, no-store, must-revalidate" />
<meta http-equiv="Pragma" content="no-cache" />
<meta http-equiv="Expires" content="0" />
<TITLE>19.11.2022 Kin./Jun. Sichtung Rumba</TITLE>
<link rel="stylesheet" type="text/css" href="topturnier.css">
<meta name="viewport" content="width=device-width, initial-scale=1" />
</HEAD>
<body><div class="mainback">
<div class="eventhead">
<table border=0 width=100%>
<tr><td>19.11.2022 - ETW, Solos Kin./Jun. Sichtung Rumba</td><td width=30>
<a class="backbtn" href="index.htm" target="_top">&equiv;</a>
</td></tr>
</table>
</div>
<div class="maincontainer">
<div class="comphead">Wertungstabelle Gesamt</div>
<hr class="line">
<div class="extract">
<TABLE class="tab1">
<TR>
<TD class="td2">Anzahl Teilnehmer: 18</TD>
<TD class="td2c" colspan="15">Startnummer</TD>
</TR>
<TR>
<TD class="td1" width="25%">Wertungsrichter</TD>
<TD class="td2gc" width="5%">1<span class="tooltip2gc">Max Mustermann 20</span></TD>
<TD class="td2gc" width="5%">2<span class="tooltip2gc">Max Mustermann 2</span></TD>
<TD class="td2gc" width="5%">3<span class="tooltip2gc">Max Mustermann 3</span></TD>
</TR>
<TR>
<TD class="td3" nowrap>A) WRa<br>B) WRb<br>C) WRc</TD>
<TD class="td5c">1,0<br>1,0<br>1,0</TD>
<TD class="td5c">1,0<br>1,0<br>1,0</TD>
<TD class="td5c">1,0<br>1,0<br>1,0</TD>
</TR>
<TR>
<TD class="td3">Ergebnis der Sichtung</TD>
<TD class="td5c">3,0</TD>
<TD class="td5c">3,0</TD>
<TD class="td5c">3,0</TD>
</TR>
<TR>
<TD class="td1" width="25%">Startnummer</TD>
<TD class="td2gc" width="5%">1<span class="tooltip2gc">Max Mustermann 20</span></TD>
<TD class="td2gc" width="5%">2<span class="tooltip2gc">Max Mustermann 2</span></TD>
<TD class="td2gc" width="5%">3<span class="tooltip2gc">Max Mustermann 3</span></TD>
</TR>
<TR>
<TD class="td3">Platz von<br>Platz bis</TD>
<TD class="td5cv">Adv<br>&nbsp;</TD>
<TD class="td5cv">Beg<br>&nbsp;</TD>
<TD class="td5cv">Beg<br>&nbsp;</TD>
</TR>
<TR>
<TD class="td3">Aufstiegspunkte</TD>
<TD class="td5c">-</TD>
<TD class="td5c">-</TD>
<TD class="td5c">-</TD>
</TR>
<TR>
<TD class="td3" nowrap>Startgruppe</TD>
<TD class="td5c">Jun</TD>
<TD class="td5c">Kin</TD>
<TD class="td5c">Kin</TD>
</TR>
</TABLE>
<br>
<TABLE class="tab1">
<TR>
<TD class="td2">Anzahl Teilnehmer: 18</TD>
<TD class="td2c" colspan="15">Startnummer</TD>
</TR>
<TR>
<TD class="td1" width="25%">Wertungsrichter</TD>
<TD class="td2gc" width="5%">16<span class="tooltip2gc">Max Mustermann 16</span></TD>
<TD class="td2gc" width="5%">17<span class="tooltip2gc">Max Mustermann 17</span></TD>
<TD class="td2gc" width="5%">&nbsp;<span class="tooltip2gc"></span></TD>
</TR>
<TR>
<TD class="td3" nowrap>A) WRa<br>B) WRb<br>C) WRc</TD>
<TD class="td5c">1,0<br>2,0<br>3,0</TD>
<TD class="td5c">1,0<br>1,5<br>3,0</TD>
<TD class="td5c">&nbsp;</TD>
</TR>
<TR>
<TD class="td3">Ergebnis der Sichtung</TD>
<TD class="td5c">6,0</TD>
<TD class="td5c">5,5</TD>
<TD class="td5c">&nbsp;</TD>
</TR>
<TR>
<TD class="td1" width="25%">Startnummer</TD>
<TD class="td2gc" width="5%">16<span class="tooltip2gc">Max Mustermann 16</span></TD>
<TD class="td2gc" width="5%">17<span class="tooltip2gc">Max Mustermann 17</span></TD>
<TD class="td2gc" width="5%">&nbsp;<span class="tooltip2gc"></span></TD>
</TR>
<TR>
<TD class="td3">Platz von<br>Platz bis</TD>
<TD class="td5cv">Beg<br>&nbsp;</TD>
<TD class="td5cv">Beg<br>&nbsp;</TD>
<TD class="td5cv">&nbsp;<br>&nbsp;</TD>
</TR>
<TR>
<TD class="td3">Aufstiegspunkte</TD>
<TD class="td5c">-</TD>
<TD class="td5c">-</TD>
<TD class="td5c">&nbsp;</TD>
</TR>
<TR>
<TD class="td3" nowrap>Startgruppe</TD>
<TD class="td5c">Kin</TD>
<TD class="td5c">Kin</TD>
<TD class="td5c">&nbsp;</TD>
</TR>
</TABLE>
<br>
<br>
</div>
</div>
</div>
<P><FONT size="1" face="Arial">Diese Liste wurde mit <A TARGET="_blank" HREF="http://www.TopTurnier.de">TopTurnier f&uuml;r Windows V9.3</A> erstellt.<br></FONT>
</body>
</HTML>

View File

@ -4,10 +4,10 @@ import json
import solo_turnier.html_parser import solo_turnier.html_parser
@pytest.fixture(scope='module', params=["1", '2']) @pytest.fixture(scope='module', params=range(2))
def dataProviderHtmlParser(request): def dataProviderHtmlParser(request):
variant = request.param variant = str(request.param+1)
dir = os.path.join(os.path.dirname(__file__), 'html_parser', variant) dir = os.path.join(os.path.dirname(__file__), 'html_parser', 'erg', variant)
htmlFile = os.path.join(dir, 'erg.htm') htmlFile = os.path.join(dir, 'erg.htm')
jsonFile = os.path.join(dir, 'expected.json') jsonFile = os.path.join(dir, 'expected.json')
@ -70,3 +70,49 @@ def test_guessDataFromTitle(fixture_guessDataFromTitle):
ret = parser.guessDataFromHtmlTitle(fixture_guessDataFromTitle[0]) ret = parser.guessDataFromHtmlTitle(fixture_guessDataFromTitle[0])
assert ret == fixture_guessDataFromTitle[1] assert ret == fixture_guessDataFromTitle[1]
@pytest.fixture(params=range(1))
def fixture_parsePreparationResult(request):
    """Provide the HTML input and the expected parse result for one variant.

    The zero-based fixture parameter is mapped to the one-based directory
    ``html_parser/tabges/<n>`` next to this test module.  Returns a tuple
    ``(html, jsonContent)`` holding the text of ``tabges.htm`` and the
    decoded content of ``expected.json`` from that directory.
    """
    variant = str(request.param + 1)
    variantDir = os.path.join(os.path.dirname(__file__), 'html_parser', 'tabges', variant)
    htmlFile = os.path.join(variantDir, 'tabges.htm')
    jsonFile = os.path.join(variantDir, 'expected.json')

    # Read both files explicitly as UTF-8 (the HTML declares that charset) so
    # the result does not depend on the platform's default encoding.
    with open(htmlFile, 'r', encoding='utf-8') as fp:
        html = fp.read()

    with open(jsonFile, 'r', encoding='utf-8') as fp:
        jsonContent = json.load(fp)

    return (html, jsonContent)
def test_parsePreparationResult(fixture_parsePreparationResult):
    """Parsing the fixture HTML must yield exactly the expected structure."""
    html, jsonContent = fixture_parsePreparationResult

    ret = solo_turnier.html_parser.HtmlParser().parsePreparationRoundString(html)

    assert ret == jsonContent
@pytest.fixture(params=range(1))
def fixture_cleanPreparationImport(request):
    """Provide an uncleaned import and its expected cleaned form for one variant.

    The zero-based fixture parameter is mapped to the one-based directory
    ``html_parser/tabges/<n>`` next to this test module.  Returns a tuple
    ``(source, expected)`` with the decoded contents of ``expected.json``
    (the input of the cleaning step) and ``cleaned.json`` (its expected
    outcome) from that directory.
    """
    variant = str(request.param + 1)
    variantDir = os.path.join(os.path.dirname(__file__), 'html_parser', 'tabges', variant)
    srcFile = os.path.join(variantDir, 'expected.json')
    expectedFile = os.path.join(variantDir, 'cleaned.json')

    # JSON is read explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding.
    with open(srcFile, 'r', encoding='utf-8') as fp:
        source = json.load(fp)

    with open(expectedFile, 'r', encoding='utf-8') as fp:
        expected = json.load(fp)

    return (source, expected)
def test_cleanPreparationImport(fixture_cleanPreparationImport):
    """Cleaning the raw import in place must produce the expected data."""
    src, expected = fixture_cleanPreparationImport

    # The cleaning step mutates ``src``; its return value is not used.
    solo_turnier.html_parser.HtmlParser().cleanPreparationRoundImport(src)

    assert src == expected

View File

@ -18,7 +18,7 @@ class ResultRow:
self.competitionClass = competitionClass self.competitionClass = competitionClass
def __str__(self): def __str__(self):
return f'{self.name} ({self.id}, {self.club}) are in {self.group} {self.class_} and danced the {self.dance} in {self.competitionGroup} {self.competitionClass} getting place {self.place}-{self.placeTo}' return f'{self.name} ({self.id}, {self.club}) is in {self.group} {self.class_} and danced the {self.dance} in {self.competitionGroup} {self.competitionClass} getting place {self.place}-{self.placeTo}'
class ResultPerson: class ResultPerson:
def __init__(self, firstName, lastName, club, id = None, group = None): def __init__(self, firstName, lastName, club, id = None, group = None):