Removed preview extractor methods

This commit is contained in:
Christian Wolf 2023-11-20 12:09:42 +01:00
parent b4ec4f896c
commit 82d7717fde

View File

@ -105,89 +105,9 @@ class HtmlParser:
# title = self.soup.find('div', class_='eventhead').table.tr.td.contents[0] # title = self.soup.find('div', class_='eventhead').table.tr.td.contents[0]
# ret = HtmlImport(title, participants)
ret = HtmlResultImport(participants) ret = HtmlResultImport(participants)
return ret return ret
def parsePreparationRound(self):
    """Read the preparation-round tables out of ``self.soup``.

    The title is the first content node of the first cell of the table
    inside the ``div.eventhead`` element.  All ``table.tab1`` elements
    below ``div.extract`` are then read: the first cell of every row
    (header row skipped) of the first table supplies the row titles, and
    the remaining cells of every table are converted into
    ``{"text": ..., "meta": ...}`` entries.  Rows of later tables are
    appended to the rows of the first one position by position, so the
    result has as many rows as the shortest table.

    Note that the visited cells are modified in place: ``<br>`` tags are
    replaced by newlines and a leading ``<span>`` is removed from the
    tree.

    Returns:
        ``{"title": title, "data": {"titles": [...], "table": [[...]]}}``
    """
    heading = self.soup.find("div", class_="eventhead").table.tr.td.contents[0]

    def break_to_newline(cell):
        # Swap every <br> below the cell for a literal newline character.
        for line_break in cell.find_all("br"):
            line_break.replace_with("\n")

    def title_of(row):
        # The row title lives in the first cell of the row.
        first_cell = row.td
        break_to_newline(first_cell)
        # Merge the text fragments so that ``.string`` sees a single node.
        first_cell.smooth()
        return first_cell.string

    def parse_cell(cell):
        break_to_newline(cell)
        marker = cell.span
        note = None
        if marker is not None:
            # Detach the span so its text does not end up in the cell text.
            note = marker.extract().string
        cell.smooth()
        text = cell.string.replace("\xa0", " ").strip()
        return {"text": text, "meta": note}

    def parse_table(table):
        # Skip the header row and the title column of each table.
        return [
            [parse_cell(cell) for cell in row.find_all("td")[1:]]
            for row in table.find_all("tr")[1:]
        ]

    container = self.soup.find("div", class_="extract")
    tables = container.find_all("table", class_="tab1")

    row_titles = [title_of(row) for row in tables[0].find_all("tr")[1:]]
    rows = parse_table(tables[0])
    for extra in tables[1:]:
        # Widen each existing row with the matching row of the next table.
        rows = [left + right for left, right in zip(rows, parse_table(extra))]

    return {"title": heading, "data": {"titles": row_titles, "table": rows}}
def cleanPreparationRoundImport(self, data):
    """Trim surrounding whitespace from a preparation-round import in place.

    ``data["title"]`` is stripped of default whitespace.  For every entry
    in every row of ``data["data"]["table"]`` the ``"text"`` value, and
    the ``"meta"`` value unless it is ``None``, lose leading and trailing
    spaces, newlines and non-breaking spaces.

    The passed structure is modified directly; nothing is returned.
    """
    # Characters removed from both ends of the cell values.
    trimmed_chars = " \n\xa0"

    data["title"] = data["title"].strip()

    for row in data["data"]["table"]:
        for cell in row:
            cell["text"] = cell["text"].strip(trimmed_chars)
            if cell["meta"] is not None:
                cell["meta"] = cell["meta"].strip(trimmed_chars)
def parseIndividualResult(self, competitionGroup, competitionClass, dance): def parseIndividualResult(self, competitionGroup, competitionClass, dance):
participants = {} participants = {}