Removed preview extractor methods
This commit is contained in:
parent
b4ec4f896c
commit
82d7717fde
@ -105,89 +105,9 @@ class HtmlParser:
|
||||
|
||||
# title = self.soup.find('div', class_='eventhead').table.tr.td.contents[0]
|
||||
|
||||
# ret = HtmlImport(title, participants)
|
||||
ret = HtmlResultImport(participants)
|
||||
return ret
|
||||
|
||||
def parsePreparationRound(self):
    """Extract the preparation-round table from the parsed page.

    The title is the first content node of the first cell of the table
    inside ``div.eventhead``. The data comes from every ``table.tab1``
    found inside ``div.extract``; the first row of each table is skipped.

    Returns:
        dict: ``{"title": title, "data": {"titles": [...], "table": [...]}}``.
        ``titles`` holds the ``.string`` of the first cell of each
        non-header row of the first table. ``table`` holds one list per
        row; each element is ``{"text": str, "meta": str | None}`` built
        from the remaining cells, with the rows of all tables
        concatenated position by position.

    Raises:
        IndexError: if no ``table.tab1`` is found inside ``div.extract``.
    """

    def _breaks_to_newlines(td):
        # Replace every <br> below ``td`` by a newline string (in place).
        for br in td.find_all("br"):
            br.replace_with("\n")

    def _flatten_cell(td):
        # Turn <br> into newlines and merge adjacent strings.
        _breaks_to_newlines(td)
        td.smooth()
        return td

    def _extract_content(td):
        # Build the {"text", "meta"} entry for one data cell.
        _breaks_to_newlines(td)

        # A <span> in the cell carries the meta information; detach it
        # before smoothing so that it is not part of the cell text.
        span = td.span
        meta = None
        if span is not None:
            meta = span.extract().string

        td.smooth()

        text = td.string
        if text is None:
            # ``.string`` is None for empty cells and for cells with more
            # than one child; fall back to the concatenated text instead
            # of failing on ``None.replace``.
            text = td.get_text()

        return {"text": text.replace("\xa0", " ").strip(), "meta": meta}

    def _extract_columns(table):
        # One list of entries per non-header row; the first cell of each
        # row (the row title) is skipped.
        return [
            [_extract_content(td) for td in row.find_all("td")[1:]]
            for row in table.find_all("tr")[1:]
        ]

    title = self.soup.find("div", class_="eventhead").table.tr.td.contents[0]

    extract = self.soup.find("div", class_="extract")
    tables = extract.find_all("table", class_="tab1")

    # Row titles are taken from the first table only.
    rowTitles = [
        _flatten_cell(row.td).string for row in tables[0].find_all("tr")[1:]
    ]

    tableData = _extract_columns(tables[0])
    for table in tables[1:]:
        # Append the cells of the next table to the matching rows; zip
        # stops at the shorter side, like the two-iterable map it replaces.
        tableData = [
            left + right
            for left, right in zip(tableData, _extract_columns(table))
        ]

    data = {"titles": rowTitles, "table": tableData}

    return {"title": title, "data": data}
|
||||
|
||||
def cleanPreparationRoundImport(self, data):
    """Strip surrounding whitespace from an imported preparation round.

    Works in place on ``data``: the ``"title"`` is stripped with the
    default ``str.strip()``, and the ``"text"`` and (when not ``None``)
    ``"meta"`` of every entry in ``data["data"]["table"]`` are stripped
    of spaces, newlines and non-breaking spaces. Nothing is returned.
    """
    strippable = " \n\xa0"

    data["title"] = data["title"].strip()

    for row in data["data"]["table"]:
        for cell in row:
            cell["text"] = cell["text"].strip(strippable)
            meta = cell["meta"]
            if meta is not None:
                cell["meta"] = meta.strip(strippable)
|
||||
|
||||
def parseIndividualResult(self, competitionGroup, competitionClass, dance):
|
||||
participants = {}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user