Removed preview extractor methods
This commit is contained in:
parent
b4ec4f896c
commit
82d7717fde
@ -105,89 +105,9 @@ class HtmlParser:
|
|||||||
|
|
||||||
# title = self.soup.find('div', class_='eventhead').table.tr.td.contents[0]
|
# title = self.soup.find('div', class_='eventhead').table.tr.td.contents[0]
|
||||||
|
|
||||||
# ret = HtmlImport(title, participants)
|
|
||||||
ret = HtmlResultImport(participants)
|
ret = HtmlResultImport(participants)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def parsePreparationRound(self):
    """Extract the preparation-round table from the parsed page.

    Reads ``self.soup``. The title is the first child node of the first
    cell of the table inside ``div.eventhead``. The data come from every
    ``table.tab1`` inside ``div.extract``: row titles are read from the
    first column of the first table only, and the remaining columns of
    every table are concatenated row by row.

    Note that this mutates ``self.soup``: ``<br>`` tags in the visited
    cells are replaced by newline strings and the first ``<span>`` of each
    data cell is removed from the tree.

    Returns:
        A dict ``{"title": ..., "data": {"titles": [...], "table": [...]}}``
        where ``table`` is a list of rows and each row is a list of
        ``{"text": str, "meta": str | None}`` entries.
    """
    title = self.soup.find("div", class_="eventhead").table.tr.td.contents[0]
    rowTitles = []

    def _mapBr(td):
        # Turn line breaks into newline characters, then merge the
        # resulting adjacent strings so ``td.string`` can see one string.
        for br in td.find_all("br"):
            br.replace_with("\n")
        td.smooth()
        return td

    def _extractTitles(table):
        # The first row is skipped (presumably a header row).
        for row in table.find_all("tr")[1:]:
            rowTitles.append(_mapBr(row.td).string)

    def _extractContent(td):
        for br in td.find_all("br"):
            br.replace_with("\n")

        # The first <span> in a cell carries the entry's meta information;
        # it is detached so it does not end up in the cell text.
        span = td.span
        if span is not None:
            span = span.extract()
            meta = span.string
        else:
            meta = None

        td.smooth()

        # ``td.string`` is None for an empty cell or a cell with nested
        # markup, which made ``.replace`` raise AttributeError. get_text()
        # yields the same text for the single-string case and "" or the
        # concatenated text otherwise.
        text = td.get_text()
        return {"text": text.replace("\xa0", " ").strip(), "meta": meta}

    def _extractRow(row):
        # The first cell of a row is its title and is handled separately.
        return [_extractContent(entry) for entry in row.find_all("td")[1:]]

    def _extractColumns(table):
        return [_extractRow(row) for row in table.find_all("tr")[1:]]

    def _mergeColumns(columns1, columns2):
        # Row-wise concatenation; as before, surplus rows of the longer
        # operand are dropped silently.
        return [left + right for left, right in zip(columns1, columns2)]

    extract = self.soup.find("div", class_="extract")
    tables = extract.find_all("table", class_="tab1")

    _extractTitles(tables[0])
    tableData = _extractColumns(tables[0])

    for table in tables[1:]:
        tableData = _mergeColumns(tableData, _extractColumns(table))

    data = {"titles": rowTitles, "table": tableData}

    return {"title": title, "data": data}
|
|
||||||
|
|
||||||
def cleanPreparationRoundImport(self, data):
    """Trim surrounding whitespace from an imported preparation round.

    Works in place on ``data`` and returns nothing. ``data["title"]`` is
    stripped of all surrounding whitespace. For every entry of every row in
    ``data["data"]["table"]``, spaces, newlines and non-breaking spaces are
    stripped from both ends of ``"text"`` and, when it is not ``None``,
    of ``"meta"``.
    """
    # Only these characters are trimmed from table entries.
    trimmed = " \n\xa0"

    data["title"] = data["title"].strip()

    for cells in data["data"]["table"]:
        for cell in cells:
            cell["text"] = cell["text"].strip(trimmed)
            note = cell["meta"]
            if note is not None:
                cell["meta"] = note.strip(trimmed)
|
|
||||||
|
|
||||||
def parseIndividualResult(self, competitionGroup, competitionClass, dance):
|
def parseIndividualResult(self, competitionGroup, competitionClass, dance):
|
||||||
participants = {}
|
participants = {}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user