Parse preview rounds in batch script
This commit is contained in:
		
							parent
							
								
									ffa1e36b6f
								
							
						
					
					
						commit
						9e59ca79d2
					
				| @ -177,46 +177,60 @@ class BatchWorker: | |||||||
|         self.l.debug(self.config.__dict__) |         self.l.debug(self.config.__dict__) | ||||||
|          |          | ||||||
|         locator = solo_turnier.html_locator.HtmlLocator() |         locator = solo_turnier.html_locator.HtmlLocator() | ||||||
|         self.l.info('Checking for feasible HTML export files in "%s"', self.config.importHtmlPath()) |         self.l.info('Checking for feasible preview HTML export files in "%s"', self.config.importHtmlPath()) | ||||||
|         htmlCandidates = locator.findCandidates(self.config.importHtmlPath()) |         htmlCandidatesPreview = locator.findPreviewRoundCandidates(self.config.importHtmlPath()) | ||||||
|         self.l.debug('Found HTML file candidates: %s', htmlCandidates) |         self.l.debug('Found HTML file candidates for preview rounds: %s', htmlCandidatesPreview) | ||||||
| 
 | 
 | ||||||
|         htmlParser = solo_turnier.html_parser.HtmlParser() |         previewWorker = solo_turnier.worker.PreviewWorker() | ||||||
|         htmlImports = [] |         self.l.info('Filtering for pure preview rounds.') | ||||||
|         self.l.info('Importing the candidates') |         parsers = previewWorker.filterFilesPreview(htmlCandidatesPreview) | ||||||
|         for candidate in htmlCandidates: |         self.l.debug('Remaining files: %s', parsers.keys()) | ||||||
|             self.l.debug('Processing file %s', candidate) | 
 | ||||||
|             with open(candidate, 'r') as fp: |         self.l.info('Extracting person data from the preview rounds.') | ||||||
|                 fileContent = fp.read() |         previewWorker.extractPersonsFromPreview(parsers) | ||||||
|             htmlImports.append(htmlParser.parseString(fileContent)) |  | ||||||
|         self.l.info('All HTML files have been parsed') |  | ||||||
| 
 | 
 | ||||||
|         csvReader = solo_turnier.reader.AllResultReader(self.config.importCSVPath()) |         csvReader = solo_turnier.reader.AllResultReader(self.config.importCSVPath()) | ||||||
|         self.l.info('Importing the total result CSV file %s', self.config.importCSVPath()) |         self.l.info('Importing the total result CSV file %s', self.config.importCSVPath()) | ||||||
|         csvData = csvReader.readFile() |         csvData = csvReader.readFile() | ||||||
|         self.l.info('CSV file has been read') |         self.l.info('CSV file has been read') | ||||||
|          |          | ||||||
|         self.l.info('Processing the imported data') |  | ||||||
|          |  | ||||||
|         csvExtractor = solo_turnier.worker.CSVExtractor() |  | ||||||
|         self.l.debug('Importing CSV data into internal structures') |  | ||||||
|         csvRows = csvExtractor.mapCSVImport(csvData) |  | ||||||
| 
 |  | ||||||
|         worker = solo_turnier.worker.DataWorker() |         worker = solo_turnier.worker.DataWorker() | ||||||
|         self.l.debug('Combining results from CSV for individual users') |  | ||||||
|         data = worker.combineRowsByPerson(csvRows) |  | ||||||
|         self.l.debug('Fix the groups for combined competitions') |  | ||||||
|         unambiguous, fixedGroups = worker.consolidateGroups(data) |  | ||||||
|         if fixedGroups: |  | ||||||
|             self.l.info('It was required to fix some group issues.') |  | ||||||
|         if not unambiguous: |  | ||||||
|             self.warning('There were couples whose group could not be extracted unambiguously.') |  | ||||||
|         self.l.debug('Merging HTML and CSV data') |  | ||||||
|         worker.mergeHtmlData(data, htmlImports) |  | ||||||
|         self.l.info('Data is prepared') |  | ||||||
| 
 | 
 | ||||||
|         consoleOutputtter = solo_turnier.output.ConsoleOutputter() |         # self.l.info('Checking for feasible HTML export files in "%s"', self.config.importHtmlPath()) | ||||||
|         consoleOutputtter.output(data) |         # htmlCandidates = locator.findCandidates(self.config.importHtmlPath()) | ||||||
|  |         # self.l.debug('Found HTML file candidates: %s', htmlCandidates) | ||||||
|  | 
 | ||||||
|  |         # htmlParser = solo_turnier.html_parser.HtmlParser() | ||||||
|  |         # htmlImports = [] | ||||||
|  |         # self.l.info('Importing the candidates') | ||||||
|  |         # for candidate in htmlCandidates: | ||||||
|  |         #     self.l.debug('Processing file %s', candidate) | ||||||
|  |         #     with open(candidate, 'r') as fp: | ||||||
|  |         #         fileContent = fp.read() | ||||||
|  |         #     htmlImports.append(htmlParser.parseString(fileContent)) | ||||||
|  |         # self.l.info('All HTML files have been parsed') | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  |         # self.l.info('Processing the imported data') | ||||||
|  |          | ||||||
|  |         # csvExtractor = solo_turnier.worker.CSVExtractor() | ||||||
|  |         # self.l.debug('Importing CSV data into internal structures') | ||||||
|  |         # csvRows = csvExtractor.mapCSVImport(csvData) | ||||||
|  | 
 | ||||||
|  |         # self.l.debug('Combining results from CSV for individual users') | ||||||
|  |         # data = worker.combineRowsByPerson(csvRows) | ||||||
|  |         # self.l.debug('Fix the groups for combined competitions') | ||||||
|  |         # unambiguous, fixedGroups = worker.consolidateGroups(data) | ||||||
|  |         # if fixedGroups: | ||||||
|  |         #     self.l.info('It was required to fix some group issues.') | ||||||
|  |         # if not unambiguous: | ||||||
|  |         #     self.warning('There were couples whose group could not be extracted unambiguously.') | ||||||
|  |         # self.l.debug('Merging HTML and CSV data') | ||||||
|  |         # worker.mergeHtmlData(data, htmlImports) | ||||||
|  |         # self.l.info('Data is prepared') | ||||||
|  | 
 | ||||||
|  |         # consoleOutputtter = solo_turnier.output.ConsoleOutputter() | ||||||
|  |         # consoleOutputtter.output(data) | ||||||
| 
 | 
 | ||||||
|     def run1(self): |     def run1(self): | ||||||
|         allResults, finals = self.__extractDataFromFiles() |         allResults, finals = self.__extractDataFromFiles() | ||||||
|  | |||||||
| @ -170,7 +170,7 @@ class HtmlParser: | |||||||
|     def cleanPreparationRoundImport(self, data): |     def cleanPreparationRoundImport(self, data): | ||||||
|         def __cleanTable(table): |         def __cleanTable(table): | ||||||
|             def __cleanText(s: str): |             def __cleanText(s: str): | ||||||
|                 print("cleaning string ", s) |                 # print("cleaning string ", s) | ||||||
|                 return s.strip(' \n\xa0') |                 return s.strip(' \n\xa0') | ||||||
|              |              | ||||||
|             def __cleanEntry(entry): |             def __cleanEntry(entry): | ||||||
| @ -180,7 +180,7 @@ class HtmlParser: | |||||||
|              |              | ||||||
|             for row in table: |             for row in table: | ||||||
|                 for entry in row: |                 for entry in row: | ||||||
|                     print(entry) |                     # print(entry) | ||||||
|                     __cleanEntry(entry) |                     __cleanEntry(entry) | ||||||
| 
 | 
 | ||||||
|         data['title'] = data['title'].strip() |         data['title'] = data['title'].strip() | ||||||
|  | |||||||
| @ -4,6 +4,7 @@ import csv | |||||||
| import os | import os | ||||||
| import logging | import logging | ||||||
| import re | import re | ||||||
|  | from pprint import pformat | ||||||
| 
 | 
 | ||||||
| class AllResultReader: | class AllResultReader: | ||||||
|     def __init__(self, fileName: str): |     def __init__(self, fileName: str): | ||||||
| @ -25,5 +26,6 @@ class AllResultReader: | |||||||
|             'data': rows[1:] |             'data': rows[1:] | ||||||
|         } |         } | ||||||
|          |          | ||||||
|         logging.getLogger('solo_turnier.reader.all_results').debug('Imported results from allresults.csv file: %s', ret) |         l = logging.getLogger('solo_turnier.reader.all_results') | ||||||
|  |         l.log(5, 'Imported results from allresults.csv file: %s', (ret)) | ||||||
|         return ret |         return ret | ||||||
|  | |||||||
| @ -20,6 +20,15 @@ class ResultRow: | |||||||
|     def __str__(self): |     def __str__(self): | ||||||
|         return f'{self.name} ({self.id}, {self.club}) is in {self.group} {self.class_} and danced the {self.dance} in {self.competitionGroup} {self.competitionClass} getting place {self.place}-{self.placeTo}' |         return f'{self.name} ({self.id}, {self.club}) is in {self.group} {self.class_} and danced the {self.dance} in {self.competitionGroup} {self.competitionClass} getting place {self.place}-{self.placeTo}' | ||||||
| 
 | 
 | ||||||
|  | class HtmlPerson: | ||||||
|  |     def __init__(self, name, id, group): | ||||||
|  |         self.name = name | ||||||
|  |         self.id = id | ||||||
|  |         self.group = group | ||||||
|  |      | ||||||
|  |     def __repr__(self): | ||||||
|  |         return f'{self.name} ({self.id}, {self.group})' | ||||||
|  | 
 | ||||||
| class ResultPerson: | class ResultPerson: | ||||||
|     def __init__(self, firstName, lastName, club, id = None, group = None): |     def __init__(self, firstName, lastName, club, id = None, group = None): | ||||||
|         self.firstName = firstName |         self.firstName = firstName | ||||||
| @ -148,6 +157,76 @@ class CSVExtractor: | |||||||
|          |          | ||||||
|         return ret |         return ret | ||||||
| 
 | 
 | ||||||
|  | class PreviewWorker: | ||||||
|  |     def __init__(self): | ||||||
|  |         self.l = logging.getLogger('solo_turnier.worker.PreviewWorker') | ||||||
|  |      | ||||||
|  |     def filterFilesPreview(self, files: list[str]) -> dict[str, html_parser.HtmlParser]: | ||||||
|  |         self.l.debug('Filtering the list of parsers by removing all non preview entries.') | ||||||
|  |         ret = {} | ||||||
|  |         for file in files: | ||||||
|  |             with open(file, 'r') as fp: | ||||||
|  |                 text = fp.read() | ||||||
|  |              | ||||||
|  |             parser = html_parser.HtmlParser(text) | ||||||
|  | 
 | ||||||
|  |             try: | ||||||
|  |                 data = parser.guessDataFromHtmlTitle() | ||||||
|  |             except: | ||||||
|  |                 self.l.error(f'Unable to parse html file in {file}. Please check manually.') | ||||||
|  |                 continue | ||||||
|  | 
 | ||||||
|  |             if data['class_'] == 'Sichtung': | ||||||
|  |                 self.l.debug(f"Found candidate in {file}. Adding to the list.") | ||||||
|  |                 ret[file] = parser | ||||||
|  |             else: | ||||||
|  |                 self.l.debug(f'Rejecting file {file} as the name {data["class_"]} did not match.') | ||||||
|  |          | ||||||
|  |         return ret | ||||||
|  |      | ||||||
|  |     def __extractPersonsFromSinglePreview(self, parser: html_parser.HtmlParser): | ||||||
|  |         imported = parser.parsePreparationRound() | ||||||
|  |         parser.cleanPreparationRoundImport(imported) | ||||||
|  |         data = imported['data'] | ||||||
|  |          | ||||||
|  |         self.l.log(5, data) | ||||||
|  |          | ||||||
|  |         if data['titles'][0] != 'Wertungsrichter': | ||||||
|  |             self.l.fatal('Cannot parse the parsed content of the preview file.') | ||||||
|  |             raise Exception('Incompatible export file') | ||||||
|  | 
 | ||||||
|  |         ids = [] | ||||||
|  |         names = [] | ||||||
|  |         indices = [] | ||||||
|  |         for index, e in enumerate(data['table'][0]): | ||||||
|  |             if e['text'] == '': | ||||||
|  |                 continue | ||||||
|  |             indices.append(index) | ||||||
|  |             ids.append(e['text']) | ||||||
|  |             names.append(e['meta']) | ||||||
|  | 
 | ||||||
|  |         groups = [] | ||||||
|  | 
 | ||||||
|  |         if data['titles'][-1] == 'Startgruppe': | ||||||
|  |             self.l.debug('Combined competition found. Extracting group from table') | ||||||
|  |             groups = [data['table'][-1][idx]['text'] for idx in indices] | ||||||
|  |         else: | ||||||
|  |             self.l.debug('Using group from the title.') | ||||||
|  |             group = parser.guessDataFromHtmlTitle(imported['title'])['group'] | ||||||
|  |             groups = [group for i in indices] | ||||||
|  | 
 | ||||||
|  |         ret = [] | ||||||
|  |         for i in range(len(ids)): | ||||||
|  |             ret.append(HtmlPerson(names[i], ids[i], groups[i])) | ||||||
|  | 
 | ||||||
|  |         self.l.log(5, ret) | ||||||
|  |         return ret | ||||||
|  | 
 | ||||||
|  |     def extractPersonsFromPreview(self, parsers): | ||||||
|  |         for file in parsers: | ||||||
|  |             self.l.debug('Extracting person data from %s', file) | ||||||
|  |             self.__extractPersonsFromSinglePreview(parsers[file]) | ||||||
|  | 
 | ||||||
| class DataWorker: | class DataWorker: | ||||||
|     def __init__(self): |     def __init__(self): | ||||||
|         self.l = logging.getLogger('solo_turnier.worker') |         self.l = logging.getLogger('solo_turnier.worker') | ||||||
| @ -297,3 +376,5 @@ class DataWorker: | |||||||
|                 ret.append(competitions[0]) |                 ret.append(competitions[0]) | ||||||
|              |              | ||||||
|         return ret |         return ret | ||||||
|  | 
 | ||||||
|  |      | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user