Parse preview rounds in batch script
This commit is contained in:
parent
ffa1e36b6f
commit
9e59ca79d2
@ -177,46 +177,60 @@ class BatchWorker:
|
||||
# --- Interior of a BatchWorker method (its "def" lies outside this hunk). ---
# Orchestrates the batch pipeline: locate HTML exports, parse them, filter
# preview ("Sichtung") rounds, import the result CSV, merge, and print.
self.l.debug(self.config.__dict__)

# Locate all exported HTML result files below the configured import path.
locator = solo_turnier.html_locator.HtmlLocator()
self.l.info('Checking for feasible HTML export files in "%s"', self.config.importHtmlPath())
htmlCandidates = locator.findCandidates(self.config.importHtmlPath())
self.l.debug('Found HTML file candidates: %s', htmlCandidates)
# Preview rounds are located separately from the regular exports.
self.l.info('Checking for feasible preview HTML export files in "%s"', self.config.importHtmlPath())
htmlCandidatesPreview = locator.findPreviewRoundCandidates(self.config.importHtmlPath())
self.l.debug('Found HTML file candidates for preview rounds: %s', htmlCandidatesPreview)

# Parse every regular HTML candidate into an in-memory import structure.
htmlParser = solo_turnier.html_parser.HtmlParser()
htmlImports = []
self.l.info('Importing the candidates')
for candidate in htmlCandidates:
    self.l.debug('Processing file %s', candidate)
    with open(candidate, 'r') as fp:
        fileContent = fp.read()
    htmlImports.append(htmlParser.parseString(fileContent))
self.l.info('All HTML files have been parsed')

# Keep only true preview rounds and extract the person data from them.
previewWorker = solo_turnier.worker.PreviewWorker()
self.l.info('Filtering for pure preview rounds.')
parsers = previewWorker.filterFilesPreview(htmlCandidatesPreview)
self.l.debug('Remaining files: %s', parsers.keys())

self.l.info('Extracting person data from the preview rounds.')
previewWorker.extractPersonsFromPreview(parsers)

# Import the overall result CSV file.
csvReader = solo_turnier.reader.AllResultReader(self.config.importCSVPath())
self.l.info('Importing the total result CSV file %s', self.config.importCSVPath())
csvData = csvReader.readFile()
self.l.info('CSV file has been read')

self.l.info('Processing the imported data')

# Map the raw CSV rows into the internal row representation.
csvExtractor = solo_turnier.worker.CSVExtractor()
self.l.debug('Importing CSV data into internal structures')
csvRows = csvExtractor.mapCSVImport(csvData)

# Combine CSV rows per person and reconcile group assignments.
worker = solo_turnier.worker.DataWorker()
self.l.debug('Combining results from CSV for individual users')
data = worker.combineRowsByPerson(csvRows)
self.l.debug('Fix the groups for combined competitions')
unambiguous, fixedGroups = worker.consolidateGroups(data)
if fixedGroups:
    self.l.info('It was required to fix some group issues.')
if not unambiguous:
    # NOTE(review): `self.warning` looks like a bug — presumably this should
    # be `self.l.warning`; confirm against the enclosing class definition.
    self.warning('There were couples whose group could not be extracted unambiguously.')
self.l.debug('Merging HTML and CSV data')
worker.mergeHtmlData(data, htmlImports)
self.l.info('Data is prepared')

# Emit the merged results to the console.
# NOTE(review): "consoleOutputtter" is misspelled (extra "t"); local name only.
consoleOutputtter = solo_turnier.output.ConsoleOutputter()
consoleOutputtter.output(data)
|
||||
# self.l.info('Checking for feasible HTML export files in "%s"', self.config.importHtmlPath())
|
||||
# htmlCandidates = locator.findCandidates(self.config.importHtmlPath())
|
||||
# self.l.debug('Found HTML file candidates: %s', htmlCandidates)
|
||||
|
||||
# htmlParser = solo_turnier.html_parser.HtmlParser()
|
||||
# htmlImports = []
|
||||
# self.l.info('Importing the candidates')
|
||||
# for candidate in htmlCandidates:
|
||||
# self.l.debug('Processing file %s', candidate)
|
||||
# with open(candidate, 'r') as fp:
|
||||
# fileContent = fp.read()
|
||||
# htmlImports.append(htmlParser.parseString(fileContent))
|
||||
# self.l.info('All HTML files have been parsed')
|
||||
|
||||
|
||||
# self.l.info('Processing the imported data')
|
||||
|
||||
# csvExtractor = solo_turnier.worker.CSVExtractor()
|
||||
# self.l.debug('Importing CSV data into internal structures')
|
||||
# csvRows = csvExtractor.mapCSVImport(csvData)
|
||||
|
||||
# self.l.debug('Combining results from CSV for individual users')
|
||||
# data = worker.combineRowsByPerson(csvRows)
|
||||
# self.l.debug('Fix the groups for combined competitions')
|
||||
# unambiguous, fixedGroups = worker.consolidateGroups(data)
|
||||
# if fixedGroups:
|
||||
# self.l.info('It was required to fix some group issues.')
|
||||
# if not unambiguous:
|
||||
# self.warning('There were couples whose group could not be extracted unambiguously.')
|
||||
# self.l.debug('Merging HTML and CSV data')
|
||||
# worker.mergeHtmlData(data, htmlImports)
|
||||
# self.l.info('Data is prepared')
|
||||
|
||||
# consoleOutputtter = solo_turnier.output.ConsoleOutputter()
|
||||
# consoleOutputtter.output(data)
|
||||
|
||||
def run1(self):
|
||||
allResults, finals = self.__extractDataFromFiles()
|
||||
|
@ -170,7 +170,7 @@ class HtmlParser:
|
||||
def cleanPreparationRoundImport(self, data):
|
||||
def __cleanTable(table):
|
||||
def __cleanText(s: str):
|
||||
print("cleaning string ", s)
|
||||
# print("cleaning string ", s)
|
||||
return s.strip(' \n\xa0')
|
||||
|
||||
def __cleanEntry(entry):
|
||||
@ -180,7 +180,7 @@ class HtmlParser:
|
||||
|
||||
for row in table:
|
||||
for entry in row:
|
||||
print(entry)
|
||||
# print(entry)
|
||||
__cleanEntry(entry)
|
||||
|
||||
data['title'] = data['title'].strip()
|
||||
|
@ -4,6 +4,7 @@ import csv
|
||||
import os
|
||||
import logging
|
||||
import re
|
||||
from pprint import pformat
|
||||
|
||||
class AllResultReader:
|
||||
def __init__(self, fileName: str):
|
||||
@ -25,5 +26,6 @@ class AllResultReader:
|
||||
'data': rows[1:]
|
||||
}
|
||||
|
||||
logging.getLogger('solo_turnier.reader.all_results').debug('Imported results from allresults.csv file: %s', ret)
|
||||
l = logging.getLogger('solo_turnier.reader.all_results')
|
||||
l.log(5, 'Imported results from allresults.csv file: %s', (ret))
|
||||
return ret
|
||||
|
@ -20,6 +20,15 @@ class ResultRow:
|
||||
def __str__(self):
|
||||
return f'{self.name} ({self.id}, {self.club}) is in {self.group} {self.class_} and danced the {self.dance} in {self.competitionGroup} {self.competitionClass} getting place {self.place}-{self.placeTo}'
|
||||
|
||||
class HtmlPerson:
    """Identity record for a dancer extracted from an HTML preview round.

    Plain value holder: a display name, a start number (id) and the
    group the person competes in.
    """

    def __init__(self, name, id, group):
        # Store the values exactly as parsed from the HTML export.
        self.name = name
        self.id = id
        self.group = group

    def __repr__(self):
        # Rendered as "<name> (<id>, <group>)", e.g. in debug log output.
        return '{} ({}, {})'.format(self.name, self.id, self.group)
|
||||
|
||||
class ResultPerson:
|
||||
def __init__(self, firstName, lastName, club, id = None, group = None):
|
||||
self.firstName = firstName
|
||||
@ -148,6 +157,76 @@ class CSVExtractor:
|
||||
|
||||
return ret
|
||||
|
||||
class PreviewWorker:
    """Selects preview-round ("Sichtung") HTML exports and extracts the
    participating persons from them."""

    def __init__(self):
        self.l = logging.getLogger('solo_turnier.worker.PreviewWorker')

    def filterFilesPreview(self, files: list[str]) -> dict[str, html_parser.HtmlParser]:
        """Keep only the files whose HTML title marks a preview round.

        :param files: paths of candidate HTML export files
        :return: mapping of file path -> parser for every preview-round file
        """
        self.l.debug('Filtering the list of parsers by removing all non preview entries.')
        ret = {}
        for file in files:
            with open(file, 'r') as fp:
                text = fp.read()

            parser = html_parser.HtmlParser(text)

            try:
                data = parser.guessDataFromHtmlTitle()
            except Exception:
                # Fixed: was a bare "except:", which also swallowed
                # KeyboardInterrupt/SystemExit.
                self.l.error(f'Unable to parse html file in {file}. Please check manually.')
                continue

            if data['class_'] == 'Sichtung':
                self.l.debug(f"Found candidate in {file}. Adding to the list.")
                ret[file] = parser
            else:
                self.l.debug(f'Rejecting file {file} as the name {data["class_"]} did not match.')

        return ret

    def __extractPersonsFromSinglePreview(self, parser: html_parser.HtmlParser):
        """Extract the persons listed in one preview-round table.

        :param parser: parser already loaded with the preview file's HTML
        :return: list of HtmlPerson entries (name, start id, group)
        :raises Exception: if the table layout is not the expected one
        """
        imported = parser.parsePreparationRound()
        parser.cleanPreparationRoundImport(imported)
        data = imported['data']

        # Level 5 is a custom "trace" level below DEBUG; pass the payload as a
        # lazy %-argument instead of using a non-string as the message itself.
        self.l.log(5, '%s', data)

        if data['titles'][0] != 'Wertungsrichter':
            self.l.fatal('Cannot parse the parsed content of the preview file.')
            raise Exception('Incompatible export file')

        # The first table row holds the start numbers; empty cells are padding
        # and are skipped. `indices` remembers the occupied column positions.
        ids = []
        names = []
        indices = []
        for index, e in enumerate(data['table'][0]):
            if e['text'] == '':
                continue
            indices.append(index)
            ids.append(e['text'])
            names.append(e['meta'])

        if data['titles'][-1] == 'Startgruppe':
            # Combined competition: the group is listed per person in the last row.
            self.l.debug('Combined competition found. Extracting group from table')
            groups = [data['table'][-1][idx]['text'] for idx in indices]
        else:
            # Single-group competition: one group for everybody, from the title.
            self.l.debug('Using group from the title.')
            group = parser.guessDataFromHtmlTitle(imported['title'])['group']
            groups = [group] * len(indices)

        ret = [HtmlPerson(name, id_, group) for name, id_, group in zip(names, ids, groups)]

        self.l.log(5, '%s', ret)
        return ret

    def extractPersonsFromPreview(self, parsers):
        """Run person extraction for every filtered preview file.

        NOTE(review): the extracted person lists are currently discarded;
        presumably a later change will collect them — confirm intent.
        """
        for file in parsers:
            self.l.debug('Extracting person data from %s', file)
            self.__extractPersonsFromSinglePreview(parsers[file])
|
||||
|
||||
class DataWorker:
    """Combines the imported CSV rows and HTML data into per-person results.

    (Only the beginning of this class is visible in this diff hunk.)
    """

    def __init__(self):
        # Shared logger for the worker classes.
        self.l = logging.getLogger('solo_turnier.worker')
|
||||
@ -297,3 +376,5 @@ class DataWorker:
|
||||
ret.append(competitions[0])
|
||||
|
||||
return ret
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user