from bs4 import BeautifulSoup
import logging
import re


class HtmlParticipant:
    """A single participant as extracted from an HTML result page."""

    def __init__(self, name, place, finalist):
        self.name = name
        self.place = place
        self.finalist = finalist

    def __str__(self):
        return f'{self.name} (with place {self.place})'


class HtmlImport:
    """Container for the event title and the participants keyed by start number."""

    def __init__(self, title: str, participants: dict[str, HtmlParticipant]):
        self.title = title
        self.participants = participants


class HtmlParser:
    def __init__(self, text: str):
        self.l = logging.getLogger('solo_turnier.html_parser')
        self.soup = BeautifulSoup(text, 'html.parser')

    def getEventTitle(self):
        """Return the event title from the page header."""
        return self.soup.find('div', class_='eventhead').table.tr.td.contents[0]

    def guessDataFromHtmlTitle(self, title=None):
        """Derive dance, class and age group from a title like '... ETW, Solos <group> <class> <dance>'."""
        if title is None:
            title = self.getEventTitle()

        match = re.compile('.*?ETW, Solos (.*)').match(title)
        if match is None:
            raise Exception(f'Cannot parse title "{title}"')
        rest = match.group(1)
        rawGroup, rawClass, dance = rest.split(' ', 2)

        classMap = {
            'Newcomer': 'Newc.',
            'Beginner': 'Beg.',
            'Advanced': 'Adv.',
        }
        groupMap = {
            'Kinder': 'Kin.',
            'Junioren': 'Jun.',
            'Jugend': 'Jug.',
        }

        return {
            'dance': dance.strip(),
            'class_': classMap.get(rawClass, rawClass),
            'group': groupMap.get(rawGroup, rawGroup),
        }

    def parseString(self, text: str):
        """Parse a result page and return an HtmlImport with all participants.

        Rows of the first table ('Endrunde') are marked as finalists, rows of
        all remaining tables as non-finalists.
        """
        soup = BeautifulSoup(text, 'html.parser')

        participants = {}

        def __parseRows(rows, finalist: bool):
            def __parseRow(row):
                tds = row.find_all('td')
                # Entries look like 'Name (123)': capture the name and the start number.
                regex = re.compile(r'(.*) \(([0-9]+)\)')

                place = tds[0].contents[0]
                match = regex.fullmatch(tds[1].contents[0])
                if match is None:
                    raise Exception(f'Could not match {tds} to regex search pattern')

                name = match.group(1)
                number = match.group(2)

                participant = HtmlParticipant(name, place, finalist)
                participants[number] = participant

            for row in rows:
                __parseRow(row)

        def __parseFirstTable(table):
            roundName = table.tr.td.contents[0]
            if roundName != 'Endrunde':
                raise Exception('Could not parse HTML file')

            # Skip the two header rows; the remaining rows are the finalists.
            __parseRows(table.find_all('tr')[2:], True)

        def __parseRemainingTables(tables):
            for table in tables:
                __parseRows(table.find_all('tr')[2:], False)

        tables = soup.find('div', class_='extract').find_all('table')
        if len(tables) > 0:
            __parseFirstTable(tables[0])
            __parseRemainingTables(tables[1:])

        title = soup.find('div', class_='eventhead').table.tr.td.contents[0]

        ret = HtmlImport(title, participants)
        return ret

    def parsePreparationRound(self):
        """Extract the raw table data of a preparation round.

        Returns a dict with the event title and a 'data' dict holding the row
        titles and the table cells ({'text': ..., 'meta': ...} per cell).
        """
        title = self.soup.find('div', class_='eventhead').table.tr.td.contents[0]

        tableData = []
        rowTitles = []

        def __mapBr(td):
            # Replace <br> tags by newlines so the cell collapses into one string.
            for br in td.find_all('br'):
                br.replace_with('\n')
            td.smooth()
            return td

        def __extractTitles(table):
            for row in table.find_all('tr')[1:]:
                rowTitles.append(__mapBr(row.td).string)

        def __extractColumns(table):
            content = []

            def __extractContent(td):
                for br in td.find_all('br'):
                    br.replace_with('\n')
                # A nested <span> carries meta information; pull it out of the cell.
                span = td.span
                if span is not None:
                    span = span.extract()
                    meta = span.string
                else:
                    meta = None
                td.smooth()
                return {'text': td.string, 'meta': meta}

            def __extractRow(row):
                entries = []
                for entry in row.find_all('td')[1:]:
                    entries.append(__extractContent(entry))
                return entries

            for row in table.find_all('tr')[1:]:
                content.append(__extractRow(row))

            return content

        def __mergeColumns(columns1, columns2):
            # Concatenate the rows of two tables column-wise.
            return list(map(lambda x, y: x + y, columns1, columns2))

        extract = self.soup.find('div', class_='extract')
        tables = extract.find_all('table', class_='tab1')

        __extractTitles(tables[0])
        tableData = __extractColumns(tables[0])
        for table in tables[1:]:
            tableData = __mergeColumns(tableData, __extractColumns(table))

        data = {
            'titles': rowTitles,
            'table': tableData,
        }
        return {'title': title, 'data': data}

    def cleanPreparationRoundImport(self, data):
        """Strip whitespace and non-breaking spaces from a parsePreparationRound() result in place."""
        def __cleanTable(table):
            def __cleanText(s: str):
                self.l.debug('cleaning string %s', s)
                return s.strip(' \n\xa0')

            def __cleanEntry(entry):
                entry['text'] = __cleanText(entry['text'])
                if entry['meta'] is not None:
                    entry['meta'] = __cleanText(entry['meta'])

            for row in table:
                for entry in row:
                    self.l.debug(entry)
                    __cleanEntry(entry)

        data['title'] = data['title'].strip()
        __cleanTable(data['data']['table'])
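

# A minimal usage sketch of the parser above, not part of the module's API.
# The file name 'final_round.html' is a placeholder for an exported result
# page that contains an 'eventhead' header and an 'extract' section.
if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)

    with open('final_round.html', encoding='utf-8') as f:
        html = f.read()

    parser = HtmlParser(html)
    result = parser.parseString(html)

    print(result.title)
    for number, participant in result.participants.items():
        status = 'finalist' if participant.finalist else 'non-finalist'
        print(f'#{number}: {participant} [{status}]')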