#! /usr/bin/python3

import html
import os
from html.parser import HTMLParser

def replace_eol (file):
    """Rewrite *file* in place with Windows line endings converted to Unix.

    The file is handled as raw bytes, so no text decoding takes place:
    every CRLF pair becomes a single LF and all other bytes (including a
    lone CR) are left untouched.
    """
    with open(file, 'rb') as source:
        raw = source.read()

    # Splitting on CRLF and re-joining with LF is the same as replacing
    # each CRLF occurrence, scanning left to right.
    normalized = b'\n'.join(raw.split(b'\r\n'))

    with open(file, 'wb') as target:
        target.write(normalized)


class MyHTMLParser(HTMLParser):
    """Add hierarchical ``id`` attributes to the headers of an HTML file.

    Typical use is ``feed(path)`` followed by ``save()``.  While parsing,
    the text of the most recent header at every level is remembered in
    ``self.path``; when a header closes, the non-empty entries from the
    top level down to that header are joined with ``'__'``, spaces become
    ``'-'``, ``'í'`` becomes ``'i'`` and the result is lower-cased to form
    the id.

    Only headers that appear in the file exactly as ``<hN>text</hN>``
    (lower-case tag, no attributes, and ``text`` identical to the data
    reported by the parser) are rewritten, because the rewrite is a plain
    string replacement on the file content.
    """

    def __init__ (self):
        super().__init__()
        self.text = ""
        self.header = ""
        self.header_tags = ['h1','h2','h3','h4','h5','h6','h7','h8']
        self.header_tags_len = len(self.header_tags)
        # One slot per header level holding the latest header text seen.
        self.path = [''] * self.header_tags_len
        # Tag whose data is currently being received.  It must exist before
        # parsing starts: data can arrive before any start tag (for example
        # the newline that follows a doctype declaration).
        self.curr_tag = None
        self.content = ""
        self.input_file = None
        self.encoding = "utf-8"

    def handle_starttag(self, tag, attrs):
        """Remember the opened tag and start a fresh header if it is one."""
        self.curr_tag = tag

        self.text = ""

        if tag in self.header_tags:
            self.header = ""

    def handle_endtag(self, tag):
        """On a closing header tag, insert the computed id into ``content``."""
        if tag in self.header_tags:
            level = self.header_tags.index(tag)
            self.path[level] = self.header
            # Deeper levels belong to the previous section; forget them.
            self.path[level + 1:] = [''] * (self.header_tags_len - level - 1)

            full_path = '__'.join(part for part in self.path if part != '')
            full_path = full_path.replace(' ', '-').replace('í','i').lower()

            # The id is written inside a double-quoted attribute, so quotes
            # and markup characters taken from the header text are escaped.
            anchor = html.escape(full_path, quote=True)

            start_tag = '<' + tag + '>'
            end_tag = '</' + tag + '>'
            old = start_tag + self.header + end_tag
            new = '<' + tag + ' id="' + anchor + '">' + self.header + end_tag
            # Only the first match is replaced: headers rewritten earlier
            # already carry an id and therefore no longer match ``old``.
            self.content = self.content.replace(old, new, 1)

        # Text that follows a closing tag does not belong to that tag.
        self.curr_tag = None
        self.text = ""

    def handle_data(self, data):
        """Accumulate text; text directly inside a header also feeds ``header``."""
        if self.curr_tag in self.header_tags:
            self.header = self.header + data

        self.text = self.text + data

    def feed (self, input_file, encoding="utf-8"):
        """Read *input_file* and parse it, updating ``self.content``.

        Unlike ``HTMLParser.feed`` this takes a file path, not markup.

        input_file -- path of the HTML file to process.
        encoding   -- text encoding used to read the file (and later to
                      write it in ``save``); UTF-8 unless stated otherwise,
                      so the result does not depend on the platform locale.
        """
        self.input_file = input_file
        self.encoding = encoding

        with open(self.input_file, "rt", encoding=self.encoding) as f:
            self.content = f.read()

        super().feed(self.content)

    def save (self):
        """Write the rewritten content back to the file given to ``feed``.

        Raises RuntimeError if ``feed`` has not been called yet.
        """
        if self.input_file is None:
            raise RuntimeError("save() called before feed(): no file loaded")

        with open(self.input_file, "wt", encoding=self.encoding) as f:
            f.write(self.content)

# Script body, executed whenever this module is run or imported.
# It processes "tornavis.html" (resolved against the current working
# directory) in place: load and parse the file, write the rewritten content
# back, then convert any CRLF line endings in the written file to LF.
parser = MyHTMLParser()
parser.feed("tornavis.html")
parser.save()
# save() writes in text mode; replace_eol works on the raw bytes afterwards.
replace_eol ("tornavis.html")