import {
    API_REDLINE_TAG,
    ITaggedParagraph,
    IDocumentParser,
    IDocumentParserStatic,
    PARAGRAPH_INDEX_ATTRIBUTE,
    IHtmlParagraphCollection,
    STYLES_EXTRACTED,
} from './documentParser-types';

/**
 * Static helpers that turn an HTML document (as a string) into a collection
 * of paragraph records keyed by the value of each paragraph's
 * `PARAGRAPH_INDEX_ATTRIBUTE` attribute, plus the string utilities used to
 * normalise each paragraph's inner markup.
 *
 * All members are static; the class is never instantiated in this file.
 */
const DocumentParser: IDocumentParserStatic = class DocumentParser
    implements IDocumentParser
{
    /**
     * Reports whether a table cell contains no indexed paragraphs.
     *
     * @param cell - The cell to inspect. May be `undefined` when a caller
     *   indexes past either end of a row.
     * @returns `true` when the cell exists and has no descendant carrying
     *   `PARAGRAPH_INDEX_ATTRIBUTE`; `false` when the cell has at least one
     *   such descendant or does not exist.
     */
    static cellIsEmpty(cell: HTMLTableCellElement | undefined): boolean {
        // A missing cell is deliberately "not empty": parseTableCells relies
        // on this to stop its neighbour scans at the row boundaries.
        if (cell == null) {
            return false;
        }

        return (
            cell.querySelectorAll('[' + PARAGRAPH_INDEX_ATTRIBUTE + ']')
                .length === 0
        );
    }

    /**
     * Builds a constructed style sheet from the `<style>` elements found in
     * the given HTML.
     *
     * Any `<style>` element whose text contains the substring `body` is
     * skipped entirely.
     *
     * @param text - HTML source to scan for `<style>` elements.
     * @returns A new `CSSStyleSheet` holding one inserted rule per accepted
     *   `<style>` element, in document order.
     */
    static extractStyles(text: string): CSSStyleSheet {
        const parsedHtml = new DOMParser().parseFromString(text, 'text/html');
        const styleSheet = new CSSStyleSheet();
        const styleElements = Array.from(
            parsedHtml.getElementsByTagName('style')
        );

        // insertRule() prepends by default, so walking the elements in
        // reverse leaves the rules in document order.
        for (const style of styleElements.reverse()) {
            // textContent is used instead of outerText: for elements of a
            // DOMParser document (never rendered) the two are equivalent,
            // and textContent is available in every DOM implementation.
            const css = style.textContent ?? '';

            // NOTE(review): insertRule() takes a single rule; this presumably
            // expects one rule per <style> element - confirm against the
            // documents that are fed in.
            if (!css.includes('body')) {
                styleSheet.insertRule(css);
            }
        }

        return styleSheet;
    }

    /**
     * Parses an HTML document into a paragraph collection.
     *
     * Only direct children of `<body>` are considered: for a `DIV` every
     * indexed descendant is parsed as a plain paragraph, a `P` is parsed
     * itself, and a `TABLE` is parsed cell by cell. Other elements are
     * ignored. Tables are numbered from 1 in the order they are encountered.
     *
     * @param text - HTML source of the document.
     * @returns The collected paragraphs keyed by their index attribute value.
     */
    static enhancedParseDocument(text: string): IHtmlParagraphCollection {
        const body = this.extractDirectChildrenOfBody(text);
        const isStylesExtracted =
            body.getAttribute(STYLES_EXTRACTED) === 'true';

        const htmlParagraphs: IHtmlParagraphCollection = {};
        let tableId = 1;

        for (const element of body.children) {
            switch (element.tagName) {
                case 'DIV': {
                    const paragraphs = element.querySelectorAll(
                        `[${PARAGRAPH_INDEX_ATTRIBUTE}]`
                    );
                    for (const paragraph of paragraphs) {
                        this.parseParagraph(
                            paragraph,
                            htmlParagraphs,
                            isStylesExtracted
                        );
                    }
                    break;
                }
                case 'P':
                    this.parseParagraph(
                        element,
                        htmlParagraphs,
                        isStylesExtracted
                    );
                    break;
                case 'TABLE':
                    // Merge in place rather than re-spreading the whole
                    // accumulated collection for every table.
                    Object.assign(
                        htmlParagraphs,
                        this.parseTable(element, tableId)
                    );
                    tableId++;
                    break;
                default:
                    break;
            }
        }

        return htmlParagraphs;
    }

    /**
     * Parses the given HTML and returns its `<body>` element.
     *
     * @param text - HTML source to parse.
     * @returns The `<body>` element of the parsed document.
     */
    static extractDirectChildrenOfBody(text: string): HTMLBodyElement {
        const parsedHtml = new DOMParser().parseFromString(text, 'text/html');
        // NOTE(review): the non-null assertion assumes the parsed document
        // always has a <body> - confirm for unusual inputs.
        return parsedHtml.querySelector('body')!;
    }

    /**
     * Parses the given HTML and returns every `<p>` element in it.
     *
     * @param text - HTML source to parse.
     * @returns The collection of all `<p>` elements of the parsed document.
     */
    static extractParagraphs(
        text: string
    ): HTMLCollectionOf<HTMLParagraphElement> {
        const parsedHtml = new DOMParser().parseFromString(text, 'text/html');
        return parsedHtml.getElementsByTagName('p');
    }

    /**
     * Records every indexed `<p>` inside a table cell into `htmlParagraphs`.
     *
     * Paragraphs whose index attribute is missing or blank are skipped. The
     * paragraph markup is normalised with `tagParagraph` (without the
     * styles-extracted flag) and `classes` is always stored as an empty
     * string.
     *
     * @param cell - The table cell to read paragraphs from.
     * @param tableId - Identifier of the enclosing table.
     * @param rowNumber - 1-based row number; stored as a string.
     * @param cellNumber - 1-based cell number within the row; stored as a
     *   string.
     * @param prevEmptyCells - Empty-cell count to record for preceding cells.
     * @param nextEmptyCells - Empty-cell count to record for following cells.
     * @param totalParagraphsInTable - Indexed paragraph count of the table.
     * @param htmlParagraphs - Collection that is mutated with the results.
     * @param cellsInRow - Number of cells in the enclosing row.
     */
    static parseCellParagraphs(
        cell: HTMLTableCellElement,
        tableId: number,
        rowNumber: number,
        cellNumber: number,
        prevEmptyCells: number,
        nextEmptyCells: number,
        totalParagraphsInTable: number,
        htmlParagraphs: IHtmlParagraphCollection,
        cellsInRow: number
    ): void {
        // Span attributes belong to the cell, so parse them once rather than
        // once per paragraph.
        const rowSpanAttr = cell.getAttribute('rowspan');
        const colSpanAttr = cell.getAttribute('colspan');
        const rowSpan = rowSpanAttr ? parseInt(rowSpanAttr, 10) : null;
        const colSpan = colSpanAttr ? parseInt(colSpanAttr, 10) : null;

        for (const paragraph of cell.getElementsByTagName('p')) {
            const location = paragraph.getAttribute(PARAGRAPH_INDEX_ATTRIBUTE);

            // Same rule as parseParagraph: a missing or blank index means
            // the paragraph is not recorded (a blank one would otherwise be
            // stored under the key '' with a NaN location).
            if (!location) {
                continue;
            }

            const taggedParagraph = this.tagParagraph(paragraph.innerHTML);

            htmlParagraphs[location] = {
                location: parseInt(location, 10),
                text: taggedParagraph.text,
                html: taggedParagraph.html,
                classes: '',
                row: String(rowNumber),
                cell: String(cellNumber),
                cellsInRow,
                rowSpan,
                colSpan,
                tableId,
                totalParagraphsInTable,
                prevEmptyCells,
                nextEmptyCells,
            };
        }
    }

    /**
     * Records a single non-table paragraph into `htmlParagraphs`.
     *
     * Nothing is recorded when the element has no (or a blank) index
     * attribute. All table-related fields of the stored record are `null`.
     *
     * @param paragraph - Element whose inner HTML is the paragraph content.
     * @param htmlParagraphs - Collection that is mutated with the result.
     * @param isStylesExtracted - When `true`, the element's `class` attribute
     *   is stored in `classes` and inline-style rewriting is skipped.
     */
    static parseParagraph(
        paragraph: Element,
        htmlParagraphs: IHtmlParagraphCollection,
        isStylesExtracted: boolean
    ): void {
        const location = paragraph.getAttribute(PARAGRAPH_INDEX_ATTRIBUTE);

        // Bail out before serialising and rewriting the markup: unindexed
        // paragraphs are never stored, so that work would be thrown away.
        if (!location) {
            return;
        }

        const taggedParagraph = this.tagParagraph(
            paragraph.innerHTML,
            isStylesExtracted
        );
        const classes = isStylesExtracted
            ? paragraph.getAttribute('class') ?? ''
            : '';

        htmlParagraphs[location] = {
            location: parseInt(location, 10),
            text: taggedParagraph.text,
            html: taggedParagraph.html,
            classes,
            row: null,
            cell: null,
            rowSpan: null,
            colSpan: null,
            tableId: null,
            totalParagraphsInTable: null,
            prevEmptyCells: null!,
            nextEmptyCells: null!,
            cellsInRow: null!,
        };
    }

    /**
     * Parses every row of a table into a fresh paragraph collection.
     *
     * Rows are the table's `<tr>` descendants, numbered from 1; cells are
     * each row's `<td>` descendants.
     *
     * @param table - The table element to parse.
     * @param tableId - Identifier stored on every paragraph of this table.
     * @returns The indexed paragraphs found in the table.
     */
    static parseTable(
        table: Element,
        tableId: number
    ): IHtmlParagraphCollection {
        const htmlParagraphs: IHtmlParagraphCollection = {};
        const totalParagraphsInTable = table.querySelectorAll(
            `[${PARAGRAPH_INDEX_ATTRIBUTE}]`
        ).length;

        let rowNumber = 1;
        for (const row of table.getElementsByTagName('tr')) {
            this.parseTableCells(
                row.getElementsByTagName('td'),
                tableId,
                rowNumber,
                totalParagraphsInTable,
                htmlParagraphs
            );
            rowNumber++;
        }

        return htmlParagraphs;
    }

    /**
     * Parses every cell of one table row.
     *
     * For each cell the run of consecutive empty cells directly before and
     * directly after it is counted. The preceding count is only passed on
     * when it covers every cell before the current one (i.e. the row starts
     * with empty cells); otherwise 0 is passed.
     *
     * @param cells - The cells of the row.
     * @param tableId - Identifier of the enclosing table.
     * @param rowNumber - 1-based row number.
     * @param totalParagraphsInTable - Indexed paragraph count of the table.
     * @param htmlParagraphs - Collection that is mutated with the results.
     */
    static parseTableCells(
        cells: HTMLCollectionOf<HTMLTableCellElement>,
        tableId: number,
        rowNumber: number,
        totalParagraphsInTable: number,
        htmlParagraphs: IHtmlParagraphCollection
    ): void {
        for (let i = 0; i < cells.length; i++) {
            // Out-of-range indices yield undefined, which cellIsEmpty treats
            // as "not empty", so both scans stop at the row boundaries.
            let prevEmptyCells = 0;
            while (this.cellIsEmpty(cells[i - 1 - prevEmptyCells])) {
                prevEmptyCells++;
            }

            let nextEmptyCells = 0;
            while (this.cellIsEmpty(cells[i + 1 + nextEmptyCells])) {
                nextEmptyCells++;
            }

            this.parseCellParagraphs(
                cells[i],
                tableId,
                rowNumber,
                i + 1,
                prevEmptyCells === i ? prevEmptyCells : 0,
                nextEmptyCells,
                totalParagraphsInTable,
                htmlParagraphs,
                cells.length
            );
        }
    }

    /**
     * Replaces the characters in `[startIndex, endIndex)` with `substitute`.
     *
     * @param text - The original string.
     * @param startIndex - Index of the first character to replace.
     * @param endIndex - Index just past the last character to replace.
     * @param substitute - Text inserted in place of the removed range.
     * @returns The resulting string.
     */
    static replaceRange(
        text: string,
        startIndex: number,
        endIndex: number,
        substitute: string
    ): string {
        return (
            text.substring(0, startIndex) +
            substitute +
            text.substring(endIndex)
        );
    }

    /**
     * Rewrites inline CSS declarations into bare, space-separated tokens.
     *
     * `color` declarations are dropped; `font-weight`, `font-style` and
     * `text-decoration` are reduced to their value; `padding-left/right`
     * become `padding-<side>-<value>`. Afterwards whitespace before `">` is
     * removed and empty `style=""` attributes are stripped.
     *
     * @param text - Markup containing inline style declarations.
     * @returns The rewritten markup.
     */
    static removeCssPropertyKeys(text: string): string {
        text = text.replace(/color:\s#?[\w\d]+;/g, '');
        text = text.replace(/font-weight:\s?(\w+);/g, '$1 ');
        text = text.replace(/font-style:\s?(\w+);/g, '$1 ');
        text = text.replace(/text-decoration:\s?(\w+);/g, '$1 ');
        text = text.replace(
            /padding-(left|right):\s?(\w+);/g,
            'padding-$1-$2 '
        );
        // Each replacement above leaves a trailing space; drop the one that
        // ends up right before the closing quote of the attribute.
        text = text.replace(/\s">/g, '">');
        text = text.replace(/\s+style=""/g, '');
        return text;
    }

    /**
     * Turns the `style="` attribute that directly follows `<span` into
     * `class="`.
     *
     * @param text - Markup to rewrite.
     * @returns The rewritten markup.
     */
    static replaceStyleTagsWithClassTags(text: string): string {
        return text.replace(/<span\s+(style=")/g, '<span class="');
    }

    /**
     * Merges two adjacent `class` attributes into one when the first holds a
     * single word, e.g. `class="a" class="b"` becomes `class="a b"`.
     *
     * @param text - Markup to rewrite.
     * @returns The rewritten markup.
     */
    static removeExtraClassFromTag(text: string): string {
        return text.replace(/(class="[\w]+)"\s+class="/g, '$1 ');
    }

    /**
     * Unwraps attribute-less `<span>` elements that contain only text.
     *
     * @param text - Markup to rewrite.
     * @returns The markup with such spans replaced by their text.
     */
    static removeUnstyledSpanTags(text: string): string {
        return text.replace(/<span>([^<]+)<\/span>/g, '$1');
    }

    /**
     * Removes every `<...>` tag from the text, leaving the content between
     * tags untouched.
     *
     * @param text - Markup to strip.
     * @returns The text without tags.
     */
    static stripHtml(text: string): string {
        return text.replace(/<[^>]+>/g, '');
    }

    /**
     * Normalises a paragraph's inner HTML and derives its plain text.
     *
     * Unless styles were already extracted, inline styles are rewritten to
     * class tokens first. Then padding declarations are converted,
     * attribute-less spans are unwrapped, whitespace around tag brackets is
     * trimmed and every case-insensitive match of `API_REDLINE_TAG` is
     * replaced by `redlined-text`.
     *
     * @param text - Inner HTML of the paragraph.
     * @param isStylesExtracted - When truthy, the inline-style rewriting is
     *   skipped.
     * @returns The normalised markup as `html` and its tag-free form as
     *   `text`.
     */
    static tagParagraph(
        text: string,
        isStylesExtracted?: boolean
    ): ITaggedParagraph {
        if (!isStylesExtracted) {
            text = this.removeCssPropertyKeys(text);
            text = this.replaceStyleTagsWithClassTags(text);
            text = this.removeExtraClassFromTag(text);
        }
        // Applied unconditionally so padding declarations are also converted
        // when the inline-style rewriting above was skipped.
        text = text.replace(
            /padding-(left|right):\s?(\w+);/g,
            'padding-$1-$2 '
        );
        text = this.removeUnstyledSpanTags(text);
        text = this.trimWhitespace(text);
        // API_REDLINE_TAG is used as a regular-expression pattern here.
        const redlineRegex = new RegExp(API_REDLINE_TAG, 'gmi');
        text = text.replace(redlineRegex, 'redlined-text');

        return {
            text: this.stripHtml(text),
            html: text,
        };
    }

    /**
     * Collapses whitespace runs before `<` to a single space and removes
     * whitespace runs before `>`.
     *
     * @param text - Markup to tidy.
     * @returns The tidied markup.
     */
    static trimWhitespace(text: string): string {
        return text.replace(/\s+</g, ' <').replace(/\s+>/g, '>');
    }
};

export default DocumentParser;
