"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.parseHTML = exports.isHTML = void 0;
const node_html_parser_1 = require("node-html-parser");
const segments_1 = require("../segments");
const timestamp_1 = require("../timestamp");
/**
 * Regular expression to detect an opening `<html>` tag at the very start of the data.
 *
 * Case-insensitive. Allows spaces between `<` and `html`, and any same-line characters
 * (e.g. attributes) before the closing `>`. Because the pattern is anchored with `^`,
 * leading whitespace in the data prevents a match.
 */
const PATTERN_HTML_TAG = /^< *html.*?>/i;
/**
 * Regular expression to detect transcript data where `<time>` is followed by `<p>`
 *
 * The `<time>` content must start with a digit and may continue with digits, `:`, `.` or `,`.
 * Only spaces, tabs, carriage returns and line feeds may separate the two elements.
 * The `<p>` element must open and close on a single line (`.` does not match line breaks)
 * and must not carry attributes. The match is unanchored and case-insensitive.
 *
 * Named groups: `time` (the whole `<time>` element) and `body` (the whole `<p>` element).
 */
const PATTERN_HTML_TIME_P = /(?<time><time>\d[\d:.,]*?<\/time>)[ \t\r\n]*?(?<body><p>.*?<\/p>)/i;
/**
 * Regular expression to detect transcript data where `<p>` is followed by `<time>`
 *
 * Mirror image of the `<time>`-then-`<p>` pattern: a single-line, attribute-free `<p>`
 * element, optional spaces/tabs/line breaks, then a `<time>` element whose content starts
 * with a digit. The match is unanchored and case-insensitive.
 *
 * Named groups: `body` (the whole `<p>` element) and `time` (the whole `<time>` element).
 */
const PATTERN_HTML_P_TIME = /(?<body><p>.*?<\/p>)[ \t\r\n]*?(?<time><time>\d[\d:.,]*?<\/time>)/i;
/**
 * Checks whether the transcript data looks like the HTML transcript format
 *
 * The data is accepted when it begins with an HTML comment (`<!--`), begins with an `<html>` tag,
 * or contains a `<time>` element directly next to a `<p>` element (in either order).
 * The first two checks look at the very start of the string, so callers should trim the data first.
 *
 * @param data The transcript data
 * @returns True: data is valid HTML transcript format
 */
const isHTML = (data) => {
    // an HTML comment at the top of the data is enough on its own
    if (data.startsWith("<!--")) {
        return true;
    }
    // otherwise any one of the structural patterns has to match
    const htmlPatterns = [PATTERN_HTML_TAG, PATTERN_HTML_TIME_P, PATTERN_HTML_P_TIME];
    return htmlPatterns.some((pattern) => pattern.test(data));
};
exports.isHTML = isHTML;
/**
 * Stores the content of an HTML element in the matching field of a segment part
 *
 * `<cite>` maps to `cite`, `<time>` maps to `time` and `<p>` maps to `text`. If the matching field
 * of the current part is still empty it receives the element's inner HTML; otherwise the value is
 * placed in the same field of a fresh "next" part, which signals that a new segment has started.
 * Elements with any other tag name (including nodes without a tag name) are ignored.
 *
 * The passed-in `segmentPart` is not modified; an updated copy is returned instead.
 *
 * @param element HTML segment to check
 * @param segmentPart Current segment parts
 * @returns Updated HTML Segment part and segment data for next segment (if fields encountered)
 */
const updateSegmentPartFromElement = (element, segmentPart) => {
    // work on a copy so the caller's object is never mutated
    const current = { ...segmentPart };
    const next = {
        cite: "",
        time: "",
        text: "",
    };
    let field;
    switch (element.tagName) {
        case "CITE":
            field = "cite";
            break;
        case "TIME":
            field = "time";
            break;
        case "P":
            field = "text";
            break;
        default:
            // not a tag that carries segment data
            field = undefined;
    }
    if (field !== undefined) {
        // an already populated field means this element belongs to the following segment
        const target = current[field] === "" ? current : next;
        target[field] = element.innerHTML;
    }
    return { current, next };
};
/**
 * Builds a Segment from the collected HTML segment parts
 *
 * The start time is parsed from `segmentPart.time`. The end time is initialised to 0; the caller is
 * expected to fill it in once the following segment is known. The speaker is taken from
 * `segmentPart.cite` (falling back to `lastSpeaker` when empty), with the first `:` removed and
 * trailing whitespace trimmed.
 *
 * @param segmentPart HTML segment data
 * @param lastSpeaker Name of last speaker. Will be used if no speaker found in `segmentPart`
 * @returns Created segment
 */
const createSegmentFromSegmentPart = (segmentPart, lastSpeaker) => {
    const { cite, time, text } = segmentPart;
    const rawSpeaker = cite || lastSpeaker;
    const begin = (0, timestamp_1.parseTimestamp)(time);
    const segment = {
        startTime: begin,
        startTimeFormatted: timestamp_1.TimestampFormatter.format(begin),
        endTime: 0,
        endTimeFormatted: timestamp_1.TimestampFormatter.format(0),
        speaker: rawSpeaker.replace(":", "").trimEnd(),
        body: text,
    };
    return segment;
};
/**
 * Parse HTML data and create {@link Segment} for each segment data found in data
 *
 * Elements are consumed in order and accumulated into a segment part (cite / time / text).
 * A segment is emitted when an element repeats a field that is already filled (which marks the
 * start of the following segment) or when the last element has been reached. When a segment is
 * emitted, the end time of the previously emitted segment is set to the new segment's start time.
 * Parts without time information are skipped with a console warning.
 *
 * If the last element itself starts a new part, that trailing part is processed as well rather
 * than being silently discarded, so the result does not depend on whether a whitespace node
 * happens to follow the last element.
 *
 * @param elements HTML elements containing transcript data
 * @returns Segments created from HTML data
 */
const getSegmentsFromHTMLElements = (elements) => {
    let outSegments = [];
    let lastSpeaker = "";
    let segmentPart = {
        cite: "",
        time: "",
        text: "",
    };
    // True when at least one field of the part has been filled in
    const hasData = (part) => Object.values(part).some((value) => value !== "");
    // Turns a part into a Segment and appends it, or warns if the required time is missing
    const emitSegment = (part, count) => {
        // time is required
        if (part.time === "") {
            console.warn(`Segment ${count} does not contain time information, ignoring`);
            return;
        }
        const segment = createSegmentFromSegmentPart(part, lastSpeaker);
        lastSpeaker = segment.speaker;
        // update endTime of previous Segment
        const previous = outSegments[outSegments.length - 1];
        if (previous !== undefined) {
            previous.endTime = segment.startTime;
            previous.endTimeFormatted = timestamp_1.TimestampFormatter.format(previous.endTime);
        }
        outSegments = (0, segments_1.addSegment)(segment, outSegments);
    };
    elements.forEach((element, count) => {
        const { current, next } = updateSegmentPartFromElement(element, segmentPart);
        segmentPart = current;
        const isLastElement = count === elements.length - 1;
        if (!isLastElement && !hasData(next)) {
            // still collecting fields for the current segment
            return;
        }
        emitSegment(segmentPart, count);
        // whatever spilled over becomes the start of the following segment
        segmentPart = next;
        if (isLastElement && hasData(segmentPart)) {
            // no further element will trigger a flush, so handle the trailing part now
            emitSegment(segmentPart, count);
        }
    });
    return outSegments;
};
/**
 * Parse HTML data to an Array of {@link Segment}
 *
 * The trimmed data is first validated with `isHTML`; the original (untrimmed) data is then parsed.
 * Segments are read from the child nodes of `<body>` when the document has one, otherwise from
 * the child nodes of `<html>`, and otherwise from the top level of the parsed fragment.
 *
 * @param data The transcript data
 * @returns An array of Segments from the parsed data
 * @throws {TypeError} When `data` is not valid HTML format
 */
const parseHTML = (data) => {
    if (!(0, exports.isHTML)(data.trim())) {
        throw new TypeError(`Data is not valid HTML format`);
    }
    const parsed = (0, node_html_parser_1.parse)(data);
    const [htmlElement] = parsed.getElementsByTagName("html");
    if (htmlElement === undefined) {
        // bare fragment: the transcript elements sit at the top level
        return getSegmentsFromHTMLElements(parsed.childNodes);
    }
    // full document: prefer the contents of <body>, fall back to <html> itself
    const [bodyElement] = htmlElement.getElementsByTagName("body");
    const root = bodyElement === undefined ? htmlElement : bodyElement;
    return getSegmentsFromHTMLElements(root.childNodes);
};
exports.parseHTML = parseHTML;
