import colorsys
import csv
import re
from bs4 import BeautifulSoup

# Category assigned to extracted entries until a heading is seen.
# parse_html_file() declares this name global and overwrites it with the text
# of each <h2>/<h3> it encounters, so the value persists between calls.
current_category = "unknown"


def extract_color(style_value):
    """Return the first '#'-prefixed hex run (3 to 6 digits) in a style string.

    The result keeps its leading '#'. An empty string is returned when the
    style contains no such run.
    """
    found = re.search(r'#([0-9A-Fa-f]{3,6})', style_value)
    if found is None:
        return ""
    return "#" + found.group(1)


def hex_to_hsv(hex_color):
    """
    Convert a #RRGGBB or #RGB hex color string into HSV components.

    Args:
        hex_color: Hex color string; any leading '#' characters are removed.
            The short three-digit form is expanded by doubling each digit.

    Returns:
        A dict with keys "h", "s" and "v". "h" is the hue in degrees
        (0-360); "s" and "v" are fractions between 0 and 1. All three are
        rounded to six decimal places. Every value is None when the input is
        empty, does not have 3 or 6 digits, or contains a non-hex character.
    """
    no_color = {"h": None, "s": None, "v": None}
    if not hex_color:
        return no_color

    digits = hex_color.lstrip('#')

    # Convert short form RGB -> RRGGBB
    if len(digits) == 3:
        digits = ''.join(c * 2 for c in digits)

    # Validate length and characters together: int(x, 16) alone would raise on
    # letters outside A-F and would silently accept signs or whitespace.
    if re.fullmatch(r'[0-9A-Fa-f]{6}', digits) is None:
        return no_color

    # Scale each 8-bit channel to the 0-1 range expected by colorsys.
    r, g, b = (int(digits[i:i + 2], 16) / 255.0 for i in (0, 2, 4))

    h, s, v = colorsys.rgb_to_hsv(r, g, b)

    return {
        "h": round(h * 360, 6),
        "s": round(s, 6),
        "v": round(v, 6),
    }


def _first_text(container, tag_name, class_candidates):
    """Return the stripped text of the first element found for any candidate class string, else ""."""
    for class_string in class_candidates:
        element = container.find(tag_name, class_=class_string)
        if element is not None:
            return element.get_text(strip=True)
    return ""


def _direct_text(tag):
    """Return the stripped text held directly by `tag`, excluding text inside nested tags."""
    return ''.join(tag.find_all(string=True, recursive=False)).strip()


def _text_before_first_br(tag):
    """Return the stripped text of `tag`'s contents that precede its first <br>."""
    markup = tag.decode_contents().split("<br>")[0]
    markup = markup.split("<br/>")[0]
    return BeautifulSoup(markup, "html.parser").get_text(strip=True)


def parse_html_file(file_path):
    """
    Extract one record per product div found directly under the <article> tag.

    The direct children of the first <article> are walked in document order.
    Each <h2>/<h3> sets the category applied to the product divs that follow
    it; a product div is a <div> whose class list is exactly
    ['ac', 'dg', 'bgl', 'cc', 'pr', 'mt4'].

    Args:
        file_path: Path of the HTML file, read as UTF-8.

    Returns:
        A list of dicts with the keys "category", "color", "h", "s", "v",
        "short_name", "name", "reference", "type" and "material". Text fields
        whose element is missing are empty strings. The list is empty when
        the document has no <article> tag.

    Side effects:
        Rebinds the module-level `current_category` and prints progress and
        debug information to stdout.
    """
    global current_category
    with open(file_path, "r", encoding="utf-8") as f:
        soup = BeautifulSoup(f, "html.parser")

    results = []

    article = soup.find("article")
    if not article:
        print("No <article> tag found in document.")
        return results

    # Candidate class strings, tried in order.
    # NOTE(review): the material fallbacks overlap with the type candidates,
    # so a div may be reported as both type and material - confirm intended.
    type_classes = ("ccf c dib nw bgn", "cct c dib nw bgd")
    material_classes = (
        "cct c dib nw bgb",
        "cct c dib nw bgg",
        "cct c dib nw bgn",
        "cct c dib nw bgd",
    )

    for child in article.children:
        # Text nodes and comments are not tags and have no .get(), so they
        # must be skipped before any attribute access.
        if not getattr(child, "name", None):
            continue

        print(child.get("class", []))  # debug trace of each element's classes

        # Update category when an <h2> or <h3> tag is encountered
        if child.name in ("h2", "h3"):
            current_category = child.get_text(strip=True)
            print("change cat to: " + current_category)
            continue

        # Only product divs are processed; the class list must match exactly.
        if child.name != "div" or child.get("class", []) != ['ac', 'dg', 'bgl', 'cc', 'pr', 'mt4']:
            continue
        div = child

        # Extract color from the inline style, then derive HSV from it
        color = ""
        color_div = div.find("div", class_="pr dib")
        if color_div and color_div.has_attr("style"):
            color = extract_color(color_div["style"])
        hsv = hex_to_hsv(color)

        # Extract name: only the link's own text, not that of nested tags.
        # Either element may be absent, in which case the name stays empty.
        name = ""
        name_holder = div.find("div", class_="ar")
        name_a = name_holder.find("a", class_="pf") if name_holder is not None else None
        if name_a is not None:
            name = _direct_text(name_a)

        # Extract reference (before <br>)
        ref_div = div.find("div", class_="ut")
        reference = _text_before_first_br(ref_div) if ref_div else ""

        results.append({
            "category": current_category,
            "color": color,
            "h": hsv["h"],
            "s": hsv["s"],
            "v": hsv["v"],
            "short_name": _first_text(div, "span", ("bgb nw",)),
            "name": name,
            "reference": reference,
            "type": _first_text(div, "div", type_classes),
            "material": _first_text(div, "div", material_classes),
        })

    return results


def write_csv(results, output_file="result_tamiya_extract.csv"):
    """Write the extracted rows to `output_file` as UTF-8 CSV with a header line.

    `results` is an iterable of dicts keyed by the column names below; the
    columns are written in that fixed order.
    """
    columns = [
        "category",
        "color",
        "h",
        "s",
        "v",
        "short_name",
        "name",
        "reference",
        "type",
        "material",
    ]

    # newline="" lets the csv module control line endings itself.
    with open(output_file, "w", newline="", encoding="utf-8") as out:
        dict_writer = csv.DictWriter(out, fieldnames=columns)
        dict_writer.writeheader()
        dict_writer.writerows(results)


if __name__ == "__main__":
    import sys

    # Script entry point: expects the HTML file path as the first argument.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python extract_tamiya.py <html_file_path>")
        sys.exit(1)

    entries = parse_html_file(cli_args[0])
    # Output goes to write_csv's default file, which the message below names.
    write_csv(entries)
    print(f"Extraction complete. {len(entries)} entries written to result_tamiya_extract.csv.")
