#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Apple Journal export (Entries/Resources) → Obsidian Markdown with Obsidian-compatible images and videos

📌 What does it do?
- Convert Entries/*.html to Markdown with date as filename (YYYY-MM-DD.md) and save to "DailyNotes/"
- Normalize images (HEIC/JPEG/PNG etc.) in Resources to JPG and save to "attachments/"
- Convert videos (.mov) in Resources to mp4 considering rotation and save to "attachments/"
- Link images and videos in Markdown using Obsidian's wiki embed:
  image: ![[attachments/2024-04-19.jpg]]
  video: ![[attachments/2026-01-03.mp4]]

📌 Settings the user can change:
- ROOT (path to AppleJournalEntries)
- TAG_WORDS (keywords to extract tags from Journal entries)
- CLEAN_OUTPUT (if True, delete existing output folders before starting)
"""

from __future__ import annotations

import re
import shutil
import subprocess
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Set

from bs4 import BeautifulSoup
from PIL import Image
from pillow_heif import register_heif_opener

# Register pillow_heif's opener with Pillow at import time so that Image.open()
# can read HEIF/HEIC resources.
register_heif_opener()

# =========================
# 0) User settings (edit these before running)
# =========================
# 📌📌📌 Change settings here 📌📌📌
# Root of the exported Apple Journal folder; inputs are read from it and the
# output folders are created under it.
ROOT = Path("/Users/*******/Library/Mobile Documents/com~apple~CloudDocs/AppleJournalEntries")
# If True, everything inside the output folders is deleted before conversion starts.
CLEAN_OUTPUT = True
# Keywords that become frontmatter tags when they occur in an entry's body text.
TAG_WORDS = ["movie", "travel", "book"]
# 📌📌📌 End of settings 📌📌📌

# File extensions (lowercase, with leading dot) treated as video resources
VIDEO_EXTS = {".mov", ".mp4", ".m4v"}

# When ffmpeg is available, re-encode videos to mp4 (rotation issue workaround)
ALWAYS_TRANSCODE_VIDEO_TO_MP4 = True

# CRF value passed to ffmpeg for mp4 output (smaller value = higher quality/larger file size)
H264_CRF = "20"

# =========================
# 1) Input/Output directories
# =========================

def pick_dir(*names: str) -> Path:
    """Return the first ``ROOT / name`` that exists on disk.

    The candidates are checked in the order given. When none of them exists,
    the first candidate is returned anyway (it may not exist yet).
    """
    candidates = [ROOT / name for name in names]
    return next((c for c in candidates if c.exists()), candidates[0])


# Input folders: use whichever spelling exists under ROOT (first match wins).
ENTRIES_DIR = pick_dir("Entries", "html")
RESOURCES_DIR = pick_dir("Resources", "resources")

# Output folders, placed directly under ROOT.
MD_DIR = ROOT / "DailyNotes"
ATTACH_DIR = ROOT / "attachments"

# NOTE: these run at import time, so importing this module already creates
# the output folders (no error if they exist).
MD_DIR.mkdir(parents=True, exist_ok=True)
ATTACH_DIR.mkdir(parents=True, exist_ok=True)


# =========================
# 2) Utilities
# =========================

def which(cmd: str) -> Optional[str]:
    """Look up *cmd* via ``shutil.which``; return its path, or None if not found."""
    located = shutil.which(cmd)
    return located


def jp_header_to_ymd(header_text: str) -> Optional[str]:
    """Convert a Japanese-style date found in *header_text* to ``YYYY-MM-DD``.

    The first occurrence of ``<4 digits>年<1-2 digits>月<1-2 digits>日`` is used.
    Returns None when the text contains no such date.
    """
    found = re.search(r"(\d{4})年(\d{1,2})月(\d{1,2})日", header_text)
    if found is None:
        return None
    year, month, day = (int(part) for part in found.groups())
    return "{:04d}-{:02d}-{:02d}".format(year, month, day)


def extract_header_text(soup: BeautifulSoup) -> str:
    """Return the text that carries the entry's date.

    Prefers the stripped text of the first ``div.pageHeader`` element. When
    there is no such element, the whole document text is searched for a
    ``YYYY年M月D日`` date and that match is returned, or "" if none is found.
    """
    header_el = soup.select_one("div.pageHeader")
    if not header_el:
        # Fallback: scan the full page text for the first Japanese-style date.
        page_text = soup.get_text("\n", strip=True)
        found = re.search(r"\d{4}年\d{1,2}月\d{1,2}日", page_text)
        if found:
            return found.group(0)
        return ""
    return header_el.get_text(strip=True)


def unique_path(directory: Path, base_stem: str, suffix: str) -> Path:
    """Return a path inside *directory* that does not exist yet.

    Tries ``<base_stem><suffix>`` first, then ``<base_stem>_(1)<suffix>``,
    ``<base_stem>_(2)<suffix>``, ... until an unused name is found. Nothing is
    created on disk.
    """
    candidate = directory / (base_stem + suffix)
    counter = 0
    while candidate.exists():
        counter += 1
        candidate = directory / f"{base_stem}_({counter}){suffix}"
    return candidate


def tags_from_text(text: str, tag_words: Optional[List[str]] = None) -> List[str]:
    """Return the keywords that occur in *text*, in keyword order.

    Args:
        text: Entry body text to scan.
        tag_words: Keywords to look for. Defaults to the module-level
            ``TAG_WORDS`` setting when omitted.

    Returns:
        The matching keywords exactly as spelled in the keyword list, without
        duplicates.

    Matching is a case-insensitive substring test, so a keyword ``"movie"``
    also matches ``"Movie"`` at the start of a sentence.
    """
    words = TAG_WORDS if tag_words is None else tag_words
    haystack = text.casefold()
    found: List[str] = []
    for word in words:
        # An empty keyword is a substring of every text; skip it so it never
        # produces a blank tag.
        if not word or word in found:
            continue
        if word.casefold() in haystack:
            found.append(word)
    return found


def build_frontmatter(ymd: str, tags: List[str]) -> str:
    """Build the YAML frontmatter block for a note.

    The block contains a ``date`` field and a ``tags`` field (a YAML list, or
    ``[]`` when there are no tags) and is followed by one blank line.
    """
    lines = ["---", f"date: {ymd}"]
    if tags:
        lines.append("tags:")
        lines.extend(f"  - {tag}" for tag in tags)
    else:
        lines.append("tags: []")
    lines.append("---")
    return "\n".join(lines) + "\n\n"


def clean_dir_contents(d: Path) -> None:
    """Delete everything inside directory *d*, keeping *d* itself.

    Real sub-directories are removed recursively. Regular files and symbolic
    links are unlinked. A symlink is always removed as a link and never
    followed, so the directory or file it points to is left untouched.
    """
    for p in d.iterdir():
        # Test for symlinks first: is_dir()/is_file() follow links, and
        # shutil.rmtree() refuses to operate on a symlink to a directory.
        if p.is_symlink() or p.is_file():
            p.unlink()
        elif p.is_dir():
            shutil.rmtree(p)


# =========================
# 3) Extract body text (p2=body, p3/p4=empty lines)
# =========================

def html_to_flat_text_preserving_breaks(soup: BeautifulSoup) -> str:
    """Flatten the entry body to plain text, keeping paragraph breaks.

    - ``p`` elements with class ``p2`` contribute their text as a paragraph.
    - ``p`` elements with class ``p3`` or ``p4`` that hold no text but contain
      a ``br`` act as a blank-line separator.
    - Consecutive paragraphs are separated by exactly one blank line.

    Returns the text terminated by a single newline, or "" when the document
    has no body or no text.
    """
    body = soup.body
    if not body:
        return ""

    lines: List[str] = []

    def add_separator() -> None:
        # A blank line is only added after real content, never first or doubled.
        if lines and lines[-1] != "":
            lines.append("")

    for para in body.find_all("p", recursive=True):
        css = set(para.get("class", []))

        if "p2" in css:
            content = re.sub(r"\n{3,}", "\n\n", para.get_text("\n", strip=True)).strip()
            if content:
                add_separator()
                lines.append(content)
            continue

        if css.isdisjoint({"p3", "p4"}):
            continue

        if para.find("br") is not None and not para.get_text(strip=True):
            add_separator()

    # strip() also removes any trailing separator left at the end of the list.
    flattened = "\n".join(lines).strip()
    if not flattened:
        return ""
    return flattened + "\n"


# =========================
# 4) Resources index
# =========================

def build_resource_index(resources_dir: Path) -> Dict[str, Path]:
    """Map each file name found under *resources_dir* (recursively) to its path.

    Directories are ignored. If the same file name occurs more than once, the
    path encountered last during the walk is the one kept.
    """
    return {
        entry.name: entry
        for entry in resources_dir.rglob("*")
        if entry.is_file()
    }


def resource_from_src(src: str, res_index: Dict[str, Path]) -> Optional[Path]:
    """Resolve an HTML ``src`` attribute to a file in the resource index.

    Args:
        src: Attribute value such as ``"../Resources/XXXX.mov"``.
        res_index: Mapping of file name to path, as built by
            ``build_resource_index``.

    Returns:
        The indexed path, or None when *src* has no file name or the name is
        not in the index.

    The last path segment is looked up verbatim first. If that fails, the
    lookup is retried with any ``?query`` / ``#fragment`` removed and
    percent-escapes decoded, so ``my%20photo.jpg`` finds ``my photo.jpg``.
    """
    from urllib.parse import unquote, urlsplit

    candidates: List[str] = []

    raw_name = src.split("/")[-1]
    if raw_name:
        candidates.append(raw_name)

    try:
        url_path = urlsplit(src).path
    except ValueError:
        # Malformed URL-like text; only the verbatim name can be tried.
        url_path = ""
    # Split before decoding so an escaped "/" (%2F) inside a name is not
    # mistaken for a path separator.
    decoded_name = unquote(url_path.split("/")[-1])
    if decoded_name and decoded_name not in candidates:
        candidates.append(decoded_name)

    for name in candidates:
        hit = res_index.get(name)
        if hit is not None:
            return hit
    return None


# =========================
# 5) Normalize images to JPG
# =========================

def to_rgb_image(im: Image.Image) -> Image.Image:
    """Return *im* as an RGB image suitable for JPEG output.

    - RGB input is returned unchanged (same object).
    - RGBA / LA input (e.g. PNG with alpha) is composited onto a white
      background, using its alpha channel as the paste mask.
    - Every other mode is converted with ``convert("RGB")``.
    """
    mode = im.mode
    if mode == "RGB":
        return im
    if mode not in ("RGBA", "LA"):
        return im.convert("RGB")

    # Flatten transparency onto white instead of letting it turn black.
    canvas = Image.new("RGB", im.size, (255, 255, 255))
    mask = im.getchannel("A") if "A" in im.getbands() else None
    canvas.paste(im.convert("RGB"), mask=mask)
    return canvas


def save_normalized_jpg(src_path: Path, out_path: Path) -> None:
    """Open the image at *src_path* and write it to *out_path* as an RGB JPEG.

    The EXIF orientation is applied to the pixels before saving. The EXIF
    block is not passed to ``save``, so without this step a photo stored
    rotated with an orientation tag would be displayed sideways.

    Args:
        src_path: Source image in any format Pillow can open.
        out_path: Destination file; written as JPEG (quality 92, optimized).
    """
    from PIL import ImageOps

    with Image.open(src_path) as im:
        upright = ImageOps.exif_transpose(im)
        rgb = to_rgb_image(upright)
        rgb.save(out_path, "JPEG", quality=92, optimize=True)


def convert_image_to_attachments(resource_path: Path, md_stem: str, idx: int) -> Tuple[Path, str]:
    """Save *resource_path* as a JPG in the attachments folder.

    The first image of a note (``idx == 1``) is named after the note itself;
    later ones get an ``_img<idx>`` suffix. An unused file name is chosen via
    ``unique_path``.

    Returns:
        ``(output path, link)`` where link is ``attachments/<file name>``.
    """
    if idx == 1:
        stem = md_stem
    else:
        stem = f"{md_stem}_img{idx}"

    target = unique_path(ATTACH_DIR, stem, ".jpg")
    save_normalized_jpg(resource_path, target)

    link = f"attachments/{target.name}"
    return target, link


# =========================
# 6) Video: if ffmpeg is available, re-encode videos to mp4 (rotation issue workaround)
# =========================

def transcode_video_to_mp4(src: Path, dst: Path, ffmpeg_path: str) -> None:
    """
    Re-encode *src* to an H.264/AAC mp4 at *dst* with the ffmpeg binary at *ffmpeg_path*.

    - ffmpeg can take the input's rotation metadata into account and output the video in the correct orientation.
    - "Bake in" that orientation (fix it into the actual frames) and set rotate=0.
    - This helps avoid differences in how Obsidian interprets rotation metadata.

    Args:
        src: Source video file.
        dst: Output file; overwritten if it already exists (``-y``).
        ffmpeg_path: Path of the ffmpeg executable to run.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
            Any partially written *dst* is removed before re-raising.
    """
    cmd = [
        ffmpeg_path,
        "-y",
        "-i", str(src),
        "-c:v", "libx264",
        "-crf", H264_CRF,
        "-preset", "medium",
        "-pix_fmt", "yuv420p",
        "-movflags", "+faststart",
        "-metadata:s:v:0", "rotate=0",
        "-c:a", "aac",
        "-b:a", "192k",
        str(dst),
    ]
    try:
        subprocess.run(cmd, check=True)
    except (subprocess.CalledProcessError, KeyboardInterrupt):
        # Do not leave a truncated mp4 behind: it would look like a valid
        # attachment and make later runs pick a "_(n)" name around it.
        try:
            dst.unlink()
        except FileNotFoundError:
            pass
        raise


def copy_or_convert_video_to_attachments(resource_path: Path, md_stem: str, idx: int) -> Tuple[Path, str]:
    """Place a video resource in the attachments folder.

    When ffmpeg is found and ``ALWAYS_TRANSCODE_VIDEO_TO_MP4`` is set, the
    video is re-encoded to ``.mp4``. Otherwise the file is copied unchanged
    and keeps its original (lower-cased) extension.

    The first video of a note (``idx == 1``) is named after the note itself;
    later ones get a ``_vid<idx>`` suffix.

    Returns:
        ``(output path, link)`` where link is ``attachments/<file name>``.
    """
    stem = f"{md_stem}_vid{idx}" if idx != 1 else md_stem
    ffmpeg = which("ffmpeg")

    if ffmpeg and ALWAYS_TRANSCODE_VIDEO_TO_MP4:
        target = unique_path(ATTACH_DIR, stem, ".mp4")
        transcode_video_to_mp4(resource_path, target, ffmpeg)
    else:
        # No ffmpeg, or transcoding disabled: plain copy. Extend here if
        # conversion should happen only when needed.
        target = unique_path(ATTACH_DIR, stem, resource_path.suffix.lower())
        shutil.copy2(resource_path, target)

    return target, f"attachments/{target.name}"


# =========================
# 7) Main processing
# =========================

def _video_srcs(soup: BeautifulSoup) -> List[str]:
    """List candidate video URLs in document order.

    For every video element: the src of each nested source element first,
    then the video element's own src. Missing attributes become "".
    """
    srcs: List[str] = []
    for video in soup.find_all("video"):
        # Journal writes video elements with nested source elements.
        srcs.extend(source.get("src") or "" for source in video.find_all("source"))
        # In case the URL sits directly on the video element.
        srcs.append(video.get("src") or "")
    return srcs


def main() -> None:
    """Convert every ``Entries/*.html`` file to a Markdown note plus attachments.

    For each entry: derive the date from the page header, convert the
    referenced images to JPG and the referenced videos to mp4 (or copy them
    when ffmpeg is unavailable), and write ``<date>.md`` containing the
    frontmatter, the embeds and the body text. One progress line is printed
    per entry.
    """
    # Look for input before touching the output folders, so a wrong ROOT or
    # an empty Entries folder never wipes the results of a previous run.
    html_files = sorted(ENTRIES_DIR.glob("*.html"))
    if not html_files:
        print(f"No html files found in: {ENTRIES_DIR}")
        return

    if CLEAN_OUTPUT:
        clean_dir_contents(MD_DIR)
        clean_dir_contents(ATTACH_DIR)

    if ALWAYS_TRANSCODE_VIDEO_TO_MP4 and not which("ffmpeg"):
        print("WARN: ffmpeg is not found. Videos will be copied without transcoding.")
        print("      (for mac: brew install ffmpeg)")

    res_index = build_resource_index(RESOURCES_DIR)

    for html_path in html_files:
        soup = BeautifulSoup(
            html_path.read_text(encoding="utf-8", errors="replace"),
            "html.parser",
        )

        header = extract_header_text(soup)
        ymd = jp_header_to_ymd(header) or "unknown-date"

        md_path = unique_path(MD_DIR, ymd, ".md")
        md_stem = md_path.stem  # 2026-01-03 or 2026-01-03_(1)

        body_text = html_to_flat_text_preserving_breaks(soup)
        tags = tags_from_text(body_text)

        # ---- Extract images (img) ----
        image_lines: List[str] = []
        img_idx = 0
        for img in soup.find_all("img"):
            rp = resource_from_src(img.get("src") or "", res_index)
            if not rp:
                continue
            img_idx += 1
            _, link = convert_image_to_attachments(rp, md_stem, img_idx)
            image_lines.append(f"![[{link}]]")

        # ---- Extract videos (video/source) ----
        video_lines: List[str] = []
        vid_idx = 0
        seen_videos: Set[Path] = set()  # each resource is embedded once per note
        for src in _video_srcs(soup):
            rp = resource_from_src(src, res_index)
            if not rp or rp.suffix.lower() not in VIDEO_EXTS or rp in seen_videos:
                continue
            seen_videos.add(rp)
            vid_idx += 1
            _, link = copy_or_convert_video_to_attachments(rp, md_stem, vid_idx)
            video_lines.append(f"![[{link}]]")

        # frontmatter: the date is the parsed date, or "unknown-date" when the
        # header had none (the "_(n)" suffix of the file name is never included).
        md_text = build_frontmatter(ymd, tags)

        # assets
        if image_lines:
            md_text += "\n".join(image_lines) + "\n\n"
        if video_lines:
            md_text += "\n".join(video_lines) + "\n\n"

        md_text += body_text

        md_path.write_text(md_text, encoding="utf-8")
        print(f"OK: {html_path.name} -> {md_path.name} (images: {len(image_lines)}, videos: {len(video_lines)})")


# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()