#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Apple Journal (Entries/Resources) → Obsidian 用 Markdown + 画像 + 動画変換

📌 何をする？
- Entries/*.html を読み、日付をファイル名(YYYY-MM-DD.md)にして「日記/」へ保存
- Resources 内の画像(HEIC/JPEG/PNG等)はJPGに正規化して「attachments/」へ保存
- Resources 内の動画(.mov)を回転を考慮してmp4に変換して「attachments/」へ保存
- md 内のリンクは Obsidian の wiki embed に統一:
  画像: ![[attachments/2024-04-19.jpg]]
  動画: ![[attachments/2026-01-03.mp4]]

📌 ユーザーが設定を変更する場所
- ROOT（AppleJournalEntriesのパス）
- TAG_WORDS（Journalの日記内からタグを抽出するためのキーワード）
- CLEAN_OUTPUT（既存の出力フォルダを削除してから開始する場合はTrue）
"""

from __future__ import annotations

import re
import shutil
import subprocess
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Set

from bs4 import BeautifulSoup
from PIL import Image
from pillow_heif import register_heif_opener

# Teach Pillow to open HEIC/HEIF files (Apple's default photo format).
register_heif_opener()

# =========================
# 0) Settings (user-editable section)
# =========================
# 📌📌📌 EDIT SETTINGS HERE 📌📌📌
ROOT = Path("/Users/*******/Library/Mobile Documents/com~apple~CloudDocs/AppleJournalEntries")  # root of the Apple Journal export
CLEAN_OUTPUT = True  # True: wipe the output folders before converting
TAG_WORDS = ["ドラマ", "映画", "旅行", "読書"]  # keywords promoted to tags when found in entry text
# 📌📌📌 END OF SETTINGS 📌📌📌

# File extensions treated as video
VIDEO_EXTS = {".mov", ".mp4", ".m4v"}

# When ffmpeg is available, always re-encode videos to mp4 (fixes rotation issues)
ALWAYS_TRANSCODE_VIDEO_TO_MP4 = True

# mp4 output quality for ffmpeg -crf (lower = higher quality / larger file)
H264_CRF = "20"

# =========================
# 1) 入出力フォルダ
# =========================

def pick_dir(*names: str, root: Optional[Path] = None) -> Path:
    """Return the first existing directory among *names* under *root*.

    Falls back to ``root / names[0]`` when none of the candidates exist,
    so callers always get a usable path to report or create.

    Args:
        names: Candidate directory names, tried in order.
        root: Base directory; defaults to the module-level ``ROOT``
            (keyword-only, so existing calls are unaffected).

    Returns:
        Path of the first existing candidate, or the first candidate
        (possibly non-existent) as a fallback.
    """
    base = ROOT if root is None else root
    for name in names:
        candidate = base / name
        if candidate.exists():
            return candidate
    # Nothing exists yet: prefer the first (canonical) name anyway.
    return base / names[0]


# Input folders: try the canonical export names first, then lowercase variants.
ENTRIES_DIR = pick_dir("Entries", "html")
RESOURCES_DIR = pick_dir("Resources", "resources")

# Output folders: Markdown notes ("日記") and converted media ("attachments").
MD_DIR = ROOT / "日記"
ATTACH_DIR = ROOT / "attachments"

# Create the output folders up front so later writes cannot fail on a missing dir.
MD_DIR.mkdir(parents=True, exist_ok=True)
ATTACH_DIR.mkdir(parents=True, exist_ok=True)


# =========================
# 2) ユーティリティ
# =========================

def which(cmd: str) -> Optional[str]:
    """Locate *cmd* on PATH; return its full path, or None when absent."""
    found = shutil.which(cmd)
    return found


def jp_header_to_ymd(header_text: str) -> Optional[str]:
    """Parse a Japanese date like '2024年4月19日' into 'YYYY-MM-DD'.

    Returns None when no such date appears in *header_text*.
    """
    match = re.search(r"(\d{4})年(\d{1,2})月(\d{1,2})日", header_text)
    if match is None:
        return None
    year, month, day = (int(g) for g in match.groups())
    return f"{year:04d}-{month:02d}-{day:02d}"


def extract_header_text(soup: BeautifulSoup) -> str:
    """Return the entry's date-header text.

    Prefers the dedicated ``div.pageHeader`` element; otherwise scans the
    flattened document text for the first Japanese-style date string.
    """
    header = soup.select_one("div.pageHeader")
    if header is not None:
        return header.get_text(strip=True)

    # Fallback: look for a date pattern anywhere in the document.
    flattened = soup.get_text("\n", strip=True)
    match = re.search(r"\d{4}年\d{1,2}月\d{1,2}日", flattened)
    return "" if match is None else match.group(0)


def unique_path(directory: Path, base_stem: str, suffix: str) -> Path:
    """Return a path in *directory* that does not collide with existing files.

    Tries ``{base_stem}{suffix}`` first, then ``{base_stem}_(1){suffix}``,
    ``{base_stem}_(2){suffix}``, ... until a free name is found.
    """
    candidate = directory / (base_stem + suffix)
    counter = 0
    while candidate.exists():
        counter += 1
        candidate = directory / f"{base_stem}_({counter}){suffix}"
    return candidate


def tags_from_text(text: str, words: Optional[List[str]] = None) -> List[str]:
    """Return the tag keywords that occur in *text*.

    Args:
        text: Body text to scan (simple substring match).
        words: Keywords to look for; defaults to the module-level
            ``TAG_WORDS`` (evaluated lazily, so callers may override).

    Returns:
        The matching keywords, preserving the order of *words*.
    """
    candidates = TAG_WORDS if words is None else words
    return [w for w in candidates if w in text]


def build_frontmatter(ymd: str, tags: List[str]) -> str:
    """Render the note's YAML frontmatter with its date and tag list.

    Empty *tags* produce ``tags: []``; otherwise a YAML list is emitted.
    The block ends with a blank line separating it from the body.
    """
    lines = ["---", f"date: {ymd}"]
    if tags:
        lines.append("tags:")
        lines.extend(f"  - {tag}" for tag in tags)
    else:
        lines.append("tags: []")
    lines.append("---")
    return "\n".join(lines) + "\n\n"


def clean_dir_contents(d: Path) -> None:
    """Delete everything directly inside *d*, keeping *d* itself."""
    for child in d.iterdir():
        if child.is_dir():
            shutil.rmtree(child)
        elif child.is_file():
            child.unlink()


# =========================
# 3) 本文抽出（p2=本文, p3/p4=空行）
# =========================

def html_to_flat_text_preserving_breaks(soup: BeautifulSoup) -> str:
    """Extract the journal body as plain text, keeping paragraph spacing.

    Apple Journal exports mark body paragraphs with class ``p2`` and
    blank-line spacers with ``p3``/``p4`` paragraphs containing only <br>.
    Consecutive blanks are collapsed to a single empty line.
    """
    body = soup.body
    if body is None:
        return ""

    lines: List[str] = []

    def push_blank() -> None:
        # Never emit two blank separators in a row.
        if lines and lines[-1] != "":
            lines.append("")

    for para in body.find_all("p", recursive=True):
        classes = set(para.get("class", []))

        if "p2" in classes:
            chunk = para.get_text("\n", strip=True)
            chunk = re.sub(r"\n{3,}", "\n\n", chunk).strip()
            if chunk:
                push_blank()
                lines.append(chunk)

        elif classes & {"p3", "p4"}:
            # A spacer: no visible text, just a <br>.
            if not para.get_text(strip=True) and para.find("br") is not None:
                push_blank()

    # Drop trailing blank separators.
    while lines and lines[-1] == "":
        lines.pop()

    joined = "\n".join(lines).strip()
    return joined + "\n" if joined else ""


# =========================
# 4) Resources のインデックス
# =========================

def build_resource_index(resources_dir: Path) -> Dict[str, Path]:
    """Map every file name under *resources_dir* (recursively) to its path.

    When two files share a basename, the one visited later wins.
    """
    return {entry.name: entry for entry in resources_dir.rglob("*") if entry.is_file()}


def resource_from_src(src: str, res_index: Dict[str, Path]) -> Optional[Path]:
    """Resolve an HTML src attribute (e.g. '../Resources/X.mov') to a file.

    Only the basename is used for the lookup. Returns None for an empty
    basename or an unknown file.
    """
    _, _, basename = src.rpartition("/")
    if basename:
        return res_index.get(basename)
    return None


# =========================
# 5) 画像をJPGに正規化
# =========================

def to_rgb_image(im: Image.Image) -> Image.Image:
    """Return *im* as an RGB image, compositing any transparency onto white.

    - RGBA/LA images (and palette images carrying transparency) are pasted
      over a white background, so transparent pixels become white in the JPEG.
    - Any other non-RGB mode is converted directly.
    """
    # Palette ("P") images may carry transparency in im.info; promote them to
    # RGBA first so the white-background compositing below applies to them
    # too, instead of a direct convert that would ignore the transparency.
    if im.mode == "P" and "transparency" in im.info:
        im = im.convert("RGBA")
    if im.mode in ("RGBA", "LA"):
        background = Image.new("RGB", im.size, (255, 255, 255))
        alpha = im.getchannel("A") if "A" in im.getbands() else None
        background.paste(im.convert("RGB"), mask=alpha)
        return background
    if im.mode != "RGB":
        return im.convert("RGB")
    return im


def save_normalized_jpg(src_path: Path, out_path: Path) -> None:
    """Load *src_path*, normalize it, and save as a high-quality JPEG.

    Applies the EXIF orientation tag before saving so photos taken with a
    rotated phone render upright — the same rotation fix this script already
    performs for videos. Without this, re-encoding drops the orientation
    metadata and the JPEG appears sideways.
    """
    from PIL import ImageOps  # local import keeps this fix self-contained

    with Image.open(src_path) as im:
        upright = ImageOps.exif_transpose(im)
        normalized = to_rgb_image(upright)
        normalized.save(out_path, "JPEG", quality=92, optimize=True)


def convert_image_to_attachments(resource_path: Path, md_stem: str, idx: int) -> Tuple[Path, str]:
    """Normalize one image into attachments/ and return (path, embed target).

    The first image of an entry is named after the note stem; subsequent
    ones get an ``_img{idx}`` suffix. The second return value is the
    relative link used inside the Obsidian wiki embed.
    """
    stem = md_stem if idx == 1 else f"{md_stem}_img{idx}"
    destination = unique_path(ATTACH_DIR, stem, ".jpg")
    save_normalized_jpg(resource_path, destination)
    return destination, f"attachments/{destination.name}"


# =========================
# 6) 動画: ffmpegがあればmp4に再保存（向き問題対策）
# =========================

def transcode_video_to_mp4(src: Path, dst: Path, ffmpeg_path: str) -> None:
    """Re-encode *src* to an H.264/AAC mp4 at *dst* with ffmpeg.

    ffmpeg applies the input's rotation metadata while decoding, so the
    output pixels are already upright; writing ``rotate=0`` on the output
    video stream then prevents players (and Obsidian) from rotating the
    picture a second time.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits non-zero.
    """
    command = [ffmpeg_path, "-y", "-i", str(src)]
    # Compatibility-first encoding settings.
    command += ["-c:v", "libx264", "-crf", H264_CRF, "-preset", "medium"]
    command += ["-pix_fmt", "yuv420p", "-movflags", "+faststart"]
    # Force the output stream's rotation metadata to zero.
    command += ["-metadata:s:v:0", "rotate=0"]
    command += ["-c:a", "aac", "-b:a", "192k", str(dst)]
    subprocess.run(command, check=True)


def copy_or_convert_video_to_attachments(resource_path: Path, md_stem: str, idx: int) -> Tuple[Path, str]:
    """Place one video into attachments/ and return (path, embed target).

    With ffmpeg on PATH and transcoding enabled, the video is re-encoded to
    mp4 (see transcode_video_to_mp4); otherwise it is copied unchanged with
    its original extension. The first video of an entry is named after the
    note stem; later ones get an ``_vid{idx}`` suffix.
    """
    stem = md_stem if idx == 1 else f"{md_stem}_vid{idx}"
    ffmpeg_path = which("ffmpeg")

    if ffmpeg_path and ALWAYS_TRANSCODE_VIDEO_TO_MP4:
        destination = unique_path(ATTACH_DIR, stem, ".mp4")
        transcode_video_to_mp4(resource_path, destination, ffmpeg_path)
    else:
        # No ffmpeg (or transcoding disabled): plain copy, keep the extension.
        destination = unique_path(ATTACH_DIR, stem, resource_path.suffix.lower())
        shutil.copy2(resource_path, destination)

    return destination, f"attachments/{destination.name}"


# =========================
# 7) メイン
# =========================

def main() -> None:
    """Convert every Entries/*.html into a dated Markdown note plus media.

    Per entry: parse the Japanese date header for the file name, extract the
    body text and tags, convert referenced images to JPG and videos to mp4
    (when ffmpeg is available), and write a note with YAML frontmatter and
    Obsidian wiki embeds grouped at the top.
    """
    if CLEAN_OUTPUT:
        clean_dir_contents(MD_DIR)
        clean_dir_contents(ATTACH_DIR)

    html_files = sorted(ENTRIES_DIR.glob("*.html"))
    if not html_files:
        print(f"No html files found in: {ENTRIES_DIR}")
        return

    # Warn once up front if videos cannot be transcoded on this machine.
    if ALWAYS_TRANSCODE_VIDEO_TO_MP4 and not which("ffmpeg"):
        print("WARN: ffmpeg が見つかりません。動画は変換せずコピーのみになります。")
        print("      (macなら: brew install ffmpeg)")

    res_index = build_resource_index(RESOURCES_DIR)

    for html_path in html_files:
        soup = BeautifulSoup(
            html_path.read_text(encoding="utf-8", errors="replace"),
            "html.parser",
        )

        header = extract_header_text(soup)
        ymd = jp_header_to_ymd(header) or "unknown-date"

        md_path = unique_path(MD_DIR, ymd, ".md")
        md_stem = md_path.stem  # e.g. "2026-01-03", or "2026-01-03_(1)" on collision

        body_text = html_to_flat_text_preserving_breaks(soup)
        tags = tags_from_text(body_text)

        # ---- image extraction (<img>) ----
        image_lines: List[str] = []
        img_idx = 0
        for img in soup.find_all("img"):
            src = img.get("src") or ""
            rp = resource_from_src(src, res_index)
            if not rp:
                continue
            img_idx += 1
            _, link = convert_image_to_attachments(rp, md_stem, img_idx)
            image_lines.append(f"![[{link}]]")

        # ---- video extraction (<video>/<source>) ----
        video_lines: List[str] = []
        vid_idx = 0
        seen_videos: Set[Path] = set()  # avoid converting the same file twice

        for v in soup.find_all("video"):
            # Journal exports usually use <video><source src=...></video>
            for s in v.find_all("source"):
                ssrc = s.get("src") or ""
                rp = resource_from_src(ssrc, res_index)
                if not rp:
                    continue
                if rp.suffix.lower() not in VIDEO_EXTS:
                    continue
                if rp in seen_videos:
                    continue
                seen_videos.add(rp)

                vid_idx += 1
                _, link = copy_or_convert_video_to_attachments(rp, md_stem, vid_idx)
                video_lines.append(f"![[{link}]]")

            # Also handle a src attribute directly on <video>, just in case
            vsrc = v.get("src")
            if vsrc:
                rp = resource_from_src(vsrc, res_index)
                if rp and rp.suffix.lower() in VIDEO_EXTS and rp not in seen_videos:
                    seen_videos.add(rp)
                    vid_idx += 1
                    _, link = copy_or_convert_video_to_attachments(rp, md_stem, vid_idx)
                    video_lines.append(f"![[{link}]]")

        # Frontmatter date: when the header parse failed, strip the "_(<n>)"
        # collision suffix from the stem so the date field stays clean.
        fm_date = ymd if ymd != "unknown-date" else md_stem.split("_(")[0]
        md_text = build_frontmatter(fm_date, tags)

        # Media embeds are grouped at the top of the note, images first.
        if image_lines:
            md_text += "\n".join(image_lines) + "\n\n"
        if video_lines:
            md_text += "\n".join(video_lines) + "\n\n"

        md_text += body_text

        md_path.write_text(md_text, encoding="utf-8")
        print(f"OK: {html_path.name} -> {md_path.name} (images: {len(image_lines)}, videos: {len(video_lines)})")


if __name__ == "__main__":
    main()