From 7cacb3c326bb9f7fdba8e2b183a24011bae748db Mon Sep 17 00:00:00 2001
From: Jan Meyer <git@lolmerkat.cc>
Date: Tue, 10 Feb 2026 00:06:45 +0100
Subject: [PATCH] chore: init

---
 ripper.py | 531 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 531 insertions(+)
 create mode 100644 ripper.py

diff --git a/ripper.py b/ripper.py
new file mode 100644
index 0000000..cda9e4b
--- /dev/null
+++ b/ripper.py
@@ -0,0 +1,531 @@
+#!/usr/bin/env python3
+"""
+DVD/Blu-ray ripper using HandBrakeCLI with scene-style naming.
+
+Scans a disc mounted at /mnt/dvd, selects the best audio & subtitle track
+per language (passthrough), encodes with H.265 10-bit AMD VCE (falling back
+to x265_10bit on CPU), and generates an NFO file via pymediainfo.
+"""
+
+import argparse
+import json
+import os
+import re
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+
+# ── Constants ────────────────────────────────────────────────────────────────
+
+INPUT_PATH = "/mnt/dvd"  # default value of --input
+OUTPUT_BASE = "/mnt/shared/ripped"  # default value of --output-base
+
+ENCODER_PRIMARY = "vce_h265_10bit"  # used when listed in `HandBrakeCLI --help`
+ENCODER_FALLBACK = "x265_10bit"  # used when the primary encoder is not listed
+
+# Maps lower-cased track "CodecName" values → scene-style tags
+AUDIO_CODEC_SCENE = {
+    "truehd":   "TrueHD",
+    "dtshd":    "DTS-HD.MA",
+    "dts":      "DTS",
+    "ac3":      "DD",
+    "eac3":     "DDP",      # Dolby Digital Plus
+    "aac":      "AAC",
+    "mp3":      "MP3",
+    "flac":     "FLAC",
+    "opus":     "OPUS",
+    "pcm":      "LPCM",
+    "lpcm":     "LPCM",
+    "mp2":      "MP2",
+    "vorbis":   "Vorbis",
+}
+
+CHANNEL_SCENE = {  # "ChannelCount" → layout tag; other counts become "2.0"
+    1: "1.0",
+    2: "2.0",
+    3: "2.1",
+    6: "5.1",
+    7: "6.1",
+    8: "7.1",
+}
+
+
+# ── Helpers ──────────────────────────────────────────────────────────────────
+
+def run(cmd: list[str], capture: bool = True) -> subprocess.CompletedProcess:
+    """Echo *cmd* to stderr, execute it, and return the completed process.
+
+    With ``capture`` set, the child's stdout/stderr are collected as text.
+    """
+    rendered = " ".join(cmd)
+    print(f"  ▸ {rendered}", file=sys.stderr)
+    return subprocess.run(cmd, capture_output=capture, text=True)
+
+
+def detect_encoder() -> str:
+    """Pick the H.265 10-bit encoder name to pass to HandBrakeCLI.
+
+    Primary wins if its name appears in ``HandBrakeCLI --help`` output.
+    """
+    probe = run(["HandBrakeCLI", "--help"], capture=True)
+    if ENCODER_PRIMARY not in probe.stdout + probe.stderr:
+        notice = f"  ⚠ {ENCODER_PRIMARY} not available, falling back to {ENCODER_FALLBACK}"
+        print(notice, file=sys.stderr)
+        return ENCODER_FALLBACK
+    print(f"  ✓ Using hardware encoder: {ENCODER_PRIMARY}", file=sys.stderr)
+    return ENCODER_PRIMARY
+
+
+def scan_disc(input_path: str) -> dict:
+    """Scan the disc with HandBrakeCLI and return the parsed title set.
+
+    Args:
+        input_path: Disc mount point or device passed to ``--input``.
+
+    Returns:
+        The decoded "JSON Title Set" object printed by the scan.
+
+    Exits the process with status 1 when the block is missing or is not
+    valid JSON; a tail of the raw output is printed to help diagnose it.
+    """
+    print("Scanning disc …", file=sys.stderr)
+    result = run([
+        "HandBrakeCLI",
+        "--input", input_path,
+        "--title", "0",
+        "--json",
+        "--scan",
+    ])
+    # HandBrakeCLI mixes stderr log lines with JSON on stdout.
+    # JSON blocks are labeled, e.g.:
+    #   Version: { ... }
+    #   Progress: { ... }
+    #   JSON Title Set: { "MainFeature": ..., "TitleList": [...] }
+    # We want the "JSON Title Set" block.  Also, "HandBrake has exited."
+    # can appear mid-stream and must be stripped.
+    combined = (result.stdout or "") + (result.stderr or "")
+    cleaned_lines = [
+        line for line in combined.splitlines()
+        if "HandBrake has exited" not in line
+    ]
+    cleaned = "\n".join(cleaned_lines)
+
+    def fail(reason: str) -> None:
+        """Report *reason* plus the output tail, then abort."""
+        print(f"ERROR: {reason}", file=sys.stderr)
+        print("  (Raw output tail follows)", file=sys.stderr)
+        for ln in cleaned_lines[-20:]:
+            print(f"    {ln}", file=sys.stderr)
+        sys.exit(1)
+
+    # Locate the first "{" at or after the label.  raw_decode() then parses
+    # exactly one JSON value from there, so braces inside string values
+    # (e.g. a title name) cannot unbalance the extraction the way manual
+    # brace counting does, and a label without "{" on its own line is fine.
+    label_pos = cleaned.find("JSON Title Set:")
+    json_pos = cleaned.find("{", label_pos) if label_pos != -1 else -1
+    if json_pos == -1:
+        fail("Could not find 'JSON Title Set' in scan output.")
+
+    try:
+        scan, _ = json.JSONDecoder().raw_decode(cleaned, json_pos)
+    except json.JSONDecodeError as exc:
+        fail(f"Could not parse 'JSON Title Set' block: {exc}")
+    return scan
+
+
+def select_title(scan: dict) -> dict:
+    """Select the main feature title from a scan result.
+
+    Preference order: the title whose ``Index`` equals the title set's
+    top-level ``MainFeature`` value, then any title carrying its own truthy
+    ``MainFeature`` flag, then the longest title by ``Duration.Ticks``.
+    Exits with status 1 if the scan lists no titles.
+    """
+    titles = scan.get("TitleList", [])
+    if not titles:
+        print("ERROR: No titles found on disc.", file=sys.stderr)
+        sys.exit(1)
+
+    feature = scan.get("MainFeature")
+    main = [t for t in titles if feature and t.get("Index") == feature]
+    main = main or [t for t in titles if t.get("MainFeature")]
+    title = main[0] if main else max(
+        titles, key=lambda t: t.get("Duration", {}).get("Ticks", 0))
+
+    dur, geo = title.get("Duration", {}), title.get("Geometry", {})
+    h, m, s = dur.get("Hours", 0), dur.get("Minutes", 0), dur.get("Seconds", 0)
+    print(f"  ✓ Selected title {title.get('Index', '?')} ({h}h{m:02d}m{s:02d}s, "
+          f"{geo.get('Width', '?')}×{geo.get('Height', '?')})", file=sys.stderr)
+    return title
+
+
+def best_tracks_per_language(tracks: list[dict], kind: str) -> list[dict]:
+    """
+    Reduce *tracks* to one entry per ``LanguageCode`` ("und" when absent).
+
+    Audio: the highest ``(ChannelCount, BitRate)`` pair wins.
+    Other kinds: the first track whose ``Attributes.Forced`` is falsy wins,
+    else the language's first track.  Output keeps first-seen language order.
+    """
+    def audio_rank(track: dict) -> tuple:
+        return (track.get("ChannelCount", 0), track.get("BitRate", 0))
+
+    def is_forced(track: dict) -> bool:
+        return bool(track.get("Attributes", {}).get("Forced", False))
+
+    grouped: dict[str, list[dict]] = {}
+    for track in tracks:
+        grouped.setdefault(track.get("LanguageCode", "und"), []).append(track)
+
+    winners = []
+    for candidates in grouped.values():
+        if kind == "audio":
+            winners.append(max(candidates, key=audio_rank))
+        else:
+            winners.append(next((c for c in candidates if not is_forced(c)), candidates[0]))
+    return winners
+
+
+def get_resolution_tag(title: dict) -> str:
+    """Return a scene-style resolution tag (2160p/1080p/720p/576p/480p).
+
+    PAL DVDs are 576 lines tall, so they get their own tag instead of 480p.
+    """
+    height = title.get("Geometry", {}).get("Height", 0)
+    tiers = ((2000, "2160p"), (1000, "1080p"), (700, "720p"),
+             (560, "576p"), (400, "480p"))
+    for minimum, tag in tiers:
+        if height >= minimum:
+            return tag
+    return f"{height}p"
+
+
+def get_source_tag(input_path: str) -> str:
+    """Classify the source as BluRay when a BDMV folder exists, else DVD."""
+    bdmv_dir = os.path.join(input_path, "BDMV")
+    # Anything without the Blu-ray directory layout is reported as DVD.
+    return "BluRay" if os.path.isdir(bdmv_dir) else "DVD"
+
+
+def get_volume_label(input_path: str) -> str | None:
+    """Best-effort lookup of the disc's volume label via ``blkid``.
+
+    ``blkid`` is first pointed at *input_path* itself.  If that produces no
+    label, ``findmnt`` resolves the source device behind the path and
+    ``blkid`` is asked again about that device.  Missing tools and calls
+    exceeding five seconds are ignored.
+
+    Returns:
+        The label, or ``None`` when neither attempt yields a non-empty one.
+    """
+    tolerated = (FileNotFoundError, subprocess.TimeoutExpired)
+    blkid_label = ["blkid", "-o", "value", "-s", "LABEL"]
+
+    def stdout_of(argv: list[str]) -> str:
+        # Exit status is deliberately not inspected; empty output means "none".
+        done = subprocess.run(argv, capture_output=True, text=True, timeout=5)
+        return done.stdout.strip()
+
+    # Attempt 1: query the given path directly.
+    try:
+        if direct := stdout_of(blkid_label + [input_path]):
+            return direct
+    except tolerated:
+        pass
+
+    # Attempt 2: map the path to its mount source device, then query that.
+    try:
+        device = stdout_of(["findmnt", "-n", "-o", "SOURCE", input_path])
+        if device and (via_device := stdout_of(blkid_label + [device])):
+            return via_device
+    except tolerated:
+        pass
+    return None
+
+
+def scene_audio_tag(audio_track: dict) -> str:
+    """Return the ``<codec>.<channels>`` scene tag of a track, e.g. DTS.5.1."""
+    raw = audio_track.get("CodecName", "unknown").lower()
+    # Unmapped codecs are upper-cased as-is; unmapped channel counts give 2.0.
+    tag = AUDIO_CODEC_SCENE.get(raw, raw.upper())
+    layout = CHANNEL_SCENE.get(audio_track.get("ChannelCount", 2), "2.0")
+    return ".".join((tag, layout))
+
+
+def build_scene_name(
+    movie_name: str,
+    year: str | None,
+    title: dict,
+    audio_tracks: list[dict],
+    source_tag: str,
+) -> str:
+    """
+    Assemble the dot-separated scene-style base name (no extension).
+
+    Layout, in order:
+      <Name>[.<year>].<resolution>.<source>.10bit.x265[.<audio>][.MULTI-<n>.Audio]
+
+    The audio tag describes the track with the highest
+    (ChannelCount, BitRate); it is omitted when there are no audio tracks.
+    The MULTI suffix appears only when the tracks span more than one
+    LanguageCode.
+
+    Example: Game.Night.2018.1080p.BluRay.10bit.x265.DTS-HD.MA.5.1.MULTI-6.Audio
+    """
+    # Whitespace/underscore runs become dots, then anything that is neither a
+    # word character nor a dot is dropped.
+    dotted = re.sub(r"[\s_]+", ".", movie_name.strip())
+    pieces: list[str] = [re.sub(r"[^\w.]", "", dotted)]
+
+    if year:
+        pieces.append(year)
+
+    pieces += [get_resolution_tag(title), source_tag, "10bit", "x265"]
+
+    if audio_tracks:
+        def rank(track: dict) -> tuple:
+            return (track.get("ChannelCount", 0), track.get("BitRate", 0))
+
+        pieces.append(scene_audio_tag(max(audio_tracks, key=rank)))
+
+    language_total = len({t.get("LanguageCode", "und") for t in audio_tracks})
+    if language_total > 1:
+        pieces.append(f"MULTI-{language_total}.Audio")
+
+    return ".".join(pieces)
+
+
+def build_handbrake_cmd(
+    input_path: str,
+    output_path: str,
+    title: dict,
+    audio_tracks: list[dict],
+    subtitle_tracks: list[dict],
+    encoder: str,
+) -> list[str]:
+    """Build the full HandBrakeCLI command line for one title.
+
+    Video is constant-quality (RF 22), peak-limited to 30 fps, Main 10
+    profile, written to MKV with chapter markers.  Every selected audio
+    track is requested as passthrough ("copy") with AAC as the fallback
+    encoder; selected subtitle tracks are passed by track number.
+
+    The encoder preset depends on *encoder*: "balanced" is a VCE preset
+    name that x265 does not define (its presets run ultrafast … placebo),
+    so the software fallback gets "medium" instead of an invalid value.
+
+    Returns:
+        The argv list, ready for ``subprocess.run``.
+    """
+    preset = "balanced" if encoder.startswith("vce_") else "medium"
+    cmd = [
+        "HandBrakeCLI",
+        "--input", input_path, "--output", output_path,
+        "--format", "av_mkv", "--title", str(title.get("Index", 1)),
+        "--markers",
+        "--encoder", encoder, "--quality", "22", "--rate", "30", "--pfr",
+        "--color-range", "limited", "--encoder-preset", preset,
+        "--encoder-profile", "main10", "--encoder-level", "auto",
+    ]
+    # Audio: passthrough all selected tracks
+    if audio_tracks:
+        cmd += [
+            "--audio", ",".join(str(t["TrackNumber"]) for t in audio_tracks),
+            "--aencoder", ",".join("copy" for _ in audio_tracks),
+            "--audio-copy-mask",
+            "aac,ac3,eac3,truehd,dts,dtshd,mp2,mp3,opus,vorbis,flac,alac",
+            "--audio-fallback", "av_aac",
+        ]
+    if subtitle_tracks:
+        track_nums = ",".join(str(t["TrackNumber"]) for t in subtitle_tracks)
+        cmd += ["--subtitle", track_nums]
+    return cmd
+
+
+def generate_nfo(mkv_path: str) -> str:
+    """Write a plain-text .nfo report next to the MKV and return its path.
+
+    One section is emitted per MediaInfo track; ``None`` fields are skipped.
+    pymediainfo is imported here, so it is only required once this runs.
+    """
+    from pymediainfo import MediaInfo
+
+    # splitext only strips an extension from the final path component, so a
+    # dotted directory name can never be mistaken for the file extension.
+    nfo_path = os.path.splitext(mkv_path)[0] + ".nfo"
+    media_info = MediaInfo.parse(mkv_path)
+
+    banner = "=" * 72
+    lines: list[str] = [banner, f"  {os.path.basename(mkv_path)}", banner, ""]
+
+    for track in media_info.tracks:
+        track_type = track.track_type
+        lines.append(f"--- {track_type} ---")
+
+        if track_type == "General":
+            fields = [
+                ("Format", track.format),
+                ("File size", track.other_file_size[0] if track.other_file_size else None),
+                ("Duration", track.other_duration[0] if track.other_duration else None),
+                ("Overall bit rate", track.other_overall_bit_rate[0] if track.other_overall_bit_rate else None),
+            ]
+        elif track_type == "Video":
+            fields = [
+                ("Format", track.format),
+                ("Format profile", track.format_profile),
+                ("Bit depth", f"{track.bit_depth} bits" if track.bit_depth else None),
+                ("Width", f"{track.width} pixels" if track.width else None),
+                ("Height", f"{track.height} pixels" if track.height else None),
+                ("Display aspect ratio", track.other_display_aspect_ratio[0] if track.other_display_aspect_ratio else None),
+                ("Frame rate", track.other_frame_rate[0] if track.other_frame_rate else None),
+                ("Color range", track.color_range),
+                ("HDR format", track.hdr_format),
+            ]
+        elif track_type == "Audio":
+            fields = [
+                ("Format", track.format),
+                ("Commercial name", track.commercial_name),
+                ("Channels", f"{track.channel_s} channels" if track.channel_s else None),
+                ("Channel layout", track.channel_layout),
+                ("Sampling rate", track.other_sampling_rate[0] if track.other_sampling_rate else None),
+                ("Bit rate", track.other_bit_rate[0] if track.other_bit_rate else None),
+                ("Language", track.other_language[0] if track.other_language else None),
+                ("Title", track.title),
+            ]
+        elif track_type == "Text":
+            fields = [
+                ("Format", track.format),
+                ("Language", track.other_language[0] if track.other_language else None),
+                ("Forced", track.forced),
+                ("Title", track.title),
+            ]
+        else:
+            fields = [("Format", track.format)]
+
+        lines += [f"  {label:30s}: {value}" for label, value in fields if value is not None]
+        lines.append("")
+
+    with open(nfo_path, "w", encoding="utf-8") as f:
+        f.write("\n".join(lines))
+
+    print(f"  ✓ NFO written to {nfo_path}", file=sys.stderr)
+    return nfo_path
+
+
+# ── Main ─────────────────────────────────────────────────────────────────────
+
+def main():
+    """Command-line entry point: scan, pick tracks, encode, write the NFO.
+
+    The movie name comes from ``--name``, else the disc volume label, else
+    an interactive prompt.  ``--scan-only`` stops after listing the chosen
+    tracks.  Exits non-zero when no name can be obtained, when HandBrakeCLI
+    fails, or when pymediainfo cannot be imported for the NFO step.
+    """
+    parser = argparse.ArgumentParser(
+        description="Rip DVD/Blu-ray to MKV using HandBrakeCLI (H.265 10-bit).",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=(
+            "Examples:\n"
+            "  %(prog)s --name 'Game Night' --year 2018\n"
+            "  %(prog)s --scan-only\n"
+            "  %(prog)s --name Inception --year 2010 --input /dev/sr0\n"
+        ),
+    )
+    parser.add_argument(
+        "--name", "-n",
+        help="Movie name (spaces OK, will be dotted). "
+             "If omitted, the disc volume label is used.",
+    )
+    parser.add_argument("--year", "-y", help="Release year for the filename.")
+    parser.add_argument("--input", "-i", default=INPUT_PATH,
+                        help=f"Input path (default: {INPUT_PATH}).")
+    parser.add_argument("--output-base", default=OUTPUT_BASE,
+                        help=f"Base output directory (default: {OUTPUT_BASE}).")
+    parser.add_argument("--scan-only", action="store_true",
+                        help="Only scan the disc and print track info, don't encode.")
+    args = parser.parse_args()
+
+    # ── 1. Detect encoder ────────────────────────────────────────────────
+    encoder = detect_encoder()
+
+    # ── 2. Scan disc ─────────────────────────────────────────────────────
+    scan = scan_disc(args.input)
+    title = select_title(scan)
+
+    # ── 3. Select tracks (one per language) ──────────────────────────────
+    audio_sel = best_tracks_per_language(title.get("AudioList", []), "audio")
+    subtitle_sel = best_tracks_per_language(title.get("SubtitleList", []), "subtitle")
+
+    # Print selected tracks
+    print("\n  Audio tracks selected:", file=sys.stderr)
+    for t in audio_sel:
+        desc = t.get("Description") or t.get("Language", "?")
+        print(f"    #{t['TrackNumber']}  {desc}", file=sys.stderr)
+
+    print("\n  Subtitle tracks selected:", file=sys.stderr)
+    for t in subtitle_sel:
+        lang = t.get("Language", "?")
+        forced = " [forced]" if t.get("Attributes", {}).get("Forced") else ""
+        print(f"    #{t['TrackNumber']}  {lang}{forced}", file=sys.stderr)
+
+    if args.scan_only:
+        print("\n  (scan-only mode, exiting)", file=sys.stderr)
+        return
+
+    # ── 4. Build filename ────────────────────────────────────────────────
+    movie_name = args.name
+    if not movie_name:
+        label = get_volume_label(args.input)
+        if label:
+            # Volume labels are often UPPER_CASE_WITH_UNDERSCORES
+            movie_name = label.replace("_", " ").title()
+            print(f"  ✓ Using volume label: {movie_name}", file=sys.stderr)
+        else:
+            try:
+                movie_name = input("  Enter movie name: ").strip()
+            except EOFError:
+                # Non-interactive stdin: treat it like an empty answer.
+                movie_name = ""
+            if not movie_name:
+                print("ERROR: No movie name provided.", file=sys.stderr)
+                sys.exit(1)
+
+    source_tag = get_source_tag(args.input)
+    scene = build_scene_name(movie_name, args.year, title, audio_sel, source_tag)
+
+    # Each rip gets its own directory named after the scene name
+    out_dir = os.path.join(args.output_base, scene)
+    os.makedirs(out_dir, exist_ok=True)
+    output_file = os.path.join(out_dir, f"{scene}.mkv")
+
+    print(f"\n  Output: {output_file}", file=sys.stderr)
+
+    # ── 5. Encode ────────────────────────────────────────────────────────
+    cmd = build_handbrake_cmd(args.input, output_file, title, audio_sel, subtitle_sel, encoder)
+    banner = "=" * 60
+    print(f"\n{banner}\n  Starting encode …\n{banner}\n", file=sys.stderr)
+
+    result = subprocess.run(cmd)
+    if result.returncode != 0:
+        print(f"\nERROR: HandBrakeCLI exited with code {result.returncode}", file=sys.stderr)
+        sys.exit(result.returncode)
+
+    print(f"\n  ✓ Encode complete: {output_file}", file=sys.stderr)
+
+    # ── 6. Generate NFO ──────────────────────────────────────────────────
+    try:
+        generate_nfo(output_file)
+    except ImportError:
+        # The encode itself succeeded; report the missing module cleanly.
+        print("ERROR: pymediainfo is not installed; NFO not written.", file=sys.stderr)
+        sys.exit(1)
+
+    print(f"\n  ✓ All done! Output in {out_dir}", file=sys.stderr)
+
+
+if __name__ == "__main__":
+    main()