zeeguu · mircealungu · May 31, 2026 · May 31, 2026
diff --git a/tools/migrations/26-05-31-a--add_caption_translation.sql b/tools/migrations/26-05-31-a--add_caption_translation.sql
@@ -0,0 +1,37 @@
+-- v1.5: translated captions for a shared video.
+-- A `caption_translation_set` is a per-(video, target_language, target_cefr) bundle that owns
+-- per-original-caption translated text rows. Timing stays on the parent `caption` rows so we
+-- don't duplicate it (the player aligns by original time_start/time_end). Status drives the
+-- async translation job (mirrors the daily-audio-lesson status pattern).
+
+CREATE TABLE `caption_translation_set` (
+    `id` int NOT NULL AUTO_INCREMENT,
+    `video_id` int NOT NULL,
+    `target_language_id` int NOT NULL,
+    `cefr_level` enum('A1','A2','B1','B2','C1','C2') NOT NULL,
+    -- State of the translation job; newly inserted rows start as 'pending'.
+    `status` enum('pending','translating','ready','error') NOT NULL DEFAULT 'pending',
+    -- Optional failure detail, capped at 500 characters; NULL unless something sets it.
+    `error_message` varchar(500) DEFAULT NULL,
+    -- No DEFAULT here: whoever inserts the row must supply the timestamp.
+    `created_at` datetime NOT NULL,
+    PRIMARY KEY (`id`),
+    -- At most one set per (video, target language, CEFR level) combination.
+    UNIQUE KEY `uq_caption_translation_set_video_lang_cefr`
+        (`video_id`, `target_language_id`, `cefr_level`),
+    -- Neither foreign key declares an ON DELETE action, so the engine default applies.
+    CONSTRAINT `fk_caption_translation_set_video`
+        FOREIGN KEY (`video_id`) REFERENCES `video` (`id`),
+    CONSTRAINT `fk_caption_translation_set_target_language`
+        FOREIGN KEY (`target_language_id`) REFERENCES `language` (`id`)
+);
+
+CREATE TABLE `caption_translation` (
+    `id` int NOT NULL AUTO_INCREMENT,
+    -- Owning `caption_translation_set` row.
+    `set_id` int NOT NULL,
+    -- Original `caption` row this translation belongs to.
+    `caption_id` int NOT NULL,
+    -- Row in `new_text`; presumably the translated text itself (see the header comment above).
+    `text_id` int NOT NULL,
+    PRIMARY KEY (`id`),
+    -- A set holds at most one translation per original caption.
+    UNIQUE KEY `uq_caption_translation_set_caption` (`set_id`, `caption_id`),
+    -- Deleting a set deletes its translation rows with it.
+    CONSTRAINT `fk_caption_translation_set`
+        FOREIGN KEY (`set_id`) REFERENCES `caption_translation_set` (`id`) ON DELETE CASCADE,
+    -- The caption and text foreign keys declare no ON DELETE action (engine default applies).
+    CONSTRAINT `fk_caption_translation_caption`
+        FOREIGN KEY (`caption_id`) REFERENCES `caption` (`id`),
+    CONSTRAINT `fk_caption_translation_text`
+        FOREIGN KEY (`text_id`) REFERENCES `new_text` (`id`)
+);
diff --git a/zeeguu/api/endpoints/__init__.py b/zeeguu/api/endpoints/__init__.py
@@ -41,6 +41,7 @@
 from .listening_sessions import *
 from . import user_video
 from . import user_watching_session
+from . import caption_translation
 from . import audio_lessons
 from . import article_simplification
 from . import generated_examples

diff --git a/zeeguu/api/endpoints/caption_translation.py b/zeeguu/api/endpoints/caption_translation.py
@@ -0,0 +1,91 @@
+"""Endpoints for the per-video translated-captions feature (v1.5 of share-to-video).
+
+POST kicks off (or returns the existing) per-(video, target_language, cefr) translation set
+and runs the LLM job in a background thread; GET polls the set's status. Once `ready`, the
+reader calls /user_video?caption_set_id=... to get the translated caption block.
+"""
+import flask
+from flask import request
+from sqlalchemy.orm.exc import NoResultFound
+
+from zeeguu.core.model import User, Language
+from zeeguu.core.model.video import Video
+from zeeguu.core.model.caption_translation_set import (
+    CaptionTranslationSet,
+    CEFR_LEVELS,
+    STATUS_READY,
+)
+from zeeguu.core.llm_services.caption_translation_service import translate_set
+from zeeguu.api.utils.background import run_in_background
+from zeeguu.api.utils.json_result import json_result
+from zeeguu.api.utils.route_wrappers import cross_domain, requires_session
+
+from . import api, db_session
+
+
+def _resolve_video_or_404(video_id: int) -> Video:
+    """Return the `Video` with the given id, aborting the request with 404 when there is none."""
+    found = Video.find_by_id(video_id)
+    if found is not None:
+        return found
+    flask.abort(404, "video not found")
+
+
+def _resolve_language_or_406(code: str) -> Language:
+    """Look up a `Language` by code; abort the request with 406 if the lookup finds no row."""
+    try:
+        language = Language.find(code)
+    except NoResultFound:
+        flask.abort(406, "Language not supported")
+    else:
+        return language
+
+
+def _read_body():
+    """Read `target_language` and `target_cefr` from the JSON body, falling back to form data.
+
+    Returns a dict with both keys always present. Values are whitespace-stripped strings
+    (`target_cefr` is also upper-cased) and are empty when the field is missing, so the
+    caller can answer with a 400 instead of this helper raising.
+    """
+    payload = request.get_json(silent=True)
+    if not isinstance(payload, dict):
+        # A missing/invalid body yields None, and a JSON array or scalar has no `.get`;
+        # treat all of those as "no JSON fields" rather than crashing with AttributeError.
+        payload = {}
+
+    def _field(name: str) -> str:
+        value = payload.get(name) or request.form.get(name)
+        # Non-string JSON values (numbers, lists, objects) cannot be stripped; treat them
+        # as absent so the request is rejected by validation instead of failing with a 500.
+        return value.strip() if isinstance(value, str) else ""
+
+    return {
+        "target_language": _field("target_language"),
+        "target_cefr": _field("target_cefr").upper(),
+    }
+
+
+@api.route("/video/<int:video_id>/translate_captions", methods=["POST"])
+@cross_domain
+@requires_session
+def video_translate_captions(video_id):
+    """Find or create the translation set for a video and start its translation job.
+
+    Expects `target_language` and `target_cefr` in the JSON body or form data. Responds with
+    400 for a missing language, an unknown CEFR level, or a target language equal to the
+    video's own language; 404 for an unknown video; 406 for an unknown language code.
+    On success returns the set's dictionary form with status 202.
+    """
+    User.find_by_id(flask.g.user_id)  # result unused; presumably checks the user exists
+    video = _resolve_video_or_404(video_id)
+
+    params = _read_body()
+    language_code = params["target_language"]
+    cefr = params["target_cefr"]
+    if not language_code:
+        flask.abort(400, "target_language required")
+    if cefr not in CEFR_LEVELS:
+        flask.abort(400, f"target_cefr must be one of {CEFR_LEVELS}")
+    target_language = _resolve_language_or_406(language_code)
+
+    if target_language.code == video.language.code:
+        flask.abort(400, "target_language matches the video's caption language")
+
+    # Repeated requests for the same (video, language, cefr) share one set.
+    translation_set = CaptionTranslationSet.find_or_create(
+        db_session, video, target_language, cefr
+    )
+
+    # A set that is already ready needs no job; the caller just polls and proceeds.
+    # NOTE(review): any other status starts a job, so a POST arriving while the set is still
+    # being translated launches a second `translate_set` run for the same set — confirm
+    # whether that is intended or whether in-flight sets should be left alone.
+    needs_job = translation_set.status != STATUS_READY
+    if needs_job:
+        run_in_background(translate_set, translation_set.id)
+
+    return json_result(translation_set.as_dictionary()), 202
+
+
+@api.route("/video/<int:video_id>/translate_captions/status", methods=["GET"])
+@cross_domain
+@requires_session
+def video_translate_captions_status(video_id):
+    """Return the dictionary form of a translation set so the client can poll its status.
+
+    Requires an integer `set_id` query parameter. Responds with 400 when it is missing or
+    not an integer, and 404 when the video is unknown or the set does not exist or belongs
+    to a different video.
+    """
+    User.find_by_id(flask.g.user_id)
+    video = _resolve_video_or_404(video_id)
+
+    raw_set_id = request.args.get("set_id")
+    if not raw_set_id:
+        flask.abort(400, "set_id required")
+    try:
+        set_id = int(raw_set_id)
+    except ValueError:
+        # A non-numeric query value is a client error, not a server failure.
+        flask.abort(400, "set_id must be an integer")
+
+    translation_set = CaptionTranslationSet.find_by_id(set_id)
+    if translation_set is None or translation_set.video_id != video.id:
+        flask.abort(404, "translation set not found for this video")
+
+    return json_result(translation_set.as_dictionary())
diff --git a/zeeguu/api/endpoints/user_video.py b/zeeguu/api/endpoints/user_video.py
@@ -1,6 +1,10 @@
 import flask
 from flask import request
 from zeeguu.core.model import User, UserVideo, Video
+from zeeguu.core.model.caption_translation_set import (
+    CaptionTranslationSet,
+    STATUS_READY,
+)
 
 from zeeguu.api.utils.route_wrappers import cross_domain, requires_session
 from zeeguu.api.utils.json_result import json_result
@@ -24,7 +28,26 @@ def get_user_video():
     user = User.find_by_id(flask.g.user_id)
     new_user_video = UserVideo.find_or_create(db_session, user, video)
 
-    return json_result(new_user_video.user_video_info(user, video, with_content=True))
+    # Optional translated-caption track. If the set isn't ready yet (still translating, errored,
+    # or doesn't belong to this video) we silently serve the original captions — the reader
+    # polls the dedicated status endpoint and re-fetches when ready, so the worst UX is a
+    # one-cycle delay rather than a 4xx during a known-async wait.
+    translation_set = None
+    caption_set_id = request.args.get("caption_set_id")
+    if caption_set_id:
+        candidate = CaptionTranslationSet.find_by_id(int(caption_set_id))
+        if (
+            candidate
+            and candidate.video_id == video.id
+            and candidate.status == STATUS_READY
+        ):
+            translation_set = candidate
+
+    return json_result(
+        new_user_video.user_video_info(
+            user, video, with_content=True, translation_set=translation_set
+        )
+    )
 
 
 # ---------------------------------------------------------------------------

diff --git a/zeeguu/core/llm_services/caption_translation_service.py b/zeeguu/core/llm_services/caption_translation_service.py
@@ -0,0 +1,183 @@
+"""Translate a video's captions into the learner's target language at their CEFR level.
+
+Per-segment translation preserves the original `time_start`/`time_end` of each `Caption`, so
+the player's timing logic is unchanged — only the rendered text and tokenization differ.
+
+LLM strategy: batches of ~30 captions per Haiku call (cheap and fast), structured JSON output
+keyed by numeric marker; on parse / missing-key failure we fall back to a single-caption call
+for the affected items so partial LLM failures degrade gracefully instead of zeroing the set.
+"""
+from __future__ import annotations
+
+import json
+import re
+from typing import Iterable, Optional
+
+from zeeguu.core.model.db import db
+from zeeguu.core.model.caption import Caption
+from zeeguu.core.model.caption_translation import CaptionTranslation
+from zeeguu.core.model.caption_translation_set import CaptionTranslationSet
+from zeeguu.core.llm_services.haiku_client import haiku_completion
+from zeeguu.logging import log
+
+
+# Number of captions grouped into one batch LLM call by `translate_set`.
+BATCH_SIZE = 30
+BATCH_MAX_TOKENS = 2000  # generous; ~30 short captions translated easily fit
+# `max_tokens` passed to the single-caption fallback call in `_translate_one`.
+SINGLE_MAX_TOKENS = 200
+
+
+def _batched(items, n):
+    """Yield consecutive slices of `items`, each holding at most `n` elements."""
+    yield from (items[start : start + n] for start in range(0, len(items), n))
+
+
+def _strip_code_fence(text: str) -> str:
+    """Return `text` trimmed of surrounding whitespace and of an enclosing markdown fence.
+
+    Fences are only removed when the trimmed text starts with three backticks; an optional
+    `json` tag after the opening fence is removed as well.
+    """
+    stripped = text.strip()
+    if not stripped.startswith("```"):
+        return stripped
+    without_opening = re.sub(r"^```(?:json)?\s*", "", stripped)
+    without_fences = re.sub(r"\s*```$", "", without_opening)
+    return without_fences.strip()
+
+
+def _build_batch_prompt(
+    captions: list[Caption], source_language: str, target_language: str, cefr: str
+) -> str:
+    """Build the batch prompt: translation rules followed by the captions as `[n] text` lines.
+
+    Lines are numbered from 1 in the order of `captions`; the prompt asks for a JSON object
+    keyed by those numbers.
+    """
+    lines = [
+        f"[{position}] {caption.get_content()}"
+        for position, caption in enumerate(captions, start=1)
+    ]
+    numbered = "\n".join(lines)
+    return f"""Translate each of the following {source_language} subtitle lines into {target_language} at CEFR level {cefr}.
+
+Rules:
+- Preserve meaning faithfully; favor natural, idiomatic {target_language}.
+- Adapt vocabulary and grammar to CEFR {cefr} (simpler words for A1-A2, intermediate for B1-B2, advanced for C1-C2).
+- One line per input line — do NOT merge or split lines.
+- Output STRICTLY a single JSON object, nothing else (no markdown fences, no commentary):
+{{"1": "translation of line 1", "2": "translation of line 2", ...}}
+
+Lines to translate:
+{numbered}
+"""
+
+
+def _build_single_prompt(
+    text: str, source_language: str, target_language: str, cefr: str
+) -> str:
+    """Build the prompt for translating one caption: an instruction, a blank line, the text."""
+    pieces = [
+        f"Translate the following {source_language} subtitle into {target_language} ",
+        f"at CEFR level {cefr}. Output ONLY the translation — no quotes, no commentary.\n\n",
+        f"{text}",
+    ]
+    return "".join(pieces)
+
+
+def _translate_batch(
+    captions: list[Caption], source_language: str, target_language: str, cefr: str
+) -> dict[int, str]:
+    """Translate `captions` with one LLM call and map 1-based positions to translations.
+
+    A position missing from the result means no usable translation came back for that line;
+    callers are expected to fall back to a per-caption call. An empty dict is returned for
+    empty input, an empty LLM response, unparseable JSON, or a JSON value that is not an
+    object.
+    """
+    translations: dict[int, str] = {}
+    if not captions:
+        return translations
+
+    response = haiku_completion(
+        _build_batch_prompt(captions, source_language, target_language, cefr),
+        max_tokens=BATCH_MAX_TOKENS,
+        temperature=0.1,
+    )
+    if not response:
+        return translations
+
+    try:
+        # `strict=False` lets `json.loads` accept control characters such as literal
+        # newlines inside string values, which LLM output sometimes contains.
+        payload = json.loads(_strip_code_fence(response), strict=False)
+    except (json.JSONDecodeError, ValueError) as e:
+        log(f"[caption_translation] batch JSON parse failed: {e}")
+        return translations
+    if not isinstance(payload, dict):
+        return translations
+
+    for key, value in payload.items():
+        # Keep only non-blank string values stored under keys that parse as integers.
+        candidate = value.strip() if isinstance(value, str) else ""
+        if not candidate:
+            continue
+        try:
+            position = int(str(key).strip())
+        except ValueError:
+            continue
+        translations[position] = candidate
+    return translations
+
+
+def _translate_one(
+    text: str, source_language: str, target_language: str, cefr: str
+) -> Optional[str]:
+    """Translate a single caption text; return None when the LLM yields nothing usable.
+
+    Surrounding whitespace and double quotes are stripped from the response.
+    """
+    prompt = _build_single_prompt(text, source_language, target_language, cefr)
+    response = haiku_completion(
+        prompt, max_tokens=SINGLE_MAX_TOKENS, temperature=0.1
+    )
+    if not response:
+        return None
+    translation = response.strip().strip('"').strip()
+    return translation if translation else None
+
+
+# Width of `caption_translation_set.error_message` (varchar(500) in the migration).
+_ERROR_MESSAGE_MAX_LENGTH = 500
+
+
+def translate_set(set_id: int) -> None:
+    """Background-job entry point: translate every caption of the set's video and store the rows.
+
+    Idempotent at the row level: captions that already have a `CaptionTranslation` for this
+    set are skipped, so a retried run resumes instead of duplicating. Captions for which
+    neither the batch call nor the single-caption fallback yields text are logged and
+    skipped, and the set is still marked ready afterwards. Exceptions raised while
+    translating are logged and recorded on the set via `mark_error` instead of propagating.
+    """
+    translation_set = CaptionTranslationSet.find_by_id(set_id)
+    if translation_set is None:
+        log(f"[caption_translation] no set with id {set_id}")
+        return
+
+    try:
+        translation_set.mark_translating()
+        db.session.commit()
+
+        video = translation_set.video
+        source_language = video.language.code
+        target_language = translation_set.target_language.code
+        cefr = translation_set.cefr_level
+
+        captions = sorted(video.captions, key=lambda c: c.time_start)
+        if not captions:
+            translation_set.mark_error("Video has no captions to translate.")
+            db.session.commit()
+            return
+
+        already_done = {
+            ct.caption_id
+            for ct in CaptionTranslation.query.filter_by(set_id=translation_set.id).all()
+        }
+        todo = [c for c in captions if c.id not in already_done]
+        log(
+            f"[caption_translation] set={translation_set.id} translating "
+            f"{len(todo)}/{len(captions)} captions ({source_language} -> {target_language}, {cefr})"
+        )
+
+        for batch in _batched(todo, BATCH_SIZE):
+            batch_translations = _translate_batch(
+                batch, source_language, target_language, cefr
+            )
+            for i, caption in enumerate(batch, start=1):
+                text = batch_translations.get(i)
+                if not text:
+                    # Per-caption fallback for items the batch call dropped or mis-keyed.
+                    text = _translate_one(
+                        caption.get_content(), source_language, target_language, cefr
+                    )
+                if not text:
+                    # Last resort: skip this caption rather than fail the whole set; the
+                    # reader will show the original text for un-translated lines.
+                    log(
+                        f"[caption_translation] dropped caption {caption.id} "
+                        f"(set={translation_set.id}) — LLM returned nothing"
+                    )
+                    continue
+                CaptionTranslation.create(
+                    db.session, translation_set, caption, text
+                )
+            # Commit per batch so a retried run can resume from the rows already stored.
+            db.session.commit()
+
+        translation_set.mark_ready()
+        db.session.commit()
+        log(f"[caption_translation] set={translation_set.id} ready")
+    except Exception as e:  # noqa: BLE001 — background job; surface via status row
+        log(f"[caption_translation] set={set_id} error: {e}")
+        db.session.rollback()
+        try:
+            # Reload after rollback to mark the set's error state cleanly.
+            translation_set = CaptionTranslationSet.find_by_id(set_id)
+            if translation_set:
+                # Truncate to the column width: an over-long message could make this commit
+                # fail and leave the set stuck in its in-progress status.
+                translation_set.mark_error(str(e)[:_ERROR_MESSAGE_MAX_LENGTH])
+                db.session.commit()
+        except Exception as mark_failure:  # noqa: BLE001 — nothing left to report to
+            # Recording the failure itself failed; log it instead of letting the exception
+            # escape the background job.
+            log(
+                f"[caption_translation] set={set_id} could not record error state: "
+                f"{mark_failure}"
+            )
+            db.session.rollback()
diff --git a/zeeguu/core/model/__init__.py b/zeeguu/core/model/__init__.py
@@ -88,6 +88,8 @@
 from .yt_channel import YTChannel
 from .video import Video
 from .caption import Caption
+from .caption_translation_set import CaptionTranslationSet
+from .caption_translation import CaptionTranslation
 from .video_tag import VideoTag
 from .video_tag_map import VideoTagMap
 from .video_caption_context import VideoCaptionContext