From 1a1dab3daab5dfd3071855f5e7ca221df79891de Mon Sep 17 00:00:00 2001
From: Mircea Lungu <mircea.lungu@gmail.com>
Date: Sun, 31 May 2026 21:08:26 +0200
Subject: [PATCH 1/2] feat(video): caption_translation_set +
 caption_translation models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tables to hold per-(video, target_language, target_cefr) translated subtitles for a shared
video. Per-segment translation preserves the original Caption.time_start/time_end so the
reader's timing/sync logic is unchanged — only the rendered text is in the learner's language.

- caption_translation_set: the bundle, with status (pending/translating/ready/error) for the
  async job, error_message, and a UNIQUE(video_id, target_language_id, cefr_level) so a
  second request for the same target deduplicates instead of re-translating.
- caption_translation: one row per original Caption inside a set, pointing at a NewText row
  for the translated content. UNIQUE(set_id, caption_id) so retried jobs resume cleanly.

Mirrors the DailyAudioLesson ↔ DailyAudioLessonSegment shape already in the codebase.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../26-05-31-a--add_caption_translation.sql   |  37 ++++++
 zeeguu/core/model/__init__.py                 |   2 +
 zeeguu/core/model/caption_translation.py      |  48 ++++++++
 zeeguu/core/model/caption_translation_set.py  | 116 ++++++++++++++++++
 4 files changed, 203 insertions(+)
 create mode 100644 tools/migrations/26-05-31-a--add_caption_translation.sql
 create mode 100644 zeeguu/core/model/caption_translation.py
 create mode 100644 zeeguu/core/model/caption_translation_set.py

diff --git a/tools/migrations/26-05-31-a--add_caption_translation.sql b/tools/migrations/26-05-31-a--add_caption_translation.sql
new file mode 100644
index 00000000..64442394
--- /dev/null
+++ b/tools/migrations/26-05-31-a--add_caption_translation.sql
@@ -0,0 +1,37 @@
+-- v1.5: translated captions for a shared video.
+-- A `caption_translation_set` is a per-(video, target_language, target_cefr) bundle that owns
+-- per-original-caption translated text rows. Timing stays on the parent `caption` rows so we
+-- don't duplicate it (the player aligns by original time_start/time_end). Status drives the
+-- async translation job (mirrors the daily-audio-lesson status pattern).
+
+CREATE TABLE `caption_translation_set` (
+    `id` int NOT NULL AUTO_INCREMENT,
+    `video_id` int NOT NULL,  -- video whose captions are translated
+    `target_language_id` int NOT NULL,  -- language the captions are translated into
+    `cefr_level` enum('A1','A2','B1','B2','C1','C2') NOT NULL,  -- CEFR level the translation targets
+    `status` enum('pending','translating','ready','error') NOT NULL DEFAULT 'pending',  -- async job state
+    `error_message` varchar(500) DEFAULT NULL,  -- failure detail; the model truncates it to 500 chars
+    `created_at` datetime NOT NULL,  -- no DB default: the inserting code must supply the value
+    PRIMARY KEY (`id`),
+    UNIQUE KEY `uq_caption_translation_set_video_lang_cefr`
+        (`video_id`, `target_language_id`, `cefr_level`),  -- at most one set per video/language/level
+    CONSTRAINT `fk_caption_translation_set_video`
+        FOREIGN KEY (`video_id`) REFERENCES `video` (`id`),
+    CONSTRAINT `fk_caption_translation_set_target_language`
+        FOREIGN KEY (`target_language_id`) REFERENCES `language` (`id`)
+);
+
+CREATE TABLE `caption_translation` (
+    `id` int NOT NULL AUTO_INCREMENT,
+    `set_id` int NOT NULL,  -- owning caption_translation_set
+    `caption_id` int NOT NULL,  -- original caption this row translates; timing is read from it
+    `text_id` int NOT NULL,  -- new_text row holding the translated content
+    PRIMARY KEY (`id`),
+    UNIQUE KEY `uq_caption_translation_set_caption` (`set_id`, `caption_id`),  -- one translation per caption per set
+    CONSTRAINT `fk_caption_translation_set`
+        FOREIGN KEY (`set_id`) REFERENCES `caption_translation_set` (`id`) ON DELETE CASCADE,  -- rows go with their set
+    CONSTRAINT `fk_caption_translation_caption`
+        FOREIGN KEY (`caption_id`) REFERENCES `caption` (`id`),  -- no ON DELETE clause, unlike set_id
+    CONSTRAINT `fk_caption_translation_text`
+        FOREIGN KEY (`text_id`) REFERENCES `new_text` (`id`)
+);
diff --git a/zeeguu/core/model/__init__.py b/zeeguu/core/model/__init__.py
index e6533ae9..87fce1f3 100644
--- a/zeeguu/core/model/__init__.py
+++ b/zeeguu/core/model/__init__.py
@@ -88,6 +88,8 @@
 from .yt_channel import YTChannel
 from .video import Video
 from .caption import Caption
+from .caption_translation_set import CaptionTranslationSet
+from .caption_translation import CaptionTranslation
 from .video_tag import VideoTag
 from .video_tag_map import VideoTagMap
 from .video_caption_context import VideoCaptionContext
diff --git a/zeeguu/core/model/caption_translation.py b/zeeguu/core/model/caption_translation.py
new file mode 100644
index 00000000..f1c4c53f
--- /dev/null
+++ b/zeeguu/core/model/caption_translation.py
@@ -0,0 +1,48 @@
+"""A single translated caption — translated text for one original Caption inside a set.
+
+Timing (time_start / time_end) is read from the original Caption; we only store the new text.
+"""
+from zeeguu.core.model.db import db
+from zeeguu.core.model.caption import Caption
+from zeeguu.core.model.new_text import NewText
+
+
+class CaptionTranslation(db.Model):
+    __tablename__ = "caption_translation"
+    __table_args__ = (  # unique (set_id, caption_id): one translation per caption per set
+        db.UniqueConstraint("set_id", "caption_id", name="uq_caption_translation_set_caption"),
+        {"mysql_collate": "utf8_bin"},
+    )
+
+    id = db.Column(db.Integer, primary_key=True)
+
+    set_id = db.Column(
+        db.Integer, db.ForeignKey("caption_translation_set.id"), nullable=False
+    )
+    translation_set = db.relationship(  # other side of CaptionTranslationSet.translations
+        "CaptionTranslationSet", back_populates="translations"
+    )
+
+    caption_id = db.Column(db.Integer, db.ForeignKey(Caption.id), nullable=False)
+    caption = db.relationship(Caption, foreign_keys="CaptionTranslation.caption_id")  # original caption
+
+    text_id = db.Column(db.Integer, db.ForeignKey(NewText.id), nullable=False)
+    text = db.relationship(NewText, foreign_keys="CaptionTranslation.text_id")  # translated content
+
+    def __init__(self, translation_set, caption, text):  # `text` is a NewText row, not a str
+        self.translation_set = translation_set
+        self.caption = caption
+        self.text = text
+
+    def __repr__(self):
+        return f"<CaptionTranslation set={self.set_id} caption={self.caption_id}>"
+
+    def get_content(self):  # translated text, delegated to the linked NewText row
+        return self.text.get_content()
+    # Build a row for `translated_text` and add it to `session`; this method does not commit.
+    @classmethod
+    def create(cls, session, translation_set, caption, translated_text: str):
+        text_row = NewText.find_or_create(session, translated_text, False)  # NOTE(review): meaning of the positional False is defined by NewText — confirm
+        row = cls(translation_set=translation_set, caption=caption, text=text_row)
+        session.add(row)
+        return row
diff --git a/zeeguu/core/model/caption_translation_set.py b/zeeguu/core/model/caption_translation_set.py
new file mode 100644
index 00000000..fe7b3dd8
--- /dev/null
+++ b/zeeguu/core/model/caption_translation_set.py
@@ -0,0 +1,116 @@
+"""A per-(video, target_language, target_cefr) bundle of translated captions.
+
+Owns the async-job status so the reader can poll while translation runs in the background.
+Timing is NOT stored here — it stays on the original Caption rows so we don't duplicate it.
+"""
+from datetime import datetime
+
+from sqlalchemy.orm.exc import NoResultFound
+
+from zeeguu.core.model.db import db
+from zeeguu.core.model.language import Language
+from zeeguu.core.model.video import Video
+
+
+CEFR_LEVELS = ("A1", "A2", "B1", "B2", "C1", "C2")  # same values as the migration's `cefr_level` enum
+STATUS_PENDING = "pending"  # initial status, assigned in CaptionTranslationSet.__init__
+STATUS_TRANSLATING = "translating"  # set by mark_translating()
+STATUS_READY = "ready"  # set by mark_ready()
+STATUS_ERROR = "error"  # set by mark_error(); error_message holds the detail
+
+
+class CaptionTranslationSet(db.Model):
+    __tablename__ = "caption_translation_set"
+    __table_args__ = (
+        db.UniqueConstraint(
+            "video_id",
+            "target_language_id",
+            "cefr_level",
+            name="uq_caption_translation_set_video_lang_cefr",
+        ),
+        {"mysql_collate": "utf8_bin"},
+    )
+
+    id = db.Column(db.Integer, primary_key=True)
+
+    video_id = db.Column(db.Integer, db.ForeignKey(Video.id), nullable=False)
+    video = db.relationship(Video)
+
+    target_language_id = db.Column(db.Integer, db.ForeignKey(Language.id), nullable=False)
+    target_language = db.relationship(Language)
+
+    cefr_level = db.Column(
+        db.Enum(*CEFR_LEVELS, name="cefr_level_enum"), nullable=False
+    )
+
+    status = db.Column(
+        db.Enum(
+            STATUS_PENDING, STATUS_TRANSLATING, STATUS_READY, STATUS_ERROR,
+            name="caption_translation_set_status",
+        ),
+        nullable=False,
+        default=STATUS_PENDING,
+    )
+    error_message = db.Column(db.String(500))
+    created_at = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
+
+    translations = db.relationship(
+        "CaptionTranslation", back_populates="translation_set", cascade="all, delete-orphan"
+    )
+
+    def __init__(self, video, target_language, cefr_level):
+        self.video = video
+        self.target_language = target_language
+        self.cefr_level = cefr_level
+        self.status = STATUS_PENDING
+        self.created_at = datetime.utcnow()
+
+    def __repr__(self):
+        return (
+            f"<CaptionTranslationSet video={self.video_id} "
+            f"lang={self.target_language_id} cefr={self.cefr_level} status={self.status}>"
+        )
+
+    def mark_translating(self):
+        self.status = STATUS_TRANSLATING
+        self.error_message = None
+
+    def mark_ready(self):
+        self.status = STATUS_READY
+        self.error_message = None
+
+    def mark_error(self, message: str):
+        self.status = STATUS_ERROR
+        self.error_message = (message or "")[:500]
+
+    def as_dictionary(self):
+        return {
+            "id": self.id,
+            "video_id": self.video_id,
+            "target_language": self.target_language.code,
+            "cefr_level": self.cefr_level,
+            "status": self.status,
+            "error_message": self.error_message,
+        }
+
+    @classmethod
+    def find_or_create(cls, session, video, target_language, cefr_level):
+        """Idempotent: a second request for the same (video, lang, cefr) returns the existing
+        set so callers can poll status without re-translating."""
+        try:
+            return (
+                cls.query.filter_by(
+                    video_id=video.id,
+                    target_language_id=target_language.id,
+                    cefr_level=cefr_level,
+                ).one()
+            )
+        except NoResultFound:
+            new_set = cls(video=video, target_language=target_language, cefr_level=cefr_level)
+            session.add(new_set)
+            session.commit()
+            return new_set
+
+    @classmethod
+    def find_by_id(cls, set_id: int):
+        return cls.query.filter_by(id=set_id).first()

From 2df32876b6831dec442e0b56f7714a8a067de887 Mon Sep 17 00:00:00 2001
From: Mircea Lungu <mircea.lungu@gmail.com>
Date: Sun, 31 May 2026 21:08:42 +0200
Subject: [PATCH 2/2] feat(video): translate a shared video's captions to the
 learner's language
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per the v1.5 plan: when a learner shares a YouTube video whose captions are in a different
language, offer to translate the captions to the learner's language at their CEFR level,
preserving the original per-segment timing so the existing interactive reader (tap-to-translate,
bookmarks, time-synced highlight) keeps working unchanged. Audio is unaffected; only the reading
surface changes.

- New service core/llm_services/caption_translation_service.translate_set(set_id):
  batches ~30 captions per Haiku call with structured JSON output (numeric markers), falls
  back to per-caption translation when a batch's parsing or alignment fails so partial LLM
  failures degrade gracefully instead of zeroing the set. Reuses the existing haiku_client.
- New endpoints in api/endpoints/caption_translation.py:
  - POST /video/<id>/translate_captions  — find_or_create the set, kick off the background
    job via run_in_background, return 202 + set dict. Idempotent.
  - GET  /video/<id>/translate_captions/status?set_id=  — for the reader's polling loop.
- Extended /user_video to accept optional caption_set_id; when the set is ready and belongs
  to the requested video, Video.video_info substitutes translated text + retokenises in the
  target language. context_identifier still references the original caption id so bookmark
  anchoring is stable across track switches. If the set isn't ready, we silently serve the
  original captions — the reader's separate status poll drives the eventual refetch.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 zeeguu/api/endpoints/__init__.py              |   1 +
 zeeguu/api/endpoints/caption_translation.py   |  91 +++++++++
 zeeguu/api/endpoints/user_video.py            |  25 ++-
 .../caption_translation_service.py            | 183 ++++++++++++++++++
 zeeguu/core/model/user_video.py               |   7 +-
 zeeguu/core/model/video.py                    |  27 ++-
 6 files changed, 328 insertions(+), 6 deletions(-)
 create mode 100644 zeeguu/api/endpoints/caption_translation.py
 create mode 100644 zeeguu/core/llm_services/caption_translation_service.py

diff --git a/zeeguu/api/endpoints/__init__.py b/zeeguu/api/endpoints/__init__.py
index 4d3b2777..72ec971f 100644
--- a/zeeguu/api/endpoints/__init__.py
+++ b/zeeguu/api/endpoints/__init__.py
@@ -41,6 +41,7 @@
 from .listening_sessions import *
 from . import user_video
 from . import user_watching_session
+from . import caption_translation
 from . import audio_lessons
 from . import article_simplification
 from . import generated_examples
diff --git a/zeeguu/api/endpoints/caption_translation.py b/zeeguu/api/endpoints/caption_translation.py
new file mode 100644
index 00000000..2bc3dad0
--- /dev/null
+++ b/zeeguu/api/endpoints/caption_translation.py
@@ -0,0 +1,91 @@
+"""Endpoints for the per-video translated-captions feature (v1.5 of share-to-video).
+
+POST kicks off (or returns the existing) per-(video, target_language, cefr) translation set
+and runs the LLM job in a background thread; GET polls the set's status. Once `ready`, the
+reader calls /user_video?caption_set_id=... to get the translated caption block.
+"""
+import flask
+from flask import request
+from sqlalchemy.orm.exc import NoResultFound
+
+from zeeguu.core.model import User, Language
+from zeeguu.core.model.video import Video
+from zeeguu.core.model.caption_translation_set import (
+    CaptionTranslationSet,
+    CEFR_LEVELS,
+    STATUS_READY,
+)
+from zeeguu.core.llm_services.caption_translation_service import translate_set
+from zeeguu.api.utils.background import run_in_background
+from zeeguu.api.utils.json_result import json_result
+from zeeguu.api.utils.route_wrappers import cross_domain, requires_session
+
+from . import api, db_session
+
+
+def _resolve_video_or_404(video_id: int) -> Video:
+    video = Video.find_by_id(video_id)
+    if video is None:
+        flask.abort(404, "video not found")
+    return video
+
+
+def _resolve_language_or_406(code: str) -> Language:  # look up a language by code, or abort with 406
+    try:
+        return Language.find(code)
+    except NoResultFound:  # NOTE(review): assumes Language.find raises for an unknown code rather than returning None — confirm
+        flask.abort(406, "Language not supported")
+
+
+def _read_body():
+    data = request.get_json(silent=True) or {}
+    return {
+        "target_language": (data.get("target_language") or request.form.get("target_language") or "").strip(),
+        "target_cefr": (data.get("target_cefr") or request.form.get("target_cefr") or "").strip().upper(),
+    }
+
+
+@api.route("/video/<int:video_id>/translate_captions", methods=["POST"])
+@cross_domain
+@requires_session
+def video_translate_captions(video_id):
+    User.find_by_id(flask.g.user_id)  # validates session and existence
+    video = _resolve_video_or_404(video_id)
+
+    body = _read_body()
+    if not body["target_language"]:
+        flask.abort(400, "target_language required")
+    if body["target_cefr"] not in CEFR_LEVELS:
+        flask.abort(400, f"target_cefr must be one of {CEFR_LEVELS}")
+    target_language = _resolve_language_or_406(body["target_language"])
+
+    if target_language.code == video.language.code:
+        flask.abort(400, "target_language matches the video's caption language")
+
+    # Idempotent: the second request for the same (video, language, cefr) returns the existing
+    # set without re-translating. If already ready, no background job — caller polls and goes.
+    translation_set = CaptionTranslationSet.find_or_create(
+        db_session, video, target_language, body["target_cefr"]
+    )
+
+    if translation_set.status != STATUS_READY:
+        run_in_background(translate_set, translation_set.id)
+
+    return json_result(translation_set.as_dictionary()), 202
+
+
+@api.route("/video/<int:video_id>/translate_captions/status", methods=["GET"])
+@cross_domain
+@requires_session
+def video_translate_captions_status(video_id):
+    User.find_by_id(flask.g.user_id)
+    video = _resolve_video_or_404(video_id)
+
+    set_id = request.args.get("set_id")
+    if not set_id:
+        flask.abort(400, "set_id required")
+    translation_set = CaptionTranslationSet.find_by_id(int(set_id))
+    if translation_set is None or translation_set.video_id != video.id:
+        flask.abort(404, "translation set not found for this video")
+
+    return json_result(translation_set.as_dictionary())
diff --git a/zeeguu/api/endpoints/user_video.py b/zeeguu/api/endpoints/user_video.py
index b4d92faa..32c3cd31 100644
--- a/zeeguu/api/endpoints/user_video.py
+++ b/zeeguu/api/endpoints/user_video.py
@@ -1,6 +1,10 @@
 import flask
 from flask import request
 from zeeguu.core.model import User, UserVideo, Video
+from zeeguu.core.model.caption_translation_set import (
+    CaptionTranslationSet,
+    STATUS_READY,
+)
 
 from zeeguu.api.utils.route_wrappers import cross_domain, requires_session
 from zeeguu.api.utils.json_result import json_result
@@ -24,7 +28,26 @@ def get_user_video():
     user = User.find_by_id(flask.g.user_id)
     new_user_video = UserVideo.find_or_create(db_session, user, video)
 
-    return json_result(new_user_video.user_video_info(user, video, with_content=True))
+    # Optional translated-caption track. If the set isn't ready yet (still translating, errored,
+    # or doesn't belong to this video) we silently serve the original captions — the reader
+    # polls the dedicated status endpoint and re-fetches when ready, so the worst UX is a
+    # one-cycle delay rather than a 4xx during a known-async wait.
+    translation_set = None
+    caption_set_id = request.args.get("caption_set_id")
+    if caption_set_id:
+        candidate = CaptionTranslationSet.find_by_id(int(caption_set_id))
+        if (
+            candidate
+            and candidate.video_id == video.id
+            and candidate.status == STATUS_READY
+        ):
+            translation_set = candidate
+
+    return json_result(
+        new_user_video.user_video_info(
+            user, video, with_content=True, translation_set=translation_set
+        )
+    )
 
 
 # ---------------------------------------------------------------------------
diff --git a/zeeguu/core/llm_services/caption_translation_service.py b/zeeguu/core/llm_services/caption_translation_service.py
new file mode 100644
index 00000000..679fbd2b
--- /dev/null
+++ b/zeeguu/core/llm_services/caption_translation_service.py
@@ -0,0 +1,183 @@
+"""Translate a video's captions into the learner's target language at their CEFR level.
+
+Per-segment translation preserves the original `time_start`/`time_end` of each `Caption`, so
+the player's timing logic is unchanged — only the rendered text and tokenization differ.
+
+LLM strategy: batches of ~30 captions per Haiku call (cheap and fast), structured JSON output
+keyed by numeric marker; on parse / missing-key failure we fall back to a single-caption call
+for the affected items so partial LLM failures degrade gracefully instead of zeroing the set.
+"""
+from __future__ import annotations
+
+import json
+import re
+from typing import Iterable, Optional
+
+from zeeguu.core.model.db import db
+from zeeguu.core.model.caption import Caption
+from zeeguu.core.model.caption_translation import CaptionTranslation
+from zeeguu.core.model.caption_translation_set import CaptionTranslationSet
+from zeeguu.core.llm_services.haiku_client import haiku_completion
+from zeeguu.logging import log
+
+
+BATCH_SIZE = 30  # captions sent to the LLM per batch call
+BATCH_MAX_TOKENS = 2000  # generous; ~30 short captions translated easily fit
+SINGLE_MAX_TOKENS = 200  # max_tokens for the single-caption fallback call
+
+
+def _batched(items, n):
+    for i in range(0, len(items), n):
+        yield items[i : i + n]
+
+
+def _strip_code_fence(text: str) -> str:
+    text = text.strip()
+    if text.startswith("```"):
+        text = re.sub(r"^```(?:json)?\s*", "", text)
+        text = re.sub(r"\s*```$", "", text)
+    return text.strip()
+
+
+def _build_batch_prompt(
+    captions: list[Caption], source_language: str, target_language: str, cefr: str
+) -> str:  # one prompt asking for every caption translated, answered as a single JSON object
+    numbered = "\n".join(f"[{i + 1}] {c.get_content()}" for i, c in enumerate(captions))  # 1-based "[n] text" lines
+    return f"""Translate each of the following {source_language} subtitle lines into {target_language} at CEFR level {cefr}.
+
+Rules:
+- Preserve meaning faithfully; favor natural, idiomatic {target_language}.
+- Adapt vocabulary and grammar to CEFR {cefr} (simpler words for A1-A2, intermediate for B1-B2, advanced for C1-C2).
+- One line per input line — do NOT merge or split lines.
+- Output STRICTLY a single JSON object, nothing else (no markdown fences, no commentary):
+{{"1": "translation of line 1", "2": "translation of line 2", ...}}
+
+Lines to translate:
+{numbered}
+"""
+
+
+def _build_single_prompt(
+    text: str, source_language: str, target_language: str, cefr: str
+) -> str:
+    return (
+        f"Translate the following {source_language} subtitle into {target_language} "
+        f"at CEFR level {cefr}. Output ONLY the translation — no quotes, no commentary.\n\n"
+        f"{text}"
+    )
+
+
+def _translate_batch(
+    captions: list[Caption], source_language: str, target_language: str, cefr: str
+) -> dict[int, str]:
+    """Returns {1-based index in `captions` -> translation}. Missing keys mean the LLM didn't
+    provide a translation for that line; callers should fall back per-caption for those."""
+    if not captions:
+        return {}
+    prompt = _build_batch_prompt(captions, source_language, target_language, cefr)
+    raw = haiku_completion(prompt, max_tokens=BATCH_MAX_TOKENS, temperature=0.1)
+    if not raw:
+        return {}
+    try:
+        # `strict=False` because LLMs sometimes embed literal newlines in JSON string values
+        # (which `json.loads` strict mode rejects). Matches the simplification_service fix.
+        parsed = json.loads(_strip_code_fence(raw), strict=False)
+    except (json.JSONDecodeError, ValueError) as e:
+        log(f"[caption_translation] batch JSON parse failed: {e}")
+        return {}
+    if not isinstance(parsed, dict):
+        return {}
+    out: dict[int, str] = {}
+    for k, v in parsed.items():
+        try:
+            idx = int(str(k).strip())
+        except ValueError:
+            continue
+        if isinstance(v, str) and v.strip():
+            out[idx] = v.strip()
+    return out
+
+
+def _translate_one(
+    text: str, source_language: str, target_language: str, cefr: str
+) -> Optional[str]:
+    raw = haiku_completion(
+        _build_single_prompt(text, source_language, target_language, cefr),
+        max_tokens=SINGLE_MAX_TOKENS,
+        temperature=0.1,
+    )
+    if not raw:
+        return None
+    cleaned = raw.strip().strip('"').strip()
+    return cleaned or None
+
+
+def translate_set(set_id: int) -> None:
+    """Background-job entry point. Translates every caption in the set's video and stores the
+    rows. Idempotent at the row level: existing CaptionTranslations for the set are skipped so
+    a retried run resumes instead of duplicating."""
+    translation_set = CaptionTranslationSet.find_by_id(set_id)
+    if translation_set is None:
+        log(f"[caption_translation] no set with id {set_id}")
+        return
+
+    try:
+        translation_set.mark_translating()
+        db.session.commit()
+
+        video = translation_set.video
+        source_language = video.language.code
+        target_language = translation_set.target_language.code
+        cefr = translation_set.cefr_level
+
+        captions = sorted(video.captions, key=lambda c: c.time_start)
+        if not captions:
+            translation_set.mark_error("Video has no captions to translate.")
+            db.session.commit()
+            return
+
+        already_done = {
+            ct.caption_id
+            for ct in CaptionTranslation.query.filter_by(set_id=translation_set.id).all()
+        }
+        todo = [c for c in captions if c.id not in already_done]
+        log(
+            f"[caption_translation] set={translation_set.id} translating "
+            f"{len(todo)}/{len(captions)} captions ({source_language} -> {target_language}, {cefr})"
+        )
+
+        for batch in _batched(todo, BATCH_SIZE):
+            batch_translations = _translate_batch(
+                batch, source_language, target_language, cefr
+            )
+            for i, caption in enumerate(batch, start=1):
+                text = batch_translations.get(i)
+                if not text:
+                    # Per-caption fallback for items the batch call dropped or mis-keyed.
+                    text = _translate_one(
+                        caption.get_content(), source_language, target_language, cefr
+                    )
+                if not text:
+                    # Last resort: skip this caption rather than fail the whole set; the
+                    # reader will show the original text for un-translated lines.
+                    log(
+                        f"[caption_translation] dropped caption {caption.id} "
+                        f"(set={translation_set.id}) — LLM returned nothing"
+                    )
+                    continue
+                CaptionTranslation.create(
+                    db.session, translation_set, caption, text
+                )
+            db.session.commit()
+
+        translation_set.mark_ready()
+        db.session.commit()
+        log(f"[caption_translation] set={translation_set.id} ready")
+    except Exception as e:  # noqa: BLE001 — background job; surface via status row
+        log(f"[caption_translation] set={set_id} error: {e}")
+        db.session.rollback()
+        # Reload after rollback to mark the set's error state cleanly.
+        translation_set = CaptionTranslationSet.find_by_id(set_id)
+        if translation_set:
+            translation_set.mark_error(str(e))
+            db.session.commit()
diff --git a/zeeguu/core/model/user_video.py b/zeeguu/core/model/user_video.py
index e534813c..812a0d6c 100644
--- a/zeeguu/core/model/user_video.py
+++ b/zeeguu/core/model/user_video.py
@@ -128,13 +128,16 @@ def exists(cls, obj):
 
     @classmethod
     def user_video_info(
-        cls, user: User, video: Video, with_content=False, with_translations=True
+        cls, user: User, video: Video, with_content=False, with_translations=True,
+        translation_set=None,
     ):
         from zeeguu.core.model.bookmark import Bookmark
         from zeeguu.core.model.video_title_context import VideoTitleContext
         from zeeguu.core.model.user_activitiy_data import UserActivityData
 
-        returned_info = video.video_info(with_content=with_content)
+        returned_info = video.video_info(
+            with_content=with_content, translation_set=translation_set
+        )
         user_video_info = UserVideo.find(user, video)
         # user_diff_feedback = VideoDifficultyFeedback.find(user, video)
         # user_topics_feedback = VideoTopicsFeedback.find_given_user_video(user, video)
diff --git a/zeeguu/core/model/video.py b/zeeguu/core/model/video.py
index d391d855..dd6eee9e 100644
--- a/zeeguu/core/model/video.py
+++ b/zeeguu/core/model/video.py
@@ -239,7 +239,12 @@ def topics_as_tuple(self):
             topics.append((topic.topic.title, topic.origin_type))
         return topics
 
-    def video_info(self, with_content=False):
+    def video_info(self, with_content=False, translation_set=None):
+        """If `translation_set` is given, each caption's `text`/`tokenized_text` come from the
+        translated caption in the user's target language at the set's CEFR; timings and the
+        `context_identifier` (still keyed by the original caption id) are unchanged, so the
+        player's timing logic and the bookmark anchor are stable across original/translated
+        views. Captions missing a translation in the set fall back to the original text."""
         text = self.get_content()
         summary = text[:MAX_CHAR_COUNT_IN_SUMMARY].replace("\n", " ") + "..."
         result_dict = dict(
@@ -269,13 +274,29 @@ def video_info(self, with_content=False):
         if with_content:
             from zeeguu.core.mwe import tokenize_for_reading
 
+            translations_by_caption_id = {}
+            caption_language = self.language
+            if translation_set is not None:
+                translations_by_caption_id = {
+                    ct.caption_id: ct.get_content()
+                    for ct in translation_set.translations
+                }
+                caption_language = translation_set.target_language
+                result_dict["caption_set"] = {
+                    "id": translation_set.id,
+                    "target_language": translation_set.target_language.code,
+                    "cefr_level": translation_set.cefr_level,
+                }
+
             result_dict["captions"] = [
                 {
                     "time_start": caption.time_start / 1000,  # convert to seconds
                     "time_end": caption.time_end / 1000,
-                    "text": caption.get_content(),
+                    "text": translations_by_caption_id.get(caption.id, caption.get_content()),
                     "tokenized_text": tokenize_for_reading(
-                        caption.get_content(), self.language, mode="stanza"
+                        translations_by_caption_id.get(caption.id, caption.get_content()),
+                        caption_language,
+                        mode="stanza",
                     ),
                     "context_identifier": ContextIdentifier(
                         ContextType.VIDEO_CAPTION, video_caption_id=caption.id