Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions tools/backfill_daily_audio_subscriptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/usr/bin/env python
"""
One-time backfill: create a DailyAudioSubscription for every (user, language)
that has a legacy `daily_audio_lesson_type_<lang>` UserPreference, copying the
type and verbatim suggestion. Idempotent — skips a (user, language) that already
has a subscription. Run once after the 26-05-29-b migration.

A preference is skipped, and counted in the final summary, when:
  - its stored type is not one of VALID_TYPES,
  - its language code cannot be resolved,
  - its user cannot be loaded,
  - the (user, language) pair already has a subscription, or was already
    handled earlier in this same run.

    source api/.venv/bin/activate && python -m tools.backfill_daily_audio_subscriptions [--dry-run]
"""

import argparse

# Arguments are parsed before the application is imported and created, so
# `--help` (and usage errors) exit before any app setup runs.
parser = argparse.ArgumentParser(description="Backfill daily audio subscriptions from preferences")
parser.add_argument("--dry-run", action="store_true", help="Report without writing")
args = parser.parse_args()

from zeeguu.api.app import create_app_for_scripts
from zeeguu.core.model import db

app = create_app_for_scripts()
app.app_context().push()

from zeeguu.core.model import User, Language, UserPreference, DailyAudioSubscription

TYPE_PREFIX = UserPreference.DAILY_AUDIO_LESSON_TYPE_PREFIX
VALID_TYPES = ("three_words_lesson", "topic", "situation")

# Every legacy "lesson type" preference, for all users and all language suffixes.
type_prefs = UserPreference.query.filter(
    UserPreference.key.like(f"{TYPE_PREFIX}%")
).all()

created = skipped_exists = skipped_invalid = skipped_no_lang = skipped_no_user = 0

# (user id, language code) pairs already handled in this run. Two legacy keys
# can resolve to the same language (e.g. the cn→zh-CN aliasing below).
# NOTE(review): whether DailyAudioSubscription.find sees rows that were added
# but not yet committed depends on the session's flush behaviour, and a dry run
# never adds them at all — so duplicates are also tracked here in memory.
seen_pairs = set()

for pref in type_prefs:
    lesson_type = (pref.value or "").strip()
    if lesson_type not in VALID_TYPES:
        skipped_invalid += 1
        continue

    # The language code is whatever follows the shared key prefix.
    lang_code = pref.key[len(TYPE_PREFIX):]
    try:
        language = Language.find_or_create(lang_code)  # handles cn→zh-CN
    except Exception as e:
        # Best effort: one unresolvable code must not abort the whole backfill.
        print(f" ! could not resolve language {lang_code!r} (user {pref.user_id}): {e}")
        skipped_no_lang += 1
        continue

    user = User.find_by_id(pref.user_id)
    if not user:
        # Previously skipped silently; report it so the summary adds up.
        print(f" ! no user {pref.user_id} for preference {pref.key!r}; skipping")
        skipped_no_user += 1
        continue

    pair = (user.id, language.code)
    if pair in seen_pairs or DailyAudioSubscription.find(user, language) is not None:
        skipped_exists += 1
        continue
    seen_pairs.add(pair)

    # The suggestion lives in a sibling preference keyed by the same legacy
    # language code; blank or missing becomes None.
    raw_suggestion = UserPreference.get(
        user, UserPreference.daily_audio_lesson_suggestion_key(lang_code)
    )
    raw_suggestion = (raw_suggestion or "").strip() or None

    print(f" + user {user.id} [{language.code}] {lesson_type}: {raw_suggestion or '-'}")
    if not args.dry_run:
        db.session.add(DailyAudioSubscription(user, language, lesson_type, raw_suggestion))
    # Counted in dry-run mode too, so the summary shows what a real run would do.
    created += 1

# Single commit at the end: either every new subscription is written or none.
if not args.dry_run:
    db.session.commit()

print("=" * 60)
print(f"created: {created}")
print(f"skipped (already had subscription): {skipped_exists}")
print(f"skipped (invalid type): {skipped_invalid}")
print(f"skipped (unresolved language): {skipped_no_lang}")
print(f"skipped (missing user): {skipped_no_user}")
if args.dry_run:
    print("[DRY RUN] nothing written")
57 changes: 31 additions & 26 deletions tools/generate_daily_audio_lessons.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,23 @@
Pre-generate each opted-in user's DAILY audio lesson so it's waiting for them
when they open the app — no spinner, just press play.

A user "opts in" by configuring a daily lesson in the app, which stores a
per-language preference:
daily_audio_lesson_type_<lang> three_words_lesson | topic | situation
daily_audio_lesson_suggestion_<lang> the verbatim subject they typed (topic/situation)
A user "opts in" by configuring a daily lesson in the app, which creates a
DailyAudioSubscription(user, learned_language) holding the lesson type, subject,
on/off flag, and schedule (daily, or a weekday mask).

For each recently-active user that has a daily lesson configured for their
currently-learned language and no lesson yet for their local "today", we run
the same generation pipeline the on-demand endpoint uses
(DailyLessonGenerator.prepare_lesson_generation + generate_daily_lesson),
synchronously. Re-running is safe: users who already have today's lesson are
skipped.

The frontend still generates on demand (first day / cron miss / odd timezone),
so this job is a pure latency optimization, not a correctness requirement.
For each recently-active user with an ENABLED subscription for their
currently-learned language, we generate today's lesson unless: it isn't a scheduled day,
they already have one for their local "today", or generation is PAUSED because
the most recent lesson wasn't engaged with (DailyAudioLesson.waiting_paused_for,
from #643 — avoids piling up unheard lessons). Generation reuses the on-demand
pipeline (prepare_lesson_generation + generate_daily_lesson), synchronously.

Usage:
python generate_daily_audio_lessons.py [--send-email] [--dry-run] [--days N] [--user-id ID]
"""

import argparse
from datetime import datetime
from datetime import datetime, timezone, timedelta

parser = argparse.ArgumentParser(
description="Pre-generate daily audio lessons for opted-in active users"
Expand Down Expand Up @@ -56,8 +52,8 @@
db,
User,
UserWord,
UserPreference,
DailyAudioLesson,
DailyAudioSubscription,
AudioLessonGenerationProgress,
)
from zeeguu.core.audio_lessons.daily_lesson_generator import DailyLessonGenerator
Expand Down Expand Up @@ -209,33 +205,42 @@ def timeout_handler(signum, frame):
continue

try:
lang_code = user.learned_language.code
lesson_type, raw_suggestion = UserPreference.get_daily_audio_lesson_config(user, lang_code)
language = user.learned_language
sub = DailyAudioSubscription.find(user, language)

if not lesson_type:
counts["not-opted-in"] += 1
# No subscription, or turned off → not generating for this user.
if sub is None or not sub.enabled:
counts["not-subscribed"] += 1
continue
lesson_type, raw_suggestion = sub.lesson_type, sub.raw_suggestion
if lesson_type not in VALID_LESSON_TYPES:
output(f"{index}. {user.name}: invalid stored lesson_type {lesson_type!r} — skipping")
output(f"{index}. {user.name}: invalid lesson_type {lesson_type!r} — skipping")
counts["skipped"] += 1
continue

subject = raw_suggestion or ("study words" if lesson_type == "three_words_lesson" else "?")
timezone_offset = user_timezone_offset_minutes(user)
today_local = datetime.now(timezone(timedelta(minutes=timezone_offset))).date()

# Not a scheduled day for this subscription (e.g. Mon/Wed/Fri schedule).
if not sub.scheduled_on(today_local):
counts["not-due"] += 1
continue

if DRY_RUN:
# Read-only: don't create a progress record or generate.
# Read-only: don't create a progress record or generate. Engagement
# pause + today-exists gates are #643's (reused here).
if generator.today_lesson_exists(user, timezone_offset):
output(f"{index}. {user.name} [{user.learned_language.name}] — already has today's lesson")
output(f"{index}. {user.name} [{language.name}] — already has today's lesson")
counts["exists"] += 1
continue
if DailyAudioLesson.waiting_paused_for(user, user.learned_language.id):
output(f"{index}. {user.name} [{user.learned_language.name}] — paused (last lesson < 50% listened)")
if DailyAudioLesson.waiting_paused_for(user, language.id):
output(f"{index}. {user.name} [{language.name}] — paused (last lesson < 50% listened)")
counts["paused"] += 1
continue
output(f"{index}. {user.name} [{user.learned_language.name}] — WOULD generate {lesson_type}: {subject}")
output(f"{index}. {user.name} [{language.name}] — WOULD generate {lesson_type}: {subject}")
counts["would-generate"] += 1
language_breakdown[user.learned_language.name] += 1
language_breakdown[language.name] += 1
continue

outcome = generate_for_user(user, lesson_type, raw_suggestion, timezone_offset)
Expand Down
21 changes: 21 additions & 0 deletions tools/migrations/26-05-29--add_daily_audio_subscription.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
-- First-class per-(user, language) daily audio subscription: config + on/off +
-- schedule. The single source of truth for WHAT to generate and WHEN, replacing
-- the daily_audio_lesson_type_<lang> / _suggestion_<lang> UserPreference rows.
-- The engagement "pause" is NOT stored here; it is computed from the latest
-- lesson (DailyAudioLesson.waiting_paused_for / is_engaged, from #643).
-- A row = subscribed; enabled=0 = turned off (config remembered).
CREATE TABLE daily_audio_subscription (
id INT AUTO_INCREMENT PRIMARY KEY,
user_id INT NOT NULL,
language_id INT NOT NULL,
-- New rows start switched on unless the writer says otherwise.
enabled BOOLEAN NOT NULL DEFAULT TRUE,
-- The API accepts three_words_lesson | topic | situation; the longest of
-- these (18 characters) fits in VARCHAR(20).
lesson_type VARCHAR(20) NOT NULL,
-- Verbatim subject typed by the user; NULL when none was given. 128 matches
-- the cap the API applies to the suggestion before storing it.
raw_suggestion VARCHAR(128) DEFAULT NULL,
-- Only the default 'daily' is visible in this migration; other kinds are
-- defined by the application code.
schedule_kind VARCHAR(20) NOT NULL DEFAULT 'daily',
-- 127 = 0b1111111 (seven bits set) — presumably one bit per weekday, i.e. every
-- day by default. NOTE(review): bit order is not defined here; confirm against
-- the model. The column is nullable.
weekday_mask SMALLINT DEFAULT 127,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-- Refreshed automatically by the database on every UPDATE of the row.
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
-- At most one subscription per (user, language) pair.
CONSTRAINT uq_daily_audio_subscription_user_lang UNIQUE (user_id, language_id),
-- Deleting a user removes that user's subscriptions with it.
CONSTRAINT fk_das_user FOREIGN KEY (user_id) REFERENCES user(id) ON DELETE CASCADE,
-- No ON DELETE action is declared for the language reference.
CONSTRAINT fk_das_language FOREIGN KEY (language_id) REFERENCES language(id)
) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- Align the daily-audio suggestion length to 128 across the board (was 100 on
-- the lesson, 255 on the new subscription). 128 is the single cap enforced at
-- the API/mirror input, so widen the lesson columns to match and avoid
-- truncation when a long subject flows from the subscription into generation.
-- NOTE(review): this statement only alters daily_audio_lesson. The subscription
-- side of the alignment is not part of it — confirm daily_audio_subscription's
-- raw_suggestion is already VARCHAR(128) wherever this migration is applied.
-- Both columns remain nullable with a NULL default.
ALTER TABLE daily_audio_lesson
MODIFY COLUMN raw_suggestion VARCHAR(128) DEFAULT NULL,
MODIFY COLUMN canonical_suggestion VARCHAR(128) DEFAULT NULL;
137 changes: 135 additions & 2 deletions zeeguu/api/endpoints/audio_lessons.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,26 @@
SHARE_API_ORIGIN = "https://api.zeeguu.org"


def _resolve_subscription_language(user, lang_code):
    """Choose the language a daily-audio subscription / today's-lesson request
    applies to.

    An explicit code sent by the client wins, which avoids acting on a stale
    user.learned_language right after a language switch. A missing or blank
    code falls back to user.learned_language, for clients that do not send one.

    Parameters:
    - user: the requesting user; only `learned_language` is read.
    - lang_code: language code from the request, or None.

    Returns the Language object, or None when the code matches no language.
    """
    from sqlalchemy.orm.exc import NoResultFound
    from zeeguu.core.model import Language

    requested = (lang_code or "").strip()
    if requested == "":
        return user.learned_language

    # "cn" is accepted as an alias for "zh-CN".
    lookup_code = "zh-CN" if requested == "cn" else requested

    try:
        language = Language.find(lookup_code)
    except NoResultFound:
        language = None
    return language


def _generate_lesson_in_background(user_id, preparation):
"""
Run lesson generation in a background thread (called via run_in_background).
Expand Down Expand Up @@ -102,7 +122,7 @@ def generate_daily_lesson():

# Get timezone offset from form data (default to 0 for UTC)
timezone_offset = flask.request.form.get("timezone_offset", 0, type=int)
suggestion = flask.request.form.get("suggestion", "").strip()[:80].strip() or None
suggestion = flask.request.form.get("suggestion", "").strip()[:128].strip() or None
lesson_type = flask.request.form.get("lesson_type", "").strip() or THREE_WORDS_LESSON
if lesson_type not in VALID_LESSON_TYPES:
return json_result({"error": f"Invalid lesson_type: {lesson_type}"}), 400
Expand Down Expand Up @@ -300,17 +320,25 @@ def get_todays_lesson():

Query parameters:
- timezone_offset (optional): Client's timezone offset in minutes from UTC
- language (optional): explicit learned-language code (e.g. "da"). When
absent, falls back to user.learned_language — but a fresh client should
send the code it's currently displaying, to avoid races on language switch.
"""
user = User.find_by_id(flask.g.user_id)
generator = DailyLessonGenerator()

# Get timezone offset from query parameter (default to 0 for UTC)
timezone_offset = flask.request.args.get("timezone_offset", 0, type=int)
language = _resolve_subscription_language(user, flask.request.args.get("language"))
if language is None:
return json_result({"error": "Unknown language"}), 400

# include_paused: when there's no lesson today but the last one wasn't
# engaged with (< halfway), surface it flagged `paused` so the app shows the
# waiting lesson rather than triggering a new generation.
result = generator.get_todays_lesson_for_user(user, timezone_offset, include_paused=True)
result = generator.get_todays_lesson_for_user(
user, timezone_offset, include_paused=True, language=language
)

# Check if there's a specific status code to return
status_code = result.pop("status_code", 200)
Expand Down Expand Up @@ -493,3 +521,108 @@ def get_audio_lesson_generation_progress():
db.session.commit()

return json_result({"progress": progress.to_dict()})


@api.route("/set_daily_subscription_enabled", methods=["POST"])
@cross_domain
@requires_session
def set_daily_subscription_enabled():
    """Switch an existing daily audio subscription on or off.

    Only the on/off flag is changed; the stored config (type/subject/schedule)
    is kept while off, so switching back on needs no reconfiguration.

    Form data:
    - enabled: "1" | "true" | "yes" (case-insensitive) turns it on; any other
      value turns it off. Defaults to "true" when the field is absent.
    - language (optional): explicit learned-language code; falls back to
      user.learned_language. Send the code the UI is showing to avoid races.

    Responses:
    - 200 {"subscription_status": "active" | "off"}
    - 400 when the language code is unknown
    - 404 when the user has no subscription for that language
    """
    from zeeguu.core.model import DailyAudioSubscription

    user = User.find_by_id(flask.g.user_id)
    form = flask.request.form

    truthy_values = ("1", "true", "yes")
    enabled = form.get("enabled", "true").strip().lower() in truthy_values

    language = _resolve_subscription_language(user, form.get("language"))
    if language is None:
        return json_result({"error": "Unknown language"}), 400

    subscription = DailyAudioSubscription.find(user, language)
    if subscription is None:
        # This endpoint only toggles; it never creates a subscription.
        return json_result({"error": "No daily subscription to update"}), 404

    subscription.set_enabled(enabled)
    db.session.commit()

    status = "active" if enabled else "off"
    return json_result({"subscription_status": status}), 200


def _subscription_to_dict(sub):
    """Serialize a subscription into the JSON-ready dict the endpoints return.

    Parameters:
    - sub: an object exposing lesson_type, raw_suggestion, enabled,
      schedule_kind and weekday_mask, or None for "not subscribed".

    Returns a dict with exactly those five keys. For None, every value is None
    except "enabled", which is False.
    """
    field_names = (
        "lesson_type",
        "raw_suggestion",
        "enabled",
        "schedule_kind",
        "weekday_mask",
    )

    if sub is not None:
        return {name: getattr(sub, name) for name in field_names}

    # Not subscribed: same shape, all blanks, explicitly switched off.
    blank = dict.fromkeys(field_names)
    blank["enabled"] = False
    return blank


@api.route("/daily_subscription", methods=["GET"])
@cross_domain
@requires_session
def get_daily_subscription():
    """Return the caller's daily-audio subscription config for one language.

    Source of truth for the configure dialog; replaces the legacy
    daily_audio_lesson_*_<lang> preference reads.

    Query parameters:
    - language (optional): explicit learned-language code; falls back to
      user.learned_language. Send the code the UI is showing to avoid races.

    Responses:
    - 200 with the subscription fields; nulls and enabled=false when the user
      is not subscribed for that language
    - 400 when the language code is unknown
    """
    from zeeguu.core.model import DailyAudioSubscription

    user = User.find_by_id(flask.g.user_id)
    requested_code = flask.request.args.get("language")

    language = _resolve_subscription_language(user, requested_code)
    if language is None:
        return json_result({"error": "Unknown language"}), 400

    payload = _subscription_to_dict(DailyAudioSubscription.find(user, language))
    return json_result(payload), 200


@api.route("/configure_daily_subscription", methods=["POST"])
@cross_domain
@requires_session
def configure_daily_subscription():
    """Upsert the daily-audio subscription. Replaces the legacy
    /save_user_preferences write path for daily-audio config.

    Form data:
    - lesson_type: three_words_lesson | topic | situation (defaults to the
      three-words lesson when absent or blank)
    - suggestion: verbatim subject (topic/situation), trimmed to 128 chars;
      blank becomes null
    - language (optional): explicit learned-language code; falls back to
      user.learned_language. Send the code the UI is showing to avoid writing
      the new config to the wrong language after a fast switch.

    Responses:
    - 200 with the stored subscription fields
    - 400 when lesson_type is invalid or the language code is unknown
    """
    from zeeguu.core.model import DailyAudioSubscription

    user = User.find_by_id(flask.g.user_id)
    lesson_type = flask.request.form.get("lesson_type", "").strip() or THREE_WORDS_LESSON
    if lesson_type not in VALID_LESSON_TYPES:
        return json_result({"error": f"Invalid lesson_type: {lesson_type}"}), 400

    # Strip, cap at 128, then strip again: a cut that lands right after a space
    # would otherwise store trailing whitespace. Same normalisation as the
    # suggestion handling in generate_daily_lesson.
    raw_suggestion = (
        (flask.request.form.get("suggestion", "") or "").strip()[:128].strip() or None
    )

    language = _resolve_subscription_language(user, flask.request.form.get("language"))
    if language is None:
        return json_result({"error": "Unknown language"}), 400

    sub = DailyAudioSubscription.find(user, language)
    if sub is None:
        # First configuration for this (user, language): create the row.
        sub = DailyAudioSubscription(user, language, lesson_type, raw_suggestion)
        db.session.add(sub)
    else:
        # Already subscribed: update the existing row in place.
        sub.configure(lesson_type, raw_suggestion)
    db.session.commit()
    return json_result(_subscription_to_dict(sub)), 200
Loading
Loading