Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 41 additions & 16 deletions zeeguu/api/endpoints/article.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import json
from zeeguu.logging import log

import os
import math
import requests
from io import BytesIO
Expand Down Expand Up @@ -90,7 +91,7 @@ def _og_preview_html(og_title, description, page_url, image_url):
<meta property="og:description" content="{desc}">
<meta property="og:url" content="{page_url}">
<meta property="og:image" content="{image_url}">
<meta property="og:image:type" content="image/png">
<meta property="og:image:type" content="image/jpeg">
<meta property="og:image:width" content="1200">
<meta property="og:image:height" content="630">
<meta property="og:image:alt" content="{title}">
Expand All @@ -108,46 +109,70 @@ def _og_preview_html(og_title, description, page_url, image_url):
"""


def _ensure_article_card(view):
"""Render + cache the article card if it's not already on disk; return its
path, or None on a transient image-fetch failure (caller serves an uncached
fallback). The remote photo is downloaded only on a cache miss."""
article_id = view.get("article_id")
if not article_id:
return None
path = og_image.cached_article_card_path(ZEEGUU_DATA_FOLDER, article_id)
if os.path.exists(path):
return path
url = view.get("image_url")
photo = _fetch_preview_image(url)
if url and photo is None:
return None # transient fetch failure — don't poison the cache
return og_image.ensure_cached_article_card(view, ZEEGUU_DATA_FOLDER, photo)


@api.route("/shared_article_preview/<int:article_id>", methods=["GET"])
@cross_domain
def shared_article_preview(article_id):
"""Crawler-facing HTML with Open Graph tags for a shared article link. nginx
routes social-scraper user-agents on zeeguu.org/read/article?id=<id> here;
real users get the SPA. Public — article content is already public."""
real users get the SPA. Public — article content is already public.

We warm the card image here so that when the crawler fetches og:image moments
later it's a fast cache hit — otherwise the cold render (which downloads the
article's photo) can race the crawler's image-fetch timeout."""
article = Article.find_by_id(article_id)
page_url = f"{SHARE_WEB_ORIGIN}/read/article?id={article_id}"
if not article:
return flask.redirect(page_url, code=302)
og_title, description = _article_preview_texts(_article_card_view(article))
image_url = f"{SHARE_API_ORIGIN}/shared_article_image/{article_id}.png"
view = _article_card_view(article)
og_title, description = _article_preview_texts(view)
image_url = f"{SHARE_API_ORIGIN}/shared_article_image/{article_id}.jpg"
try:
_ensure_article_card(view) # best-effort cache warm
except Exception as e:
log(f"[shared_article_preview] card warm failed for {article_id}: {e}")
response = flask.Response(
_og_preview_html(og_title, description, page_url, image_url), mimetype="text/html")
response.headers["Cache-Control"] = "public, max-age=3600"
return response


@api.route("/shared_article_image/<int:article_id>.png", methods=["GET"])
@api.route("/shared_article_image/<int:article_id>.jpg", methods=["GET"])
@cross_domain
def shared_article_image(article_id):
"""1200x630 OG card for a shared article — its own photo under a scrim, or
the branded fallback. Rendered once and cached on disk."""
"""1200x630 OG card (JPEG) for a shared article — its own photo under a
scrim, or the branded fallback. Rendered once and cached on disk."""
article = Article.find_by_id(article_id)
if not article:
return flask.Response("Not found", status=404)
view = _article_card_view(article)
url = view.get("image_url")
photo = _fetch_preview_image(url)
if url and photo is None:
# Transient image-fetch failure: serve the fallback now but DON'T cache it,
# so the photo card appears once the fetch later succeeds. Short TTL.
path = _ensure_article_card(view)
if path is None:
# Transient image-fetch failure: serve the fallback now but DON'T cache
# it, so the photo card appears once the fetch later succeeds. Short TTL.
buffer = BytesIO()
og_image.render_article_card(view, None).save(buffer, format="PNG")
og_image.render_article_card(view, None).save(buffer, format="JPEG", quality=85)
buffer.seek(0)
response = flask.send_file(buffer, mimetype="image/png")
response = flask.send_file(buffer, mimetype="image/jpeg")
response.headers["Cache-Control"] = "public, max-age=300"
return response
path = og_image.ensure_cached_article_card(view, ZEEGUU_DATA_FOLDER, photo)
response = flask.send_file(path, mimetype="image/png")
response = flask.send_file(path, mimetype="image/jpeg")
response.headers["Cache-Control"] = "public, max-age=86400"
return response

Expand Down
8 changes: 5 additions & 3 deletions zeeguu/core/audio_lessons/og_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,17 +281,19 @@ def render_article_card(view, photo=None):


def cached_article_card_path(data_folder, article_id):
return os.path.join(data_folder, "og-images", "shared-articles", f"{article_id}.png")
# JPEG, not PNG: the card embeds a photo, so JPEG is ~5x smaller — crawlers
# fetch it faster and are far less likely to skip it on a cold first scrape.
return os.path.join(data_folder, "og-images", "shared-articles", f"{article_id}.jpg")


def ensure_cached_article_card(view, data_folder, photo=None):
"""Render (once) and cache the article card PNG; return its path, or None if
"""Render (once) and cache the article card JPEG; return its path, or None if
the view has no article_id."""
article_id = view.get("article_id")
if not article_id:
return None
path = cached_article_card_path(data_folder, article_id)
if not os.path.exists(path):
os.makedirs(os.path.dirname(path), exist_ok=True)
render_article_card(view, photo).save(path, format="PNG")
render_article_card(view, photo).save(path, format="JPEG", quality=85, optimize=True)
return path
Loading