From e3e079c2c4856c71fbf47722c69d6e38a02bd413 Mon Sep 17 00:00:00 2001 From: Ronny Roland Date: Wed, 20 May 2026 14:03:57 -0700 Subject: [PATCH 1/2] Pull introspect.py from anyscale/docs at a pinned SHA introspect.py and util.py were duplicated between this repo (scripts/introspect.py + scripts/util.py) and anyscale/docs (scripts/docgen/introspect.py + scripts/docgen/util.py). They diverged once already (the --allow-duplicate-models flag landed here ahead of the docs side), and there was no enforcement to keep them aligned. Delete the local copies. archive_version.sh now fetches both files from anyscale/docs at the SHA stored in .docs-introspect-sha and runs them out of scripts/.docgen/ (gitignored). The docs repo is the single source of truth; we get drift-detection for free because changes there have to be intentionally pulled here. Fetch uses `gh api` rather than raw.githubusercontent.com so the private docs repo is reachable. Locally that needs a `gh auth login` session. The archive workflow exports GH_TOKEN from secrets.DOCS_DISPATCH_TOKEN; that token's contents:read scope on anyscale/docs is the new requirement (if it doesn't already carry that scope, swap to a dedicated secret in the workflow file). Bumping introspect: edit .docs-introspect-sha to the new docs commit and merge. Next archive run picks it up. Co-Authored-By: Claude Opus 4.7 (1M context) --- .docs-introspect-sha | 1 + .github/workflows/archive.yml | 8 + .gitignore | 2 + README.md | 14 +- scripts/__pycache__/util.cpython-310.pyc | Bin 9038 -> 0 bytes scripts/archive_version.sh | 34 +- scripts/introspect.py | 665 ----------------------- scripts/util.py | 283 ---------- 8 files changed, 53 insertions(+), 954 deletions(-) create mode 100644 .docs-introspect-sha create mode 100644 .gitignore delete mode 100644 scripts/__pycache__/util.cpython-310.pyc delete mode 100644 scripts/introspect.py delete mode 100644 scripts/util.py diff --git a/.docs-introspect-sha b/.docs-introspect-sha new file mode 100644 index 0000000..7c02df6 --- /dev/null +++ b/.docs-introspect-sha @@ -0,0 +1 @@ +f6eb9f97dfd21135c407658ac911ced0ed0bd097 diff --git a/.github/workflows/archive.yml b/.github/workflows/archive.yml index 19208fe..b67a9cc 100644 --- a/.github/workflows/archive.yml +++ b/.github/workflows/archive.yml @@ -102,6 +102,14 @@ jobs: - name: Archive each missing version if: steps.missing.outputs.versions != '' + env: + # archive_version.sh fetches introspect.py + util.py from + # anyscale/docs at the SHA pinned in .docs-introspect-sha, + # via `gh api`. That needs a token with contents:read on + # anyscale/docs. The existing DOCS_DISPATCH_TOKEN is reused + # if it carries that scope; otherwise add a separate secret + # and reference it here. + GH_TOKEN: ${{ secrets.DOCS_DISPATCH_TOKEN }} run: | set -e for v in ${{ steps.missing.outputs.versions }}; do diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bf71a00 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +__pycache__/ +scripts/.docgen/ diff --git a/README.md b/README.md index 112de22..802e6b5 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,14 @@ https://cdn.jsdelivr.net/gh/anyscale/api-docs-schema@latest/pages.json 4. Regenerates `versions.json`. 5. Commits, pushes, and tags. jsDelivr's `@latest` picks up the new tag within ~15 minutes. +## Where introspect.py lives + +The introspector this repo runs against each anyscale wheel is **not** stored here. It lives in `anyscale/docs` at `scripts/docgen/introspect.py` (plus its `util.py` sibling) and powers current-version reference rendering there too. `scripts/archive_version.sh` downloads both files at run time from a SHA pinned in [`.docs-introspect-sha`](./.docs-introspect-sha), so the docs repo stays the single source of truth and the two surfaces can't silently drift. + +Because `anyscale/docs` is a private repo, the fetch uses `gh api` rather than raw.githubusercontent.com. Locally that means having an authenticated `gh` session (`gh auth login`). In CI, the archive workflow exports `GH_TOKEN` from `secrets.DOCS_DISPATCH_TOKEN`; that token needs `contents: read` scope on `anyscale/docs`. + +To pick up an introspect change after it lands in the docs repo, edit `.docs-introspect-sha` with the new commit SHA and merge. The next archive run uses it. + ## Manual regeneration ``` @@ -37,13 +45,13 @@ python3 scripts/update_manifests.py ## Layout ``` -.json # one per anyscale release (0.26.46 - 0.26.100 today) +.json # one per anyscale release versions.json # array of versions, sorted desc pages.json # {version: [page_names]} map for the docs redirect generator +.docs-introspect-sha # pinned anyscale/docs commit that scripts/archive_version.sh + # pulls introspect.py + util.py from scripts/ - introspect.py # reads the installed anyscale wheel, emits reference.json archive_json.py # post-processes reference.json into the schema served at .json - util.py # shared helpers archive_version.sh # one-shot wrapper used by the workflow and humans update_manifests.py # regenerates versions.json and pages.json .github/workflows/ diff --git a/scripts/__pycache__/util.cpython-310.pyc b/scripts/__pycache__/util.cpython-310.pyc deleted file mode 100644 index bad1cdd2462fb1202e33a93b27a320fddaacc76c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9038 zcma)B&2t+^cE^0+gCIpw6h(=$Y`4v|NmvBvIGZn8D_fD2vTTX+ij>!ZNCN|OgB){! zfqDj#01v8?l{f23RSr4iw52MtDmf_mAMyvJD%U+sC8wNx$jOIPsdlaWUiSW)ARa${@!c<#ED`Cf9nTRU;g&(OyfqiPt` zw>-0Is$Z*W;n()8wq3Q`*=n|(tLEDIYQ9~l7BDv}CU{Ovv}~T|g|F>ukxz_9ihL3y zlVc-O`~*g(_%!C6Oy=eI3_pd@6MXi_=xIKO(dp4Vo1fwH7&$pMa+aUN$jsQt0zZ$D zQ~Z_cEWaQ?ov>TxLIyOR292-s*YI?Xzm6|4TyG>a6!@Eb5l_y5R_sR2r#=0IzlG8H zW20~LC5)b(uy~m-i##u>?@XxgyfgNGfnVe+UuUc5xV4cfRR%XVb{#1=>xHi8hOP)$ z%a@G%&7d5(Tu^UEbl9@n38f!&MKfdBC-W74NixXYS;yX$vECsf;TSlj14Pq6d@tfvUCuS(Gp z*hN#UHa#~A_}*%zQmI#pMTWnPP`cfKg}Z{)D{Wq9;!!9%0Z3&|hpTZ<>^RLntCyXw zTW3M9)p8%Pz-M>w+zfEr++|Ln);jj7RU)t#igMHMwA>y2deiqjryB?s`s~N&gF+&3 z6O>8k;o-L2j-sm9g&c&_X7yWY-<5RVb>;@l6Ri*o2z`(z`1|yRT1|?F(hUXMb-G<_ zlR2SUjC3HCbi)VZ9NA}n$Lnhv1{PQ|uxSU7_>>fF|AEkqC>gQqlE(#%}ha_z)x(4 zkj@c>u`!&*r4*)xPO30NBQk=CD6^4?;93?kJI2sN&*Jv4aCa@Uy)d*wBeEVFJiC{b zZ%5X?d62=cxo<_*fnj9s&*}cYeUQ=j+}O`XS`Bl~&!L7l{f9y}9Y|9=5a1a0%bC*( zVK1amEmsEN3hPP{2>HP6>@dgk%1wx$IZf$z`t2am!n*;?5*m>hTcXLK8+tHOs80Z+ zgQ9zV2I=n#hhs4;0tF?*Z8(y7PD6OXniBHTRjqjJs#3fqR=&(eV_F%PRxslEJ)S&N z-3t2!HbBMzwScEki#Z;&jQ17kJt&Rd@ABoJvG-UL{v;^TRJJF&cqb@B_h1@JS8v{+ zPi$QKWNAgyv~l&uEewgUS%IVJEQM@Ggp3PlvELV5E7~)IN%8^TQnUUo$-b0}vq2aB zDz@M_;=CIuD&m~dU>;{Z5<+Z$>ARhhEvNBHHA{jBVhc)CwW+U^Q#VtX5T6LZO>zk} zlHO?y1+!?11>3NVqA_F48B@lLSv2GVhL30*QI^A{H0~TaVP@Z;@CD6-1|3-Y*2qr4c}zXvh}{Q*IV=dB24*FC z0u|_lZp($*wWNRO5tRPaD_RX?TW~GV9mnkiA#3&|)GbUF3D%A%L{ljQDai|}q{VsE zw~?&xh`{g3reODbe&_^Rv7q{Du$&Ag%5Y_82ayDUgR>i;#epQ)-N4xq`YmVlxJ;zd z<8n_uPW(~v-WN`J@NW6fwl6+2$|XBCrHHMd*O2p&2o#3A`cp$`!*SUc_h2xMTHy6| zmM~d4m~#dBI=xs#gs8z@;o)POqEwSqF#V%GIUExGcl4ns^BY8`$oNg>iA8Y=f;TBf z4Ssv2)7(XDUGDgB#mZb+eX(9rk*dD9xX8BZUmbpH$iAYF4Y%rPSi0p(=}R4#lK9h= z@bk@3hw6F_)>ESuz#M$3zLuyAc&dDl-UPd-q(zHE4Q}3`nAwKgfbW3KrlzedNVzeX zE+~eRl@7xIxvY*5JT*^w5}TIi(L6QcycAu}X^N6584bTd4F!QQJDVj6lW!)JzK0o# z=kq!F7W#1!5jo-c4{}?+iJ9Q*e)~K@bzKkhuHWM-qC9#-f`)7lRy}f15;e9 zJDq;ebUaZ})=+7}5r(498Uj0lUmiVIkCUercFk`>S9`JtCigeUzA6F95TZG9!32-B5`^^L&Lu)I+#W zWn@b3N>ZtiIGJgCjg9Prs~xh3Zn&$KQOGiA`tWXPbccz^iy&66P;<--PHBp3hdTE$ zW@!%CvV#8q#R2DCgp!vrL9unTdyQ#tYQU-<1q_UUuED4^A6WgXMI|q_yX8_@(>5F- z+lL4|YL^;}z(pe;@D-QZh#U%tu3$xed?hSF?a9Ps3c%cG`5vVd1XwG; zw}1>f70|{3G$R;!=l9`)JWs$2>QJwisweaU<+fNCF+WteO2gO@ecFD0SRkhpfs8BV z&qpFD$W`=9R>_JjIIyY(&lyjRxw3$sK$ukvIv7upSX0M}CppddYVH=Pd?0^{#nd3O zlOu8_+Z#cNPjM3=hAd=;MhMXQ7=U#rLm-vF>XtRMhuL9{Kt_~1Fyvd2iP&RCHpQM{ z9`MzQ>?j-MTN=DhMU?Yd&PULjJ1Ws)^?S=C_!Y)o04k?XLL?VdT46|tw(BZ5X(0R2 zdn`cuM_JHe=0NEbq^g+QMXZKBc<>6&11t%>NU$r=mXK+3rNK;s3;Y%fWeOK$;{PD;4&C`dG!))>M{~HWUa_)D=aaSwi6=Z z2*&*Jl1|fJYU>IV0Gq>Dh|ZV$wJdDq_TURy^|pHHk8&l;~)lZT)rXpMZW zf^ElGb^Vdk?jqaN86@fsM3zE7nh7WX9cK$$WlI|MXw;tc6-ED_1=0=CIxw1K5w*!x zx_@z@aT;L7yG!(Huf7bp;Ji#!r=*<-f!FoH#Asj7j8!C;MdTMU3@Els zN(51tQrSaV=143j=c!To6HF=P?(QF%@+2CEYiBO%E4J4w`E25$zhTw1<|6!A*Ax}E$YTihyd zzw<%q@}RJ_b!qDlwze)0CYC?A^v2d!Y3r+jiEDH_#O)BM`xUGuFHrLuHOFw>?2CN( z2TTj7#$e~+lg(-PWcXtOyhnsdp+Ad@FnI7EXi{Mk7N`2b&_M8qP=}~k!NC#;xT>6` z0jHS@QKf>2a|fn;hvxti5c(e*)b3{wGLMaOnW26ERAkB9kRRud0nbM6+URXfg;~*2Dzn?$I7@4Ev za+{m`g``?GG5EU;%E;k@cHPb+RAh0BT($>1v59mCm_qIm&|6+3Ki*zpYZ?*idJ)oQ zDr-;?5v!o`^<%oJ%(SLIR2#hl#c`UbpN*+VY!v@ueK7mQ8tKRO#pSKw;?~29r4QCN zbqVS@{z?;)>O%5WYDgF2tb!HFQ%Dk?3iYwo^}F&cZV~jU0)?uS+Yv!^8W9n zKnhPPP3R%`XLu^Ag?O2^F|^vi2W@MZg@R;4yO)tObQ*zv7X5i{t9}ms3p}g(dGz1p zIn^(qzs&QhKY@OQ7gWE9{!jS?^*Q>%Wvn;Ji;oSAf5<1{6Q}l1z~>o2%sI?3!;_d-<5T-Hzs~&HXqo&3pZ>ZK<*&hX=#(Km#ZQK_d?q~2PeoZi>)OLYl#2=v zGV*Wv>Cszr<8KWoq6zso5#}vWRp?AKfp-20?N5zi)j58aFw+zBvGGeIJQHPrjc_{j z*p&bNjj=x;WxhNMii=T^_9Xu$Jjc(`7}i;MfPgX!1t&Wit;JWs)DlCPEf3hDs)x_FF`1z{sa0uLV}|& zC%8XZiIAIS0*c-!b0_iUeMZHb^mAhbinfc)1)j=Ln6yl%C+f^>MJg#o1@8Fcl3va-MI-xJ6MGnfs24cJ6`(A7z z{>Ryt=R5dm^if`uRM=XN?R6Xs4lEY2BT<(gQq=uJT;EYt|7$pq?FpT5e`9=;d2D@# zNOs@k07_4dbyPX0kFxEl8Jm@NV+%_A)Ra^*kd(M<*Gyh8p14G1B*2u*|AX7_=o}m= zS;jPr&x_9oZya5&atTqu@q){h(OZ%-uGqebL^#fF_JGOdCPoG(^oSr}`ZvB8c@Lv< zi<)g}YSc8SQGkW$l1(%g7LXjBXU4r}#`a+5ZU<)(+@o9t>|bL8U9(_=O$abKHAOXy zqObFUOK9{$6xc${r0iVP22ci|Y(|?9W$Q|Ff;tQ#2rC0?dwKDhQ5u}uUIqq8Pi0X) zS$$?K4!*3g>pRWSc1XgGPp|vo4NAo*vlaY0odd*07qxhO5F-DSUM$pVl&;rmap4ouSCdrrRh^TT zrsQqgoI7|aZ_7xY>f)p8P@DUn2L%S&$7mQhJc5-%ZesX?d swm6%e&D+!G3+Ak8n9oO-S^R$f!u*W6Z2z9dil+H}&N6>*+S9-LUvioNVgLXD diff --git a/scripts/archive_version.sh b/scripts/archive_version.sh index 4361e54..9a29902 100755 --- a/scripts/archive_version.sh +++ b/scripts/archive_version.sh @@ -1,8 +1,18 @@ #!/bin/bash # # Generate the JSON schema for one anyscale version. Installs the -# matching wheel into a cached venv, runs introspect.py, then -# archive_json.py to produce /.json. +# matching wheel into a cached venv, downloads the docs-repo's +# introspect.py at the SHA pinned in .docs-introspect-sha, runs it +# against the venv, then post-processes with archive_json.py to +# produce /.json. +# +# Why pin to a docs-repo SHA: introspect.py is also used to render +# current-version docs in anyscale/docs (`scripts/docgen/introspect.py`). +# Keeping a copy here would drift. Pulling at a pinned SHA makes the +# docs repo the single source of truth without coupling our nightly +# archive to whatever happens to be on master at 4am. Bump +# .docs-introspect-sha when an introspect change in the docs repo +# should propagate here. # # Usage: # ./scripts/archive_version.sh 0.26.100 @@ -39,9 +49,27 @@ if [[ ! -x "$VENV_DIR/bin/python" ]]; then "$VENV_DIR/bin/pip" install -q "anyscale==$ANYSCALE_VERSION" fi +# Fetch introspect.py + util.py from anyscale/docs at the pinned SHA +# via `gh api` (works against the private docs repo unlike a plain +# curl). Downloaded into scripts/.docgen/ (gitignored) so the +# `from util import ...` inside introspect.py resolves against the +# sibling file. +# +# Locally: your `gh auth login` covers it. In the archive workflow: +# GH_TOKEN must be set to a token with contents:read on anyscale/docs. +DOCS_INTROSPECT_SHA="$(tr -d '[:space:]' < "$REPO_ROOT/.docs-introspect-sha")" +DOCGEN_CACHE_DIR="$SCRIPTS_DIR/.docgen" +mkdir -p "$DOCGEN_CACHE_DIR" +for f in introspect.py util.py; do + gh api \ + "repos/anyscale/docs/contents/scripts/docgen/${f}?ref=${DOCS_INTROSPECT_SHA}" \ + -H "Accept: application/vnd.github.raw" \ + > "${DOCGEN_CACHE_DIR}/${f}" +done + # `--allow-duplicate-models` softens the introspector's uniqueness # check (anyscale 0.26.48-0.26.52 had a duplicate CloudDeployment). -"$VENV_DIR/bin/python" "$SCRIPTS_DIR/introspect.py" "$TMP_JSON" --allow-duplicate-models +"$VENV_DIR/bin/python" "$DOCGEN_CACHE_DIR/introspect.py" "$TMP_JSON" --allow-duplicate-models "$VENV_DIR/bin/python" "$SCRIPTS_DIR/archive_json.py" "$TMP_JSON" "$OUT_JSON" "$ANYSCALE_VERSION" echo "Wrote $OUT_JSON" diff --git a/scripts/introspect.py b/scripts/introspect.py deleted file mode 100644 index 8874b8f..0000000 --- a/scripts/introspect.py +++ /dev/null @@ -1,665 +0,0 @@ -"""Walk the installed `anyscale` wheel's docgen registry and emit reference.json. - -The introspector imports `ALL_MODULES` from the installed `anyscale` package -and extracts all the data the renderer needs (CLI command shapes, SDK -signatures, model fields, examples, legacy SDK/model markdown). It writes one -JSON file under docs/reference/_data/ that the renderer consumes. - -Usage: - python -m docgen.introspect -""" -from __future__ import annotations - -import argparse -import inspect -import json -import os -import sys -from dataclasses import fields as dataclass_fields -from typing import Any, Callable, Dict, List, Optional - -from util import escape_mdx_content, strip_sphinx_docstring, type_to_string - - -SCHEMA_VERSION = 1 - - -# Hide semantics -# ============== -# -# A wheel can mark whole CLI commands, SDK methods, model classes, or entire -# docgen modules as hidden so they remain functional but absent from the -# generated reference. This mirrors the parameter-level filter already in -# place (`param["hidden"]` for CLI options, `__hidden_args__` for SDK args -# — see anyscale/product PR #39685). -# -# The signals introspect honors: -# - CLI command: `click.Command.hidden` (Click's native attribute) -# - SDK method: `__hidden__` magic attribute, set by `hidden=True` on the -# `@sdk_command` / `@sdk_docs` / `@sdk_command_v2` / -# `@deprecated_sdk_command` decorators -# - Model class: `__hidden__` class attribute (set directly on the class) -# - Module: `Module.hidden` boolean field -# -# All getattr() lookups fall back to False so older wheels stay introspectable. -# A non-hidden SDK function that references a hidden model raises during -# extraction, since dropping the model would leave a dangling anchor link. - - -def _is_hidden_cli_command(c: Any) -> bool: - return bool(getattr(c, "hidden", False)) - - -def _is_hidden_sdk_command(c: Any) -> bool: - return bool(getattr(c, "__hidden__", False)) - - -def _is_hidden_model(t: Any) -> bool: - return bool(getattr(t, "__hidden__", False)) - - -def _is_hidden_module(m: Any) -> bool: - return bool(getattr(m, "hidden", False)) - - -def _collect_examples(t: Any) -> Dict[str, Optional[str]]: - return { - "yaml": getattr(t, "__doc_yaml_example__", None), - "python": getattr(t, "__doc_py_example__", None), - "cli": getattr(t, "__doc_cli_example__", None), - } - - -def _build_model_index( - all_modules: List[Any], *, allow_duplicates: bool = False -) -> Dict[str, str]: - """Map model class name to the filename of the module that owns it. - - Raises if two modules export a model with the same class name. The link - registry uses the bare class name as the ID (`model/`) on the - assumption that names are globally unique; making this explicit prevents - silent overwrites. - - Set `allow_duplicates=True` to downgrade the error to a stderr warning - and keep the first occurrence — used when introspecting older anyscale - wheels that registered the same model class in two docgen modules - (e.g. `CloudDeployment` in both compute-config-api and cloud across - 0.26.48-0.26.52). - """ - index: Dict[str, str] = {} - duplicates: List[str] = [] - for m in all_modules: - for model in m.models or []: - name = model.__name__ - if name in index and index[name] != m.filename: - msg = ( - f"Model class name '{name}' is exported by both " - f"{index[name]} and {m.filename}. Names must be globally " - f"unique across docgen modules." - ) - if allow_duplicates: - duplicates.append(msg) - continue # keep the first occurrence - raise ValueError(msg) - index[name] = m.filename - if duplicates: - print( - "Warning: duplicate model names (keeping first occurrence):", - file=sys.stderr, - ) - for msg in duplicates: - print(f" {msg}", file=sys.stderr) - return index - - -class _NeverMatch: - """Sentinel type used in isinstance() when an optional product type is - missing from an older wheel. No real object will be an instance of this.""" - - -def _import_command_types(): - """Resolve AnyscaleCommand / DeprecatedAnyscaleCommand / LegacyAnyscaleCommand - from the installed wheel. Older wheels (pre-deprecation, pre-legacy-split) - may be missing some of these types; substitute a never-matching sentinel.""" - from anyscale.commands import util # noqa: PLC0415 - - return ( - getattr(util, "AnyscaleCommand", _NeverMatch), - getattr(util, "DeprecatedAnyscaleCommand", _NeverMatch), - getattr(util, "LegacyAnyscaleCommand", _NeverMatch), - ) - - -def _extract_cli_command( - c: Any, *, default_cli_prefix: str, group_prefix: Optional[str] -) -> Optional[Dict[str, Any]]: - """Capture everything the renderer reads off a Click command. - - Returns None when the command is marked hidden (`click.Command.hidden`), - signaling that the caller should drop it. - """ - import click # noqa: PLC0415 - - if _is_hidden_cli_command(c): - return None - - AnyscaleCommand, DeprecatedAnyscaleCommand, LegacyAnyscaleCommand = ( - _import_command_types() - ) - - ctx = click.Context(command=c) - usage_str = " ".join(c.collect_usage_pieces(ctx)) - info_dict: Dict[str, Any] = c.to_info_dict(ctx) - - cli_prefix = f"{default_cli_prefix} {group_prefix}" if group_prefix else default_cli_prefix - - is_anyscale_command = isinstance( - c, (AnyscaleCommand, DeprecatedAnyscaleCommand, LegacyAnyscaleCommand) - ) - - if isinstance(c, LegacyAnyscaleCommand): - kind = "legacy_cli" - legacy_prefix = c.get_legacy_prefix() - new_c = c.get_new_cli() - new_cli_prefix = c.get_new_prefix() - legacy_meta = { - "is_limited_support": c.is_limited_support(), - "legacy_prefix": legacy_prefix, - "new_cli_name": new_c.name if new_c else None, - "new_cli_prefix": new_cli_prefix, - } - deprecated_meta = None - # Legacy commands override the prefix with their own. - cli_prefix = legacy_prefix or cli_prefix - elif isinstance(c, DeprecatedAnyscaleCommand): - kind = "deprecated" - legacy_meta = None - removal_date = getattr(c, "__removal_date__", None) - formatted_date = ( - c._format_removal_date(removal_date) if removal_date else None # noqa: SLF001 - ) - deprecated_meta = { - "deprecation_message": getattr(c, "__deprecation_message__", None), - "removal_date": formatted_date, - "alternative": getattr(c, "__alternative__", None), - } - elif isinstance(c, AnyscaleCommand) and c.is_alpha: - kind = "alpha" - legacy_meta = None - deprecated_meta = None - elif isinstance(c, AnyscaleCommand) and c.is_beta: - kind = "beta" - legacy_meta = None - deprecated_meta = None - else: - kind = "regular" - legacy_meta = None - deprecated_meta = None - - options = [] - for param in info_dict["params"]: - if param.get("param_type_name") != "option": - continue - # Mirror product-side filter: Click options marked `hidden=True` are - # excluded from `--help` and from the generated reference, even though - # they remain functional. See anyscale/product PR #39685. - if param.get("hidden"): - continue - opts = list(param.get("opts", [])) - secondary_opts = list(param.get("secondary_opts", [])) - options.append( - { - "name": param.get("name"), - "opts": opts, - "secondary_opts": secondary_opts, - "help": param.get("help"), - } - ) - - return { - "name": c.name, - "kind": kind, - "is_anyscale_command": is_anyscale_command, - "cli_prefix": cli_prefix, - "usage": usage_str, - "help": info_dict.get("help"), - "options": options, - "examples": _collect_examples(c), - "legacy_meta": legacy_meta, - "deprecated_meta": deprecated_meta, - } - - -def _assert_no_hidden_model_refs( - *, - sdk_qualname: str, - arg_name: Optional[str], - type_str: str, - hidden_models: set, -) -> None: - """Raise if `type_str` references a model that's marked hidden. - - Dropping a hidden model from the emitted module would leave dangling - `[ModelName](file.md#anchor)` links elsewhere. Catch this at extract time - with a clear error rather than letting it ship as a silent broken anchor. - """ - for model_name in hidden_models: - # `type_to_string` emits the model name inside square brackets when it - # renders a cross-page anchor link, e.g. `[ModelName](file.md#...)`. - # That bracketed form is the unambiguous match. - if f"[{model_name}]" in type_str: - location = ( - f"argument '{arg_name}'" if arg_name else "return type" - ) - raise ValueError( - f"SDK command '{sdk_qualname}' {location} references hidden " - f"model '{model_name}'. Either un-hide the model or also hide " - f"the SDK command." - ) - - -def _extract_sdk_command( - c: Callable, - *, - sdk_prefix: str, - model_index: Dict[str, str], - hidden_models: set, - current_module_filename: str, -) -> Dict[str, Any]: - if not c.__doc__: - raise ValueError( - f"SDK command '{sdk_prefix}.{c.__name__}' is missing a docstring." - ) - - sdk_qualname = f"{sdk_prefix}.{c.__name__}" - signature = inspect.signature(c) - has_any_parameters = len(signature.parameters) > 0 - # SDK decorators in newer wheels can mark args as hidden via - # `hidden_args={...}`, which gets stashed on the wrapped function as - # `__hidden_args__`. Older wheels lack the attribute and we treat the - # set as empty. See anyscale/product PR #39685. - hidden_args = set(getattr(c, "__hidden_args__", set()) or set()) - parameters: List[Dict[str, Any]] = [] - for name, param in signature.parameters.items(): - if name.startswith("_"): - continue - if name in hidden_args: - continue - if param.annotation is inspect.Parameter.empty: - raise AssertionError( - f"SDK command '{sdk_qualname}' is missing a type " - f"hint for argument '{name}'" - ) - type_str = type_to_string( - param.annotation, model_index, current_module_filename - ) - _assert_no_hidden_model_refs( - sdk_qualname=sdk_qualname, - arg_name=name, - type_str=type_str, - hidden_models=hidden_models, - ) - default = ( - None - if param.default is inspect.Parameter.empty - else f"{param.default!s}" - ) - arg_docs = getattr(c, "__arg_docstrings__", {}).get(name, None) - if not arg_docs: - raise ValueError( - f"SDK command '{sdk_qualname}' is missing a " - f"docstring for argument '{name}'" - ) - parameters.append( - { - "name": name, - "type_str": type_str, - "default": default, - "docstring": arg_docs, - } - ) - - return_type_str: Optional[str] = None - if signature.return_annotation is not inspect.Signature.empty: - return_type_str = type_to_string( - signature.return_annotation, model_index, current_module_filename - ) - _assert_no_hidden_model_refs( - sdk_qualname=sdk_qualname, - arg_name=None, - type_str=return_type_str, - hidden_models=hidden_models, - ) - - return { - "name": c.__name__, - "docstring": c.__doc__, - "has_any_parameters": has_any_parameters, - "parameters": parameters, - "return_type_str": return_type_str, - "skip_py_example": getattr(c, "__skip_py_example__", False), - "examples": _collect_examples(c), - } - - -def _import_model_base_types(): - """Resolve ModelBaseType / ModelEnumType. These have been present since - docgen's inception, but we still wrap to keep the failure mode loud.""" - from anyscale._private.models.model_base import ( # noqa: PLC0415 - ModelBaseType, - ModelEnumType, - ) - - return ModelBaseType, ModelEnumType - - -def _extract_model( - t: Any, - *, - model_index: Dict[str, str], - current_module_filename: str, -) -> Dict[str, Any]: - ModelBaseType, ModelEnumType = _import_model_base_types() - - assert isinstance(t, (ModelBaseType, ModelEnumType)) - docstring = t.__doc__ - assert isinstance(docstring, str) - - if isinstance(t, ModelBaseType): - kind = "base" - is_config = t.__name__.endswith("Config") - skip_py_example = getattr(t, "__skip_py_example__", False) - if not skip_py_example and not getattr(t, "__doc_py_example__", None): - raise ValueError(f"Model '{t.__name__}' is missing a '__doc_py_example__'.") - if is_config and not getattr(t, "__doc_yaml_example__", None): - raise ValueError( - f"Config model '{t.__name__}' is missing a '__doc_yaml_example__'." - ) - model_fields: List[Dict[str, Any]] = [] - for field in dataclass_fields(t): - if field.name.startswith("_"): - continue - field_docstring = field.metadata.get("docstring", None) - if not field_docstring: - raise ValueError( - f"Model '{t.__name__}' is missing a docstring for field " - f"'{field.name}'" - ) - model_fields.append( - { - "name": field.name, - "type_str": type_to_string( - field.type, model_index, current_module_filename - ), - "docstring": field_docstring, - "customer_hosted_only": field.metadata.get( - "customer_hosted_only", False - ), - } - ) - return { - "name": t.__name__, - "kind": kind, - "docstring": docstring, - "is_config": is_config, - "skip_py_example": skip_py_example, - "fields": model_fields, - "members": None, - "examples": _collect_examples(t), - } - - # ModelEnumType - members: List[Dict[str, str]] = [] - for value in t.__members__: - if str(value).startswith("_"): - continue - members.append( - {"name": value, "docstring": t.__docstrings__[value]} - ) - return { - "name": t.__name__, - "kind": "enum", - "docstring": docstring, - "is_config": False, - "skip_py_example": False, - "fields": None, - "members": members, - "examples": {"yaml": None, "python": None, "cli": None}, - } - - -def _parse_legacy_sources(docgen_pkg_dir: str) -> Dict[str, Any]: - """Parse api.md and models.md shipped inside the wheel. - - Reuses the wheel's `parse_legacy_sdks` so the markdown transformations - stay byte-identical with the product-side renderer. We capture the - parsed objects' `name` and `docstring` strings only. - """ - from anyscale._private.docgen.generator_legacy import parse_legacy_sdks # noqa: PLC0415 - - api_md = os.path.join(docgen_pkg_dir, "api.md") - models_md = os.path.join(docgen_pkg_dir, "models.md") - legacy_sdks, legacy_models = parse_legacy_sdks(api_md, models_md) - return { - "sdks": [{"name": s.name, "docstring": s.docstring} for s in legacy_sdks], - "models": [{"name": m.name, "docstring": m.docstring} for m in legacy_models], - } - - -def _extract_module( - m: Any, *, model_index: Dict[str, str], hidden_models: set -) -> Dict[str, Any]: - # Resolve group prefix per command using object identity (matching the - # product-side renderer), since multiple commands can share a `name` across - # different group prefixes (e.g. cloud's `setup` exists at the top level and - # also under `cloud resource`). - group_map = m.cli_command_group_prefix or {} - - cli_commands = [] - for c in m.cli_commands or []: - group_prefix = group_map.get(c) - extracted = _extract_cli_command( - c, default_cli_prefix=m.cli_prefix, group_prefix=group_prefix - ) - if extracted is not None: - cli_commands.append(extracted) - - legacy_cli_commands = [] - for c in m.legacy_cli_commands or []: - extracted = _extract_cli_command( - c, - default_cli_prefix=m.legacy_cli_prefix or m.cli_prefix, - group_prefix=None, - ) - if extracted is not None: - legacy_cli_commands.append(extracted) - - sdk_commands = [ - _extract_sdk_command( - c, - sdk_prefix=m.sdk_prefix, - model_index=model_index, - hidden_models=hidden_models, - current_module_filename=m.filename, - ) - for c in m.sdk_commands or [] - if not _is_hidden_sdk_command(c) - ] - - models = [ - _extract_model( - t, model_index=model_index, current_module_filename=m.filename - ) - for t in m.models or [] - if not _is_hidden_model(t) - ] - - legacy_sdk_command_refs: List[Dict[str, Optional[str]]] = [] - if m.legacy_sdk_commands: - for legacy_name, new_sdk in m.legacy_sdk_commands.items(): - legacy_sdk_command_refs.append( - { - "legacy_name": legacy_name, - "new_sdk_name": new_sdk.__name__ if new_sdk else None, - } - ) - - return { - "title": m.title, - "filename": m.filename, - "cli_prefix": m.cli_prefix, - "sdk_prefix": m.sdk_prefix, - "cli_commands": cli_commands, - "sdk_commands": sdk_commands, - "models": models, - "legacy_title": m.legacy_title, - "legacy_cli_prefix": m.legacy_cli_prefix, - "legacy_cli_commands": legacy_cli_commands, - "legacy_sdk_command_refs": legacy_sdk_command_refs, - "legacy_sdk_model_names": list(m.legacy_sdk_models or []), - } - - -def _detect_features() -> Dict[str, bool]: - """Probe the installed wheel for known docgen features. - - These flags are a forward-compat hook: the current renderer doesn't gate on - them (the install will always be a recent enough wheel for `npm run sync`), - but archive runs against older versions can read them and skip features - that didn't exist yet. When eng changes the renderer surface in the future, - add a flag here for it. - """ - from anyscale.commands import util as commands_util # noqa: PLC0415 - from anyscale._private.docgen import generator as wheel_generator # noqa: PLC0415 - import inspect as _inspect # noqa: PLC0415 - - has_hidden_args_decorator = False - has_hidden_method_decorator = False - try: - from anyscale._private.sdk import sdk_command # noqa: PLC0415 - - sdk_command_params = _inspect.signature(sdk_command).parameters - has_hidden_args_decorator = "hidden_args" in sdk_command_params - has_hidden_method_decorator = "hidden" in sdk_command_params - except ImportError: - pass - - module_dataclass = getattr(wheel_generator, "Module", None) - has_hidden_module = False - if module_dataclass is not None: - has_hidden_module = "hidden" in { - f.name for f in _dataclass_fields_or_empty(module_dataclass) - } - - return { - "has_deprecated_commands": hasattr(commands_util, "DeprecatedAnyscaleCommand"), - "has_legacy_anyscale_command": hasattr(commands_util, "LegacyAnyscaleCommand"), - "has_alpha_beta": hasattr(commands_util, "AnyscaleCommand") - and "is_alpha" in dir(commands_util.AnyscaleCommand) - and "is_beta" in dir(commands_util.AnyscaleCommand), - "has_sphinx_stripping": hasattr(wheel_generator, "strip_sphinx_docstring"), - "has_mdx_escaping": hasattr(wheel_generator, "_escape_mdx_content"), - "has_legacy_split": hasattr( - getattr(wheel_generator, "MarkdownGenerator", object), - "_generate_legacy_content", - ), - "has_hidden_args_decorator": has_hidden_args_decorator, - "has_hidden_method_decorator": has_hidden_method_decorator, - "has_hidden_module": has_hidden_module, - } - - -def _dataclass_fields_or_empty(cls: Any) -> List[Any]: - """`dataclasses.fields(cls)` if cls is a dataclass; otherwise empty list.""" - import dataclasses # noqa: PLC0415 - - if dataclasses.is_dataclass(cls): - return list(dataclasses.fields(cls)) - return [] - - -def build_reference_json(*, allow_duplicate_models: bool = False) -> Dict[str, Any]: - import anyscale # noqa: PLC0415 - import anyscale._private.docgen.__main__ as docgen_main # noqa: PLC0415 - from anyscale._private.docgen import generator as wheel_generator # noqa: PLC0415 - - all_modules = docgen_main.ALL_MODULES - docgen_pkg_dir = os.path.dirname(docgen_main.__file__) - - # Hidden modules drop entirely, both from the model index and from the - # extracted output. Models inside a non-hidden module that are themselves - # marked hidden stay in the model index (so `type_to_string` still - # resolves them) but are filtered out of the module's emitted `models` - # list. Any non-hidden SDK function that still references such a model - # raises in `_extract_sdk_command`. - visible_modules = [m for m in all_modules if not _is_hidden_module(m)] - model_index = _build_model_index( - visible_modules, allow_duplicates=allow_duplicate_models - ) - hidden_models = { - model.__name__ - for m in visible_modules - for model in (m.models or []) - if _is_hidden_model(model) - } - legacy_sources = _parse_legacy_sources(docgen_pkg_dir) - modules = [ - _extract_module(m, model_index=model_index, hidden_models=hidden_models) - for m in visible_modules - ] - - # Pull renderer-policy constants from the installed wheel so the docs-side - # renderer doesn't drift if product evolves them. - cli_no_examples = sorted(getattr(wheel_generator, "CLI_NO_EXAMPLES", set())) - cli_options_to_skip = sorted(getattr(wheel_generator, "CLI_OPTIONS_TO_SKIP", set())) - - return { - "schema_version": SCHEMA_VERSION, - "anyscale_version": getattr(anyscale, "__version__", None), - "features": _detect_features(), - "constants": { - "cli_no_examples": cli_no_examples, - "cli_options_to_skip": cli_options_to_skip, - }, - "model_index": model_index, - "legacy_sources": legacy_sources, - "modules": modules, - } - - -def main() -> int: - parser = argparse.ArgumentParser( - description="Introspect installed anyscale wheel and emit reference.json." - ) - parser.add_argument("output_path", help="Where to write the JSON.") - parser.add_argument( - "--allow-duplicate-models", - action="store_true", - help=( - "Warn instead of raising on duplicate model class names. Used " - "when introspecting older wheels that registered the same model " - "in two docgen modules (e.g. CloudDeployment across " - "anyscale 0.26.48-0.26.52)." - ), - ) - args = parser.parse_args() - - data = build_reference_json(allow_duplicate_models=args.allow_duplicate_models) - os.makedirs(os.path.dirname(os.path.abspath(args.output_path)), exist_ok=True) - with open(args.output_path, "w") as f: - json.dump(data, f, indent=2, sort_keys=False) - f.write("\n") - print( - f"Wrote {args.output_path} (anyscale=={data['anyscale_version']}, " - f"{len(data['modules'])} modules, " - f"{len(data['legacy_sources']['sdks'])} legacy SDKs, " - f"{len(data['legacy_sources']['models'])} legacy models).", - file=sys.stderr, - ) - # Imports above ensured the helpers are reachable; quiet unused warnings. - _ = (escape_mdx_content, strip_sphinx_docstring) - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/scripts/util.py b/scripts/util.py deleted file mode 100644 index ffe4a44..0000000 --- a/scripts/util.py +++ /dev/null @@ -1,283 +0,0 @@ -"""Shared utilities for docs-side docgen (introspect + render + link registry).""" -from datetime import datetime -import re -import typing -from typing import Any, Dict, Optional, Type, Union - - -def url_route(filename: str) -> str: - """Path component for a module filename under `/reference/cli|sdk/...`. - - Strips the `.md` extension and the legacy `-api` suffix so URLs such as - `/reference/cli/compute-config-api` collapse to `/reference/cli/compute-config`. - The raw filename in the upstream `Module.filename` is left untouched; the - rewrite happens at the docs render boundary only. - """ - return filename.removesuffix(".md").removesuffix("-api") - - -def sentence_case(text: str) -> str: - """Lowercase every word after the first, preserving all-caps acronyms. - - Used to render module titles consistently in headings and sidebar labels: - 'Compute Config' -> 'Compute config', 'Cloud' -> 'Cloud'. Words that are - already all-uppercase (>= 2 chars) are kept as-is so 'CLI', 'SDK', - 'SCIM', etc. don't get destroyed. - """ - words = text.split(" ") - if not words: - return text - out = [words[0]] - for word in words[1:]: - if len(word) >= 2 and word.isupper(): - out.append(word) - else: - out.append(word.lower()) - return " ".join(out) - - -def kebab_slug(text: str) -> str: - """Lowercase + non-alphanumeric runs collapsed to single hyphens. - - Used to derive a stable module identifier from `Module.title`. Confirmed - against current modules: 'Cloud' -> 'cloud', 'Compute Config' -> - 'compute-config', 'Resource quotas' -> 'resource-quotas', - 'Aggregated Instance Usage' -> 'aggregated-instance-usage'. - """ - return re.sub(r"[^a-z0-9]+", "-", text.lower()).strip("-") - - -def cli_command_anchor(cli_prefix: str, name: str) -> str: - """Anchor (no leading `#`) for `### \\`{cli_prefix} {name}\\``. - - Mirrors the legacy product-side `_get_cli_anchor`: hyphenate the prefix + - name. CLI commands are already lowercase so no extra casing needed. - """ - return f"{cli_prefix} {name}".replace(" ", "-") - - -def sdk_function_anchor(sdk_prefix: str, name: str) -> str: - """Anchor for `### \\`{sdk_prefix}.{name}\\``. - - Mirrors product-side `_get_sdk_anchor`: drop the dots so - `anyscale.cloud.create` becomes `anyscalecloudcreate`. Docusaurus auto- - generates the same anchor from the heading text. - """ - return f"{sdk_prefix}.{name}".replace(".", "") - - -def model_anchor(name: str) -> str: - """Anchor for a current model heading. Lowercased class name.""" - return name.lower() - - -def legacy_sdk_anchor(name: str) -> str: - """Anchor for a legacy SDK function heading. Heading uses no explicit - anchor, so we mirror the auto-generated form: lowercased name with - underscores preserved.""" - return name.lower() - - -def legacy_model_anchor(name: str) -> str: - """Anchor for a legacy model heading. - - Legacy models use an explicit `{#name.lower()-legacy}` suffix when - rendered into the main file, and `{#name.lower()}` when rendered into - the legacy/ subfolder file. This returns the bare slug; callers append - `-legacy` if pointing at the main file. - """ - return name.lower() - - -# H2 sections that may appear inside a module's reference page. Order matches -# the order render.py emits them. The renderer omits any section whose source -# list is empty, so the helper below filters against the same module data. -MODULE_SECTIONS = ( - # (name, display, predicate-key) - ("cli", "CLI", "cli_commands"), - ("sdk", "SDK", "sdk_commands"), - ("models", "Models", "models"), -) - - -def module_section_headings(module: Dict[str, Any]) -> list: - """Return `[(name, display, anchor), ...]` for every H2 section the - rendered main page will have, in render order. - - Single source of truth for the renderer's section list and the link - registry's `section//` entries. Anchor matches Docusaurus's - auto-generated form: kebab-case of the heading text. - """ - title = module["title"] - title_slug = kebab_slug(title) - out = [] - for name, display, key in MODULE_SECTIONS: - if module.get(key): - anchor = f"{title_slug}-{name}" - out.append((name, display, anchor)) - return out - - -def cli_command_path( - module_cli_prefix: str, command_cli_prefix: str, command_name: str -) -> str: - """Path under the module for a CLI command, segments separated by `/`. - - `module_cli_prefix` is the module's base prefix (e.g., 'anyscale cloud'). - `command_cli_prefix` is what introspect attached to the specific command, - including any group prefix (e.g., 'anyscale cloud config'). Returns the - portion after the module prefix, with spaces replaced by `/`. - - Examples: - anyscale cloud + 'anyscale cloud' / 'create' -> 'create' - anyscale cloud + 'anyscale cloud config' / 'get' -> 'config/get' - anyscale + 'anyscale auth' / 'show' -> 'auth/show' (Other module) - """ - full = f"{command_cli_prefix} {command_name}" - leader = f"{module_cli_prefix} " - if full.startswith(leader): - remaining = full[len(leader):] - elif full == module_cli_prefix.rstrip(): - remaining = command_name - else: - # Fall back to stripping just the leading 'anyscale ' so we still - # produce something for unexpected inputs rather than crashing. - remaining = full.removeprefix("anyscale ").lstrip() - return remaining.replace(" ", "/") - - -def escape_mdx_content(text: Optional[str]) -> str: - """Escape content for MDX compatibility. - - Mirrors product-side _escape_mdx_content: angle brackets that look like HTML - tags get escaped, and curly braces get escaped to prevent JSX expression - interpretation. - """ - if not text: - return "" - - text = re.sub(r"<([a-zA-Z][a-zA-Z0-9\-]*?)>", r"\\<\1\\>", text) - text = re.sub(r"(? str: - """Strip sphinx/reStructuredText markers (:param, :return, etc.) from docstrings. - - Mirrors product-side strip_sphinx_docstring exactly. - """ - if not text: - return "" - - lines = text.split("\n") - filtered_lines = [] - in_sphinx_block = False - base_indent: Optional[int] = None - - for line in lines: - stripped = line.strip() - - if re.match(r"^:[a-z]+(\s+\w+)?:", stripped): - in_sphinx_block = True - base_indent = len(line) - len(line.lstrip()) - continue - - if in_sphinx_block: - assert base_indent is not None - current_indent = len(line) - len(line.lstrip()) if line.strip() else 0 - if not stripped or (stripped and current_indent > base_indent): - continue - in_sphinx_block = False - base_indent = None - - filtered_lines.append(line) - - while filtered_lines and not filtered_lines[-1].strip(): - filtered_lines.pop() - - return "\n".join(filtered_lines) - - -def type_to_string( - t: Type, - model_index: Dict[str, str], - current_module_filename: Optional[str] = None, -) -> str: - """Render a Python type annotation as a docs-flavored string. - - `model_index` maps a model class object (by id()) to its target filename so - that references to model types resolve as cross-module anchor links. - `current_module_filename` is the filename of the module the type appears - in; same-module references emit a fragment-only link, cross-module - references emit an absolute path to the canonical SDK page. - - This is a near-verbatim port of product-side _model_type_to_string + - _type_container_to_string. We resolve model anchors here so the renderer - only has to read pre-resolved strings. - """ - # Lazy import: anyscale wheel must already be installed. - from anyscale._private.models.model_base import ( # noqa: PLC0415 - ModelBaseType, - ModelEnumType, - ResultIterator, - ) - - if t is Any: - return "Any" - if t is str: - return "str" - if t is bool: - return "bool" - if t is int: - return "int" - if t is float: - return "float" - if t is bytes: - return "bytes" - if t is datetime: - return "datetime" - if t is None or t is type(None): - return "None" - - origin = typing.get_origin(t) - if origin is not None: - args = typing.get_args(t) - if origin is Union: - return " | ".join( - type_to_string(arg, model_index, current_module_filename) - for arg in args - ) - - origin_name_map = { - dict: "Dict", - list: "List", - tuple: "Tuple", - ResultIterator: "ResultIterator", - } - if origin in origin_name_map: - arg_str = ", ".join( - type_to_string(arg, model_index, current_module_filename) - for arg in args - ) - if arg_str: - return f"{origin_name_map[origin]}[{arg_str}]" - return origin_name_map[origin] - raise NotImplementedError(f"Unhandled type: {t}") - - if isinstance(t, (ModelBaseType, ModelEnumType)): - filename = model_index.get(t.__name__) - if filename is None: - raise KeyError( - f"Model {t.__name__} referenced from a type annotation but not " - f"registered in any module's `models` list." - ) - anchor = t.__name__.lower() - if filename == current_module_filename: - return f"[{t.__name__}](#{anchor})" - return f"[{t.__name__}](/reference/sdk/{url_route(filename)}#{anchor})" - - raise NotImplementedError( - f"Unhandled type: {t}. Either this type should not be in our public " - f"APIs, or you must add handling for it to the doc generator." - ) From e5305b73333266e19a3a577ecb0f39a2214c128d Mon Sep 17 00:00:00 2001 From: Ronny Roland Date: Wed, 20 May 2026 15:43:41 -0700 Subject: [PATCH 2/2] Add docs-repo read-access precondition check Fail fast if DOCS_DISPATCH_TOKEN doesn't carry contents:read on anyscale/docs, rather than 404'ing mid-archive against a partial gh api fetch. Surfaces the scope requirement clearly. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/archive.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.github/workflows/archive.yml b/.github/workflows/archive.yml index b67a9cc..ab7c31d 100644 --- a/.github/workflows/archive.yml +++ b/.github/workflows/archive.yml @@ -28,6 +28,23 @@ jobs: # Need full history to push a tag from the workflow. fetch-depth: 0 + # Precondition: the archive step needs to fetch introspect.py + # from anyscale/docs at the SHA pinned in .docs-introspect-sha. + # That requires GH_TOKEN with contents:read on anyscale/docs. + # Fail fast and clearly here if the token doesn't have the + # scope, rather than mid-archive run. + - name: Verify docs-repo read access + env: + GH_TOKEN: ${{ secrets.DOCS_DISPATCH_TOKEN }} + run: | + SHA="$(tr -d '[:space:]' < .docs-introspect-sha)" + if ! gh api "repos/anyscale/docs/contents/scripts/docgen/introspect.py?ref=${SHA}" \ + -H "Accept: application/vnd.github.raw" --silent > /dev/null; then + echo "::error::DOCS_DISPATCH_TOKEN can't read anyscale/docs at ${SHA}. Verify the secret has contents:read scope, or swap to a dedicated read-access token in this workflow file." + exit 1 + fi + echo "OK: docs-repo read access confirmed at ${SHA}." + - name: Determine missing versions id: missing run: |