From eaa176251386b733c7366417f661a04b9440d5c4 Mon Sep 17 00:00:00 2001 From: Scott Dodson Date: Fri, 5 Jun 2026 15:19:52 -0400 Subject: [PATCH 1/2] Eliminate oc --image-for calls by reusing cached release JSON MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Profiling the changelog handler for a two-release diff (4.22.0-rc.4 → 4.22.0-rc.5, two machine-OS streams) showed the following oc invocations in the NodeImageSectionMarkdown path: Step Duration oc adm release info -o json ~4.2s ListMachineOSStreams oc adm release info --image-for \ rhel-coreos ~4.1s ImageReferenceForComponent oc adm release info --image-for \ rhel-coreos-10 ~4.0s ImageReferenceForComponent oc adm release info --rpmdb-diff ... ~18s RpmDiffForStream (×2) The two --image-for calls together added ~8 seconds of wall time per changelog request. Each invoked a separate oc process for data that is already present in the release JSON: references.spec.tags[*].from.name contains the full pullspec for every component in the release, and that JSON is already cached in groupcache under the "releaseinfo" key. Changes: - Extend releaseInfoImageRefs with from.name so imageRefFromReleaseJSON can extract pullspecs from the cached release JSON. - ExecReleaseInfo.ImageReferenceForComponent now calls ReleaseInfo() and parses the JSON instead of shelling out to oc --image-for. - CachingReleaseInfo "imagefor" groupcache getter fetches "releaseinfo" via cache.Get and parses locally — no extra oc invocation. - CachingReleaseInfo "machineosstreams" getter likewise reads from the "releaseinfo" cache entry instead of calling info.ListMachineOSStreams (which bypassed groupcache and ran oc independently), eliminating a duplicate oc adm release info -o json call per request. rh-pre-commit.version: 2.4.0 rh-pre-commit.check-secrets: ENABLED --- pkg/release-controller/machine_os_tags.go | 26 ++++++++++++- pkg/release-controller/release_info.go | 47 +++++++++++++---------- 2 files changed, 52 insertions(+), 21 deletions(-) diff --git a/pkg/release-controller/machine_os_tags.go b/pkg/release-controller/machine_os_tags.go index fc4b1c174..41ecee9d1 100644 --- a/pkg/release-controller/machine_os_tags.go +++ b/pkg/release-controller/machine_os_tags.go @@ -13,18 +13,42 @@ type MachineOSStreamInfo struct { DisplayName string `json:"displayName,omitempty"` } -// releaseInfoImageRefs is the subset of `oc adm release info -o json` needed to list payload tags. +// releaseInfoImageRefs is the subset of `oc adm release info -o json` needed to list payload tags +// and resolve component image references. type releaseInfoImageRefs struct { References struct { Spec struct { Tags []struct { Name string `json:"name"` Annotations map[string]string `json:"annotations"` + From struct { + Name string `json:"name"` + } `json:"from"` } `json:"tags"` } `json:"spec"` } `json:"references"` } +// imageRefFromReleaseJSON returns the image pull spec for componentName from the JSON produced +// by `oc adm release info -o json`. This avoids a separate `oc adm release info --image-for` +// call when the release JSON is already available (e.g. cached via ReleaseInfo). +func imageRefFromReleaseJSON(raw, componentName string) (string, error) { + var ri releaseInfoImageRefs + if err := json.Unmarshal([]byte(raw), &ri); err != nil { + return "", err + } + for _, t := range ri.References.Spec.Tags { + if t.Name == componentName { + ref := strings.TrimSpace(t.From.Name) + if ref == "" { + return "", fmt.Errorf("empty image reference for component %q", componentName) + } + return ref, nil + } + } + return "", fmt.Errorf("component %q not found in release", componentName) +} + const versionDisplayNamesKey = "io.openshift.build.version-display-names" // ListMachineOSStreams returns machine-OS base tags discovered by pairing each *coreos* extensions diff --git a/pkg/release-controller/release_info.go b/pkg/release-controller/release_info.go index 04de0362a..ffe66102f 100644 --- a/pkg/release-controller/release_info.go +++ b/pkg/release-controller/release_info.go @@ -64,7 +64,8 @@ type CachingReleaseInfo struct { } func NewCachingReleaseInfo(info ReleaseInfo, size int64, architecture string) ReleaseInfo { - cache := groupcache.NewGroup("release", size, groupcache.GetterFunc(func(ctx context.Context, key string, sink groupcache.Sink) error { + var cache *groupcache.Group + cache = groupcache.NewGroup("release", size, groupcache.GetterFunc(func(ctx context.Context, key string, sink groupcache.Sink) error { var s string var err error parts := strings.Split(key, "\x00") @@ -149,21 +150,31 @@ func NewCachingReleaseInfo(info ReleaseInfo, size int64, architecture string) Re if len(parts) != 3 { s, err = "", fmt.Errorf("invalid imagefor key") } else { - s, err = info.ImageReferenceForComponent(parts[1], parts[2]) + var raw string + if getErr := cache.Get(ctx, strings.Join([]string{"releaseinfo", parts[1]}, "\x00"), groupcache.StringSink(&raw)); getErr != nil { + err = getErr + } else { + s, err = imageRefFromReleaseJSON(raw, parts[2]) + } } case "machineosstreams": if len(parts) != 2 { s, err = "", fmt.Errorf("invalid machineosstreams key") } else { - var streams []MachineOSStreamInfo - streams, err = info.ListMachineOSStreams(parts[1]) - if err == nil { - var b []byte - b, err = json.Marshal(streams) - if err != nil { - klog.V(4).Infof("Failed to Marshal machine OS streams for %s; %s", parts[1], err) - } else { - s = string(b) + var raw string + if getErr := cache.Get(ctx, strings.Join([]string{"releaseinfo", parts[1]}, "\x00"), groupcache.StringSink(&raw)); getErr != nil { + err = getErr + } else { + var streams []MachineOSStreamInfo + streams, err = machineOSStreamsFromReleaseJSON(raw) + if err == nil { + var b []byte + b, err = json.Marshal(streams) + if err != nil { + klog.V(4).Infof("Failed to Marshal machine OS streams for %s; %s", parts[1], err) + } else { + s = string(b) + } } } } @@ -617,8 +628,8 @@ func (r *ExecReleaseInfo) RpmList(image string) (RpmList, error) { } // ImageReferenceForComponent resolves the full image reference (pullspec) for a named component -// within a release image using oc adm release info --image-for. Returns the complete image reference -// including registry, repository, and digest. +// within a release image. It reuses the JSON from ReleaseInfo (already fetched for +// ListMachineOSStreams) instead of making a separate oc --image-for invocation. func (r *ExecReleaseInfo) ImageReferenceForComponent(releaseImage, componentName string) (string, error) { if _, err := imagereference.Parse(releaseImage); err != nil { return "", fmt.Errorf("%s is not an image reference: %v", releaseImage, err) @@ -626,15 +637,11 @@ func (r *ExecReleaseInfo) ImageReferenceForComponent(releaseImage, componentName if strings.HasPrefix(releaseImage, "-") { return "", fmt.Errorf("not a valid reference") } - out, _, err := ocCmd("adm", "release", "info", "--image-for", componentName, releaseImage) + raw, err := r.ReleaseInfo(releaseImage) if err != nil { - return "", fmt.Errorf("failed to resolve image for component %q in %s: %w", componentName, releaseImage, err) - } - ref := strings.TrimSpace(string(out)) - if ref == "" { - return "", fmt.Errorf("empty image reference for component %q in %s", componentName, releaseImage) + return "", fmt.Errorf("failed to get release info for %s: %w", releaseImage, err) } - return ref, nil + return imageRefFromReleaseJSON(raw, componentName) } // RpmListForStream loads the RPM database for a machine-os component inside the release payload From c05975aa09e5ecf4c0984582f2904fcf6ecf8358 Mon Sep 17 00:00:00 2001 From: Scott Dodson Date: Fri, 5 Jun 2026 15:32:51 -0400 Subject: [PATCH 2/2] Narrow semaphore scope to rpmdb-only operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ImageReferenceForComponent now resolves from groupcache (JSON parse, no oc subprocess, no /tmp/rpmdb/ access) after the --image-for elimination. Move it outside the RpmdbOCSlots semaphore in both NodeImageSectionMarkdown and the API handler so the slot is only held during RpmListForStream and RpmDiffForStream — the calls that actually spawn oc subprocesses and write to /tmp/rpmdb/. On a cold cache with two machine-OS streams, profiling showed: ImageReferenceForComponent × 2: ~8s (now outside semaphore) RpmListForStream × 2: ~8s (inside semaphore) RpmDiffForStream × 2: ~18s (inside semaphore) This reduces per-request semaphore hold time from ~34s to ~26s, allowing more concurrent changelog requests to proceed before the 16-slot limit causes 429 responses. On a freshly restarted controller where hundreds of release pages hit the semaphore simultaneously, this frees ~8 seconds of slot time per request for other callers. rh-pre-commit.version: 2.4.0 rh-pre-commit.check-secrets: ENABLED --- cmd/release-controller-api/http.go | 41 ++++++++++++++++++------------ pkg/rhcos/node_image.go | 22 +++++++++------- 2 files changed, 38 insertions(+), 25 deletions(-) diff --git a/cmd/release-controller-api/http.go b/cmd/release-controller-api/http.go index 0fd037898..2887b1366 100644 --- a/cmd/release-controller-api/http.go +++ b/cmd/release-controller-api/http.go @@ -723,11 +723,24 @@ func (c *Controller) apiReleaseInfo(w http.ResponseWriter, req *http.Request) { return } - // Acquire a single semaphore slot for all stream operations, - // matching the HTML path (pkg/rhcos/node_image.go). - // RpmDiffForStream with a non-empty tag uses ocCmd (no - // semaphore), so without this guard concurrent API requests - // could spawn unbounded oc processes. + // Pre-resolve which streams exist in the from-release before + // acquiring the semaphore. ImageReferenceForComponent resolves + // from groupcache (JSON parse, no oc subprocess). + type streamEntry struct { + releasecontroller.APINodeImageStream + hasFrom bool + } + entries := make([]streamEntry, len(streams)) + for i, stream := range streams { + entries[i].Name = stream.DisplayName + entries[i].Tag = stream.Tag + if _, errFrom := c.releaseInfo.ImageReferenceForComponent(tagInfo.PreviousTagPullSpec, stream.Tag); errFrom == nil { + entries[i].hasFrom = true + } + } + + // Acquire a semaphore slot only for the rpmdb operations + // (RpmDiffForStream) which spawn oc subprocesses. select { case releasecontroller.RpmdbOCSlots <- struct{}{}: default: @@ -739,24 +752,20 @@ func (c *Controller) apiReleaseInfo(w http.ResponseWriter, req *http.Request) { } defer func() { <-releasecontroller.RpmdbOCSlots }() - result := make([]releasecontroller.APINodeImageStream, 0, len(streams)) - for _, stream := range streams { + result := make([]releasecontroller.APINodeImageStream, 0, len(entries)) + for _, e := range entries { if req.Context().Err() != nil { return } - entry := releasecontroller.APINodeImageStream{ - Name: stream.DisplayName, - Tag: stream.Tag, - } - if _, errFrom := c.releaseInfo.ImageReferenceForComponent(tagInfo.PreviousTagPullSpec, stream.Tag); errFrom == nil { - diff, err := c.releaseInfo.RpmDiffForStream(tagInfo.PreviousTagPullSpec, tagInfo.TagPullSpec, stream.Tag) + if e.hasFrom { + diff, err := c.releaseInfo.RpmDiffForStream(tagInfo.PreviousTagPullSpec, tagInfo.TagPullSpec, e.Tag) if err != nil { - klog.V(4).Infof("Unable to retrieve RPM diff for stream %s in %s: %v", stream.Tag, tagInfo.Tag, err) + klog.V(4).Infof("Unable to retrieve RPM diff for stream %s in %s: %v", e.Tag, tagInfo.Tag, err) } else { - entry.RpmDiff = &diff + e.RpmDiff = &diff } } - result = append(result, entry) + result = append(result, e.APINodeImageStream) } nodeImageStreams = result }() diff --git a/pkg/rhcos/node_image.go b/pkg/rhcos/node_image.go index 68df2c40b..0dfbdf32d 100644 --- a/pkg/rhcos/node_image.go +++ b/pkg/rhcos/node_image.go @@ -41,10 +41,18 @@ func NodeImageSectionMarkdown(ctx context.Context, info releasecontroller.Releas return RenderNodeImageInfo(changelogMarkdown, rpmlist, rpmdiff), nil } - // Acquire single semaphore slot for all stream operations. - // Respect context cancellation so orphaned goroutines from timed-out HTTP - // handlers release their slot promptly instead of holding it until all - // streams are processed. + // Pre-resolve which streams exist in the from-release before acquiring + // the semaphore. ImageReferenceForComponent resolves from groupcache + // (JSON parse only, no oc subprocess) and does not need the slot. + streamHasFrom := make([]bool, len(streams)) + for i, stream := range streams { + if _, errFrom := info.ImageReferenceForComponent(fromReleasePullSpec, stream.Tag); errFrom == nil { + streamHasFrom[i] = true + } + } + + // Acquire a semaphore slot only for the rpmdb operations (RpmListForStream, + // RpmDiffForStream) which spawn oc subprocesses and write to /tmp/rpmdb/. select { case releasecontroller.RpmdbOCSlots <- struct{}{}: case <-ctx.Done(): @@ -55,13 +63,9 @@ func NodeImageSectionMarkdown(ctx context.Context, info releasecontroller.Releas } defer func() { <-releasecontroller.RpmdbOCSlots }() - // Process all streams sequentially to avoid cache stomping nodeStreams := make([]CoreOSNodeStream, len(streams)) for i, stream := range streams { - // Between stream iterations, check if the caller gave up (e.g. HTTP - // handler timed out). This releases the semaphore slot without waiting - // for remaining streams to finish. if ctx.Err() != nil { return "", ctx.Err() } @@ -73,7 +77,7 @@ func NodeImageSectionMarkdown(ctx context.Context, info releasecontroller.Releas } var diff releasecontroller.RpmDiff - if _, errFrom := info.ImageReferenceForComponent(fromReleasePullSpec, stream.Tag); errFrom == nil { + if streamHasFrom[i] { diff, err = info.RpmDiffForStream(fromReleasePullSpec, toReleasePullSpec, stream.Tag) if err != nil { return "", fmt.Errorf("failed to fetch RPM diff for stream %s: %w", stream.Tag, err)