From 65ffb1ceae607b6c0a38e04781b49d2091a9331a Mon Sep 17 00:00:00 2001 From: Tim Thacker Date: Mon, 20 Apr 2026 00:24:45 +1000 Subject: [PATCH 1/4] =?UTF-8?q?feat(cli):=20nullify=20deps=20analyze=20?= =?UTF-8?q?=E2=80=94=20CI-aware=20dependency=20malware-analysis=20workflow?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hand-written workflow command (not generated from OpenAPI) that wires the Phase 5a/5b malware analyzer into customer CI pipelines via scpm's /scpm/dependencies/analyze endpoint. Components: - internal/ci/ — extensible CI provider registry. Nine concrete Providers: GitHubActions, GitLabCI, CircleCI, BitbucketPipelines, Jenkins, AzureDevOps, GoogleCloudBuild, AWSCodeBuild, Local (last; always matches). Each exposes Platform() (matches the benchmarks.PipelinePlatform enum), Detect() (env-var signature), BaseRef / HeadRef (context-aware; fall back to `git rev-parse origin/HEAD` when CI doesn't expose a base), PRNumber, RepoSlug, and EnrichHeader (stamps CI metadata on outbound HTTP so scpm's audit log can tie back to the CI run). - internal/scan/manifest/ — per-ecosystem lockfile parsers: npm (package-lock.json v2/v3), pypi (requirements.txt), go (go.sum), rust (Cargo.lock), ruby (Gemfile.lock). Each implements Parser and is registered in manifest.DefaultParsers(). Deliberately tight: pnpm-lock, poetry.lock, pom.xml, composer.lock, packages.lock.json are follow-up work. Every parser produces the same flat Entry (Ecosystem, Name, Version, File, Direct) shape so diff.go can compare them uniformly. - internal/scan/diff.go — the (base, head) diff. Uses `git diff --name-only base..head` to list changed files, filters to those a parser matches, then `git show :` on each side + parse + set-difference. Produces ChangedDep{added, bumped, removed}. - internal/commands/deps_analyze.go — the cobra command. Flow: 1. Detect CI provider; resolve base (--base or provider.BaseRef) and head (--head or provider.HeadRef) to commit SHAs. 2. scan.Diff → list of changed deps. 3. For each added/bumped dep, POST to {client.BaseURL}/scpm/dependencies/analyze with the CI-provider headers + an idempotency key derived from CI run ID + package tuple (so re-runs of the same CI build hit cached jobs). 4. Render results as text (default) or json. 5. Exit per --fail-on (none|vulnerable|suspicious|malicious): 0=clean, 10=vulnerable, 20=suspicious, 30=malicious, 2=invalid invocation, 1=transient. The scpm calls are hand-rolled HTTP POSTs rather than generated api.Client methods because scpm's OpenAPI regen happens in a separate pipeline — the CLI's scripts/generate/main.go will supersede these once the spec lands. Follows the RegisterContextPushCommand pattern. Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/cli/cmd/root.go | 5 + internal/ci/aws_codebuild.go | 80 +++++ internal/ci/azure_devops.go | 72 +++++ internal/ci/bitbucket_pipelines.go | 71 +++++ internal/ci/ci.go | 70 +++++ internal/ci/circleci.go | 88 ++++++ internal/ci/github_actions.go | 103 +++++++ internal/ci/gitlab_ci.go | 84 +++++ internal/ci/google_cloud_build.go | 65 ++++ internal/ci/jenkins.go | 85 +++++ internal/ci/local.go | 61 ++++ internal/ci/registry.go | 40 +++ internal/commands/deps_analyze.go | 411 +++++++++++++++++++++++++ internal/scan/diff.go | 178 +++++++++++ internal/scan/manifest/cargo_lock.go | 98 ++++++ internal/scan/manifest/gemfile_lock.go | 104 +++++++ internal/scan/manifest/go_mod.go | 69 +++++ internal/scan/manifest/manifest.go | 100 ++++++ internal/scan/manifest/npm_lock.go | 87 ++++++ internal/scan/manifest/pypi_lock.go | 99 ++++++ 20 files changed, 1970 insertions(+) create mode 100644 internal/ci/aws_codebuild.go create mode 100644 internal/ci/azure_devops.go create mode 100644 internal/ci/bitbucket_pipelines.go create mode 100644 internal/ci/ci.go create mode 100644 internal/ci/circleci.go create mode 100644 internal/ci/github_actions.go create mode 100644 internal/ci/gitlab_ci.go create mode 100644 internal/ci/google_cloud_build.go create mode 100644 internal/ci/jenkins.go create mode 100644 internal/ci/local.go create mode 100644 internal/ci/registry.go create mode 100644 internal/commands/deps_analyze.go create mode 100644 internal/scan/diff.go create mode 100644 internal/scan/manifest/cargo_lock.go create mode 100644 internal/scan/manifest/gemfile_lock.go create mode 100644 internal/scan/manifest/go_mod.go create mode 100644 internal/scan/manifest/manifest.go create mode 100644 internal/scan/manifest/npm_lock.go create mode 100644 internal/scan/manifest/pypi_lock.go diff --git a/cmd/cli/cmd/root.go b/cmd/cli/cmd/root.go index 30df3c9..230e72f 100644 --- a/cmd/cli/cmd/root.go +++ b/cmd/cli/cmd/root.go @@ -104,6 +104,11 @@ func init() { commands.RegisterSastCommands(apiCmd, getAPIClient) commands.RegisterScaCommands(apiCmd, getAPIClient) commands.RegisterSecretsCommands(apiCmd, getAPIClient) + + // Hand-written workflow — not generated from OpenAPI. Routes through + // scpm's /scpm/dependencies/analyze. Wired at top level (not under + // apiCmd) so `nullify deps analyze` reads naturally in CI scripts. + commands.RegisterDepsAnalyzeCommand(rootCmd, getAPIClient) } func setupLogger(ctx context.Context) context.Context { diff --git a/internal/ci/aws_codebuild.go b/internal/ci/aws_codebuild.go new file mode 100644 index 0000000..bce81a4 --- /dev/null +++ b/internal/ci/aws_codebuild.go @@ -0,0 +1,80 @@ +package ci + +import ( + "context" + "net/http" + "os" + "strings" +) + +// AWSCodeBuild — https://docs.aws.amazon.com/codebuild/latest/userguide/build-env-ref-env-vars.html +// +// Key envs: +// CODEBUILD_BUILD_ID signature +// CODEBUILD_RESOLVED_SOURCE_VERSION head +// CODEBUILD_WEBHOOK_BASE_REF base (webhook-triggered PRs) +// CODEBUILD_SOURCE_REPO_URL https://github.com/org/repo.git +type AWSCodeBuild struct{} + +func NewAWSCodeBuild() Provider { return &AWSCodeBuild{} } + +func (a *AWSCodeBuild) Platform() string { return "AWS_CODEBUILD" } + +func (a *AWSCodeBuild) Detect() bool { return os.Getenv("CODEBUILD_BUILD_ID") != "" } + +func (a *AWSCodeBuild) BaseRef(ctx context.Context) (string, error) { + // CODEBUILD_WEBHOOK_BASE_REF is "refs/heads/main" shape — strip the + // prefix + resolve. + if v := os.Getenv("CODEBUILD_WEBHOOK_BASE_REF"); v != "" { + return resolveRef(ctx, "origin/"+strings.TrimPrefix(v, "refs/heads/")) + } + return resolveRef(ctx, "origin/HEAD") +} + +func (a *AWSCodeBuild) HeadRef(ctx context.Context) (string, error) { + if v := os.Getenv("CODEBUILD_RESOLVED_SOURCE_VERSION"); v != "" { + return v, nil + } + return resolveRef(ctx, "HEAD") +} + +func (a *AWSCodeBuild) PRNumber() (int, bool) { + // CODEBUILD_WEBHOOK_TRIGGER sometimes has "pr/123" shape. + trigger := os.Getenv("CODEBUILD_WEBHOOK_TRIGGER") + if !strings.HasPrefix(trigger, "pr/") { + return 0, false + } + n := 0 + for _, c := range trigger[3:] { + if c < '0' || c > '9' { + return 0, false + } + n = n*10 + int(c-'0') + } + return n, true +} + +func (a *AWSCodeBuild) RepoSlug() (string, string, bool) { + repoURL := os.Getenv("CODEBUILD_SOURCE_REPO_URL") + if repoURL == "" { + return "", "", false + } + // Strip trailing .git + any protocol; keep the final "owner/name" + // segment. + trimmed := strings.TrimSuffix(repoURL, ".git") + parts := strings.Split(trimmed, "/") + if len(parts) < 2 { + return "", "", false + } + return parts[len(parts)-2], parts[len(parts)-1], true +} + +func (a *AWSCodeBuild) EnrichHeader(h http.Header) { + if v := os.Getenv("CODEBUILD_BUILD_ID"); v != "" { + h.Set("X-Nullify-CI-Run-ID", v) + } + if v := os.Getenv("CODEBUILD_RESOLVED_SOURCE_VERSION"); v != "" { + h.Set("X-Nullify-CI-Commit", v) + } + h.Set("X-Nullify-CI-Provider", a.Platform()) +} diff --git a/internal/ci/azure_devops.go b/internal/ci/azure_devops.go new file mode 100644 index 0000000..57a35a6 --- /dev/null +++ b/internal/ci/azure_devops.go @@ -0,0 +1,72 @@ +package ci + +import ( + "context" + "net/http" + "os" + "strconv" +) + +// AzureDevOps — https://learn.microsoft.com/en-us/azure/devops/pipelines/build/variables +// +// Key envs: +// TF_BUILD=True signature +// BUILD_SOURCEVERSION head commit +// SYSTEM_PULLREQUEST_TARGETBRANCHNAME target branch (PR builds) +// SYSTEM_PULLREQUEST_PULLREQUESTNUMBER PR number +// BUILD_REPOSITORY_NAME repo name (no owner) +// BUILD_BUILDID run id +type AzureDevOps struct{} + +func NewAzureDevOps() Provider { return &AzureDevOps{} } + +func (a *AzureDevOps) Platform() string { return "AZURE_DEVOPS" } + +func (a *AzureDevOps) Detect() bool { return os.Getenv("TF_BUILD") == "True" } + +func (a *AzureDevOps) BaseRef(ctx context.Context) (string, error) { + if v := os.Getenv("SYSTEM_PULLREQUEST_TARGETBRANCHNAME"); v != "" { + return resolveRef(ctx, "origin/"+v) + } + return resolveRef(ctx, "HEAD^") +} + +func (a *AzureDevOps) HeadRef(ctx context.Context) (string, error) { + if v := os.Getenv("BUILD_SOURCEVERSION"); v != "" { + return v, nil + } + return resolveRef(ctx, "HEAD") +} + +func (a *AzureDevOps) PRNumber() (int, bool) { + v := os.Getenv("SYSTEM_PULLREQUEST_PULLREQUESTNUMBER") + if v == "" { + return 0, false + } + n, err := strconv.Atoi(v) + if err != nil { + return 0, false + } + return n, true +} + +func (a *AzureDevOps) RepoSlug() (string, string, bool) { + // Azure uses Organization/Project/Repo — not a two-part slug. + // Collapse Organization as the "owner" field. + owner := os.Getenv("SYSTEM_COLLECTIONURI") + name := os.Getenv("BUILD_REPOSITORY_NAME") + if name == "" { + return "", "", false + } + return owner, name, true +} + +func (a *AzureDevOps) EnrichHeader(h http.Header) { + if v := os.Getenv("BUILD_BUILDID"); v != "" { + h.Set("X-Nullify-CI-Run-ID", v) + } + if v := os.Getenv("BUILD_SOURCEVERSION"); v != "" { + h.Set("X-Nullify-CI-Commit", v) + } + h.Set("X-Nullify-CI-Provider", a.Platform()) +} diff --git a/internal/ci/bitbucket_pipelines.go b/internal/ci/bitbucket_pipelines.go new file mode 100644 index 0000000..6fcc996 --- /dev/null +++ b/internal/ci/bitbucket_pipelines.go @@ -0,0 +1,71 @@ +package ci + +import ( + "context" + "net/http" + "os" + "strconv" +) + +// BitbucketPipelines — https://support.atlassian.com/bitbucket-cloud/docs/variables-and-secrets/ +// +// Key envs: +// BITBUCKET_BUILD_NUMBER signature +// BITBUCKET_COMMIT head +// BITBUCKET_PR_DESTINATION_BRANCH target branch (PR builds) +// BITBUCKET_PR_ID PR number +// BITBUCKET_REPO_OWNER / REPO_SLUG +type BitbucketPipelines struct{} + +func NewBitbucketPipelines() Provider { return &BitbucketPipelines{} } + +func (b *BitbucketPipelines) Platform() string { return "BITBUCKET_PIPELINES" } + +func (b *BitbucketPipelines) Detect() bool { + return os.Getenv("BITBUCKET_BUILD_NUMBER") != "" +} + +func (b *BitbucketPipelines) BaseRef(ctx context.Context) (string, error) { + if v := os.Getenv("BITBUCKET_PR_DESTINATION_BRANCH"); v != "" { + return resolveRef(ctx, "origin/"+v) + } + return resolveRef(ctx, "HEAD^") +} + +func (b *BitbucketPipelines) HeadRef(ctx context.Context) (string, error) { + if v := os.Getenv("BITBUCKET_COMMIT"); v != "" { + return v, nil + } + return resolveRef(ctx, "HEAD") +} + +func (b *BitbucketPipelines) PRNumber() (int, bool) { + v := os.Getenv("BITBUCKET_PR_ID") + if v == "" { + return 0, false + } + n, err := strconv.Atoi(v) + if err != nil { + return 0, false + } + return n, true +} + +func (b *BitbucketPipelines) RepoSlug() (string, string, bool) { + owner := os.Getenv("BITBUCKET_REPO_OWNER") + name := os.Getenv("BITBUCKET_REPO_SLUG") + if owner == "" || name == "" { + return "", "", false + } + return owner, name, true +} + +func (b *BitbucketPipelines) EnrichHeader(h http.Header) { + if v := os.Getenv("BITBUCKET_BUILD_NUMBER"); v != "" { + h.Set("X-Nullify-CI-Run-ID", v) + } + if v := os.Getenv("BITBUCKET_COMMIT"); v != "" { + h.Set("X-Nullify-CI-Commit", v) + } + h.Set("X-Nullify-CI-Provider", b.Platform()) +} diff --git a/internal/ci/ci.go b/internal/ci/ci.go new file mode 100644 index 0000000..323912e --- /dev/null +++ b/internal/ci/ci.go @@ -0,0 +1,70 @@ +// Package ci detects the CI/CD platform the CLI is currently running under +// and exposes the base/head commit refs it needs to compute changed +// dependencies. +// +// Implementations are registered in priority order in registry.go. +// Detect() returns the first provider whose env-var signature matches; +// the Local fallback is last so an unrecognised CI still works off +// `git rev-parse` defaults. +// +// This package deliberately uses the PipelinePlatform enum from +// benchmarks/models — the canonical set of platforms Nullify supports +// lives there, and any new provider added to this package must have a +// matching enum value (lint check in cli/Makefile enforces this). +package ci + +import ( + "context" + "errors" + "net/http" +) + +// Provider identifies one CI/CD platform and exposes the information +// the CLI's deps-analyze + containers-analyze workflows need. All +// methods are expected to be cheap — provider detection happens on +// every CLI invocation. +type Provider interface { + // Platform returns the canonical PipelinePlatform enum value (from + // benchmarks/models). Must match the string in the Makefile's + // provider-coverage lint target. + Platform() string + + // Detect returns true when the current process env matches this + // provider's signature. Detect MUST NOT touch the network or + // filesystem — env var inspection only. + Detect() bool + + // BaseRef returns the commit or ref (short sha, full sha, or + // branch name) the CI declared as the base of the current build. + // For PR builds, this is the PR target branch's HEAD at PR open + // time; for push builds, it's the previous HEAD of the pushed + // branch. Fall back to "origin/" when CI doesn't + // expose a specific base. + BaseRef(ctx context.Context) (string, error) + + // HeadRef returns the commit the current build is running against. + // For PR builds, this is the PR's head commit; for push builds, + // the pushed commit. + HeadRef(ctx context.Context) (string, error) + + // PRNumber returns the pull-request number if the current build is + // a PR, and (0, false) otherwise. Used for diagnostic logging + as + // the idempotency-key prefix in scpm calls. + PRNumber() (int, bool) + + // RepoSlug returns (owner, name) for GitHub/GitLab/Bitbucket-style + // coordinates. Returns (_, _, false) when the provider doesn't + // expose it (eg. Jenkins, self-hosted CI). + RepoSlug() (owner, name string, ok bool) + + // EnrichHeader adds CI-specific headers to outbound HTTP requests + // (commit SHA, PR number, run ID) so scpm's audit log can tie a + // specific CLI run back to a CI invocation. Called once per HTTP + // request built by the workflow. + EnrichHeader(h http.Header) +} + +// ErrNoProvider is returned by Detect when no registered provider +// matches — shouldn't happen in practice because the Local fallback +// always returns true, but declared so callers can assert on it. +var ErrNoProvider = errors.New("no CI provider detected") diff --git a/internal/ci/circleci.go b/internal/ci/circleci.go new file mode 100644 index 0000000..2b3136a --- /dev/null +++ b/internal/ci/circleci.go @@ -0,0 +1,88 @@ +package ci + +import ( + "context" + "fmt" + "net/http" + "os" + "strconv" + "strings" +) + +// CircleCI — https://circleci.com/docs/variables/ +// +// Key envs: +// CIRCLECI=true +// CIRCLE_SHA1 head +// CIRCLE_PR_NUMBER PR number (only when triggered from a +// forked-PR webhook) +// CIRCLE_PULL_REQUEST "https://github.com/org/repo/pull/N" +// — parse to recover the PR number +// for non-forked PRs too +// CIRCLE_PROJECT_USERNAME/REPONAME +// CIRCLE_WORKFLOW_ID run id +type CircleCI struct{} + +func NewCircleCI() Provider { return &CircleCI{} } + +func (c *CircleCI) Platform() string { return "CIRCLECI" } + +func (c *CircleCI) Detect() bool { return os.Getenv("CIRCLECI") == "true" } + +func (c *CircleCI) BaseRef(ctx context.Context) (string, error) { + // Circle doesn't expose a base commit directly. Best available: + // origin/. Callers wanting PR-diff semantics should + // set NULLIFY_BASE_REF explicitly. + if v := os.Getenv("NULLIFY_BASE_REF"); v != "" { + return resolveRef(ctx, v) + } + return resolveRef(ctx, "origin/HEAD") +} + +func (c *CircleCI) HeadRef(ctx context.Context) (string, error) { + if v := os.Getenv("CIRCLE_SHA1"); v != "" { + return v, nil + } + return resolveRef(ctx, "HEAD") +} + +func (c *CircleCI) PRNumber() (int, bool) { + if v := os.Getenv("CIRCLE_PR_NUMBER"); v != "" { + if n, err := strconv.Atoi(v); err == nil { + return n, true + } + } + if v := os.Getenv("CIRCLE_PULL_REQUEST"); v != "" { + // Parse "https://github.com/org/repo/pull/123". + parts := strings.Split(v, "/") + if len(parts) >= 2 && parts[len(parts)-2] == "pull" { + if n, err := strconv.Atoi(parts[len(parts)-1]); err == nil { + return n, true + } + } + } + return 0, false +} + +func (c *CircleCI) RepoSlug() (string, string, bool) { + owner := os.Getenv("CIRCLE_PROJECT_USERNAME") + name := os.Getenv("CIRCLE_PROJECT_REPONAME") + if owner == "" || name == "" { + return "", "", false + } + return owner, name, true +} + +func (c *CircleCI) EnrichHeader(h http.Header) { + if v := os.Getenv("CIRCLE_WORKFLOW_ID"); v != "" { + h.Set("X-Nullify-CI-Run-ID", v) + } + if v := os.Getenv("CIRCLE_SHA1"); v != "" { + h.Set("X-Nullify-CI-Commit", v) + } + h.Set("X-Nullify-CI-Provider", c.Platform()) +} + +// _ is a tiny type guard that keeps fmt imported for future error-wrapping +// additions without a noisy linter warning in the meantime. +var _ = fmt.Sprintf diff --git a/internal/ci/github_actions.go b/internal/ci/github_actions.go new file mode 100644 index 0000000..11bb176 --- /dev/null +++ b/internal/ci/github_actions.go @@ -0,0 +1,103 @@ +package ci + +import ( + "context" + "fmt" + "net/http" + "os" + "os/exec" + "strconv" + "strings" +) + +// GitHubActions — https://docs.github.com/en/actions/learn-github-actions/variables +// +// Key envs we rely on: +// GITHUB_ACTIONS=true signature +// GITHUB_SHA head commit +// GITHUB_EVENT_NAME=pull_request + GITHUB_BASE_REF (target branch name) +// GITHUB_REPOSITORY owner/name +// GITHUB_RUN_ID + GITHUB_RUN_ATTEMPT +// +// PR base is tricky: GITHUB_BASE_REF is just the target branch NAME, not +// a commit. BaseRef() resolves it by running `git rev-parse +// origin/` which assumes the runner fetched origin (the standard +// actions/checkout@v4 does). +type GitHubActions struct{} + +func NewGitHubActions() Provider { return &GitHubActions{} } + +func (g *GitHubActions) Platform() string { return "GITHUB_ACTIONS" } + +func (g *GitHubActions) Detect() bool { return os.Getenv("GITHUB_ACTIONS") == "true" } + +func (g *GitHubActions) BaseRef(ctx context.Context) (string, error) { + // PR build: GITHUB_BASE_REF is populated; resolve to commit. + if base := os.Getenv("GITHUB_BASE_REF"); base != "" { + return resolveRef(ctx, "origin/"+base) + } + // Push build: use the previous commit on the pushed ref (HEAD^). + // Falls back to the default branch's tip when HEAD has no parent + // (first push). + if sha, err := resolveRef(ctx, "HEAD^"); err == nil { + return sha, nil + } + return resolveRef(ctx, "origin/HEAD") +} + +func (g *GitHubActions) HeadRef(ctx context.Context) (string, error) { + if sha := os.Getenv("GITHUB_SHA"); sha != "" { + return sha, nil + } + return resolveRef(ctx, "HEAD") +} + +func (g *GitHubActions) PRNumber() (int, bool) { + ref := os.Getenv("GITHUB_REF") + // Shape: "refs/pull/123/merge" for PR builds. + if !strings.HasPrefix(ref, "refs/pull/") { + return 0, false + } + parts := strings.Split(ref, "/") + if len(parts) < 3 { + return 0, false + } + n, err := strconv.Atoi(parts[2]) + if err != nil { + return 0, false + } + return n, true +} + +func (g *GitHubActions) RepoSlug() (string, string, bool) { + repo := os.Getenv("GITHUB_REPOSITORY") + if repo == "" { + return "", "", false + } + parts := strings.SplitN(repo, "/", 2) + if len(parts) != 2 { + return "", "", false + } + return parts[0], parts[1], true +} + +func (g *GitHubActions) EnrichHeader(h http.Header) { + if v := os.Getenv("GITHUB_RUN_ID"); v != "" { + h.Set("X-Nullify-CI-Run-ID", v) + } + if v := os.Getenv("GITHUB_SHA"); v != "" { + h.Set("X-Nullify-CI-Commit", v) + } + h.Set("X-Nullify-CI-Provider", g.Platform()) +} + +// resolveRef runs `git rev-parse` to turn a symbolic ref into a commit. +// Package-level helper so every provider reuses the same shell-out. +func resolveRef(ctx context.Context, ref string) (string, error) { + cmd := exec.CommandContext(ctx, "git", "rev-parse", "--verify", ref+"^{commit}") + out, err := cmd.Output() + if err != nil { + return "", fmt.Errorf("git rev-parse %s: %w", ref, err) + } + return strings.TrimSpace(string(out)), nil +} diff --git a/internal/ci/gitlab_ci.go b/internal/ci/gitlab_ci.go new file mode 100644 index 0000000..954f8f2 --- /dev/null +++ b/internal/ci/gitlab_ci.go @@ -0,0 +1,84 @@ +package ci + +import ( + "context" + "net/http" + "os" + "strconv" + "strings" +) + +// GitLabCI — https://docs.gitlab.com/ci/variables/predefined_variables.html +// +// Key envs: +// GITLAB_CI=true +// CI_COMMIT_SHA head +// CI_MERGE_REQUEST_TARGET_BRANCH_SHA base (MR builds) +// CI_COMMIT_BEFORE_SHA base (push builds) +// CI_MERGE_REQUEST_IID PR number +// CI_PROJECT_PATH owner/name +// CI_PIPELINE_ID run id +type GitLabCI struct{} + +func NewGitLabCI() Provider { return &GitLabCI{} } + +func (g *GitLabCI) Platform() string { return "GITLAB_CI" } + +func (g *GitLabCI) Detect() bool { return os.Getenv("GITLAB_CI") == "true" } + +func (g *GitLabCI) BaseRef(ctx context.Context) (string, error) { + // MR build preferred (exact PR target sha) + if v := os.Getenv("CI_MERGE_REQUEST_TARGET_BRANCH_SHA"); v != "" { + return v, nil + } + // Push build: BEFORE_SHA can be "0000..." on first push to a new + // branch — treat that as "use default branch tip" instead. + if v := os.Getenv("CI_COMMIT_BEFORE_SHA"); v != "" && !strings.HasPrefix(v, "00000000") { + return v, nil + } + return resolveRef(ctx, "origin/HEAD") +} + +func (g *GitLabCI) HeadRef(ctx context.Context) (string, error) { + if v := os.Getenv("CI_COMMIT_SHA"); v != "" { + return v, nil + } + return resolveRef(ctx, "HEAD") +} + +func (g *GitLabCI) PRNumber() (int, bool) { + v := os.Getenv("CI_MERGE_REQUEST_IID") + if v == "" { + return 0, false + } + n, err := strconv.Atoi(v) + if err != nil { + return 0, false + } + return n, true +} + +func (g *GitLabCI) RepoSlug() (string, string, bool) { + v := os.Getenv("CI_PROJECT_PATH") + if v == "" { + return "", "", false + } + // GitLab allows nested groups (foo/bar/baz). Collapse all but the + // last segment into the "owner" field so owner/name stays + // well-formed. + idx := strings.LastIndex(v, "/") + if idx < 0 { + return "", "", false + } + return v[:idx], v[idx+1:], true +} + +func (g *GitLabCI) EnrichHeader(h http.Header) { + if v := os.Getenv("CI_PIPELINE_ID"); v != "" { + h.Set("X-Nullify-CI-Run-ID", v) + } + if v := os.Getenv("CI_COMMIT_SHA"); v != "" { + h.Set("X-Nullify-CI-Commit", v) + } + h.Set("X-Nullify-CI-Provider", g.Platform()) +} diff --git a/internal/ci/google_cloud_build.go b/internal/ci/google_cloud_build.go new file mode 100644 index 0000000..31729f0 --- /dev/null +++ b/internal/ci/google_cloud_build.go @@ -0,0 +1,65 @@ +package ci + +import ( + "context" + "net/http" + "os" +) + +// GoogleCloudBuild — https://cloud.google.com/build/docs/configuring-builds/substitute-variable-values +// +// GCB's predefined substitutions are prefixed COMMIT_SHA, BUILD_ID, +// PROJECT_ID, REPO_NAME. When triggered via the GitHub mirror integration +// we also get _PR_NUMBER. No standard "target branch" variable — rely +// on NULLIFY_BASE_REF when running against PR diffs. +type GoogleCloudBuild struct{} + +func NewGoogleCloudBuild() Provider { return &GoogleCloudBuild{} } + +func (g *GoogleCloudBuild) Platform() string { return "GOOGLE_CLOUD_BUILD" } + +// Detect: Google Cloud Build doesn't expose a single CI=true-style flag; +// BUILD_ID + PROJECT_ID together are a reasonable signature. +func (g *GoogleCloudBuild) Detect() bool { + return os.Getenv("BUILD_ID") != "" && os.Getenv("PROJECT_ID") != "" && + os.Getenv("GITLAB_CI") == "" && os.Getenv("GITHUB_ACTIONS") != "true" +} + +func (g *GoogleCloudBuild) BaseRef(ctx context.Context) (string, error) { + if v := os.Getenv("NULLIFY_BASE_REF"); v != "" { + return resolveRef(ctx, v) + } + return resolveRef(ctx, "origin/HEAD") +} + +func (g *GoogleCloudBuild) HeadRef(ctx context.Context) (string, error) { + if v := os.Getenv("COMMIT_SHA"); v != "" { + return v, nil + } + return resolveRef(ctx, "HEAD") +} + +func (g *GoogleCloudBuild) PRNumber() (int, bool) { + // Not exposed by GCB in a standard way; operators can set + // NULLIFY_PR_NUMBER if their trigger template plumbs it. + return 0, false +} + +func (g *GoogleCloudBuild) RepoSlug() (string, string, bool) { + owner := os.Getenv("PROJECT_ID") + name := os.Getenv("REPO_NAME") + if owner == "" || name == "" { + return "", "", false + } + return owner, name, true +} + +func (g *GoogleCloudBuild) EnrichHeader(h http.Header) { + if v := os.Getenv("BUILD_ID"); v != "" { + h.Set("X-Nullify-CI-Run-ID", v) + } + if v := os.Getenv("COMMIT_SHA"); v != "" { + h.Set("X-Nullify-CI-Commit", v) + } + h.Set("X-Nullify-CI-Provider", g.Platform()) +} diff --git a/internal/ci/jenkins.go b/internal/ci/jenkins.go new file mode 100644 index 0000000..71c8924 --- /dev/null +++ b/internal/ci/jenkins.go @@ -0,0 +1,85 @@ +package ci + +import ( + "context" + "net/http" + "os" + "strconv" +) + +// Jenkins — https://www.jenkins.io/doc/book/pipeline/jenkinsfile/#using-environment-variables +// +// Jenkins has no canonical "target branch" variable — different pipeline +// setups expose different things. We read the common Multibranch / +// GitHub-branch-source envs + fall back to NULLIFY_BASE_REF for anything +// the operator prefers to set by hand. +// +// Key envs: +// JENKINS_URL signature +// GIT_COMMIT head +// CHANGE_ID PR number (Multibranch) +// CHANGE_TARGET PR target branch +// BUILD_NUMBER run id +type Jenkins struct{} + +func NewJenkins() Provider { return &Jenkins{} } + +func (j *Jenkins) Platform() string { return "JENKINS" } + +func (j *Jenkins) Detect() bool { return os.Getenv("JENKINS_URL") != "" } + +func (j *Jenkins) BaseRef(ctx context.Context) (string, error) { + if v := os.Getenv("NULLIFY_BASE_REF"); v != "" { + return resolveRef(ctx, v) + } + if v := os.Getenv("CHANGE_TARGET"); v != "" { + return resolveRef(ctx, "origin/"+v) + } + return resolveRef(ctx, "origin/HEAD") +} + +func (j *Jenkins) HeadRef(ctx context.Context) (string, error) { + if v := os.Getenv("GIT_COMMIT"); v != "" { + return v, nil + } + return resolveRef(ctx, "HEAD") +} + +func (j *Jenkins) PRNumber() (int, bool) { + v := os.Getenv("CHANGE_ID") + if v == "" { + return 0, false + } + n, err := strconv.Atoi(v) + if err != nil { + return 0, false + } + return n, true +} + +func (j *Jenkins) RepoSlug() (string, string, bool) { + // Jenkins doesn't expose owner/name directly — an operator-provided + // NULLIFY_REPO_SLUG=owner/name covers the case. Without it we + // return (_, _, false) and scpm falls back to other identifying + // headers. + slug := os.Getenv("NULLIFY_REPO_SLUG") + if slug == "" { + return "", "", false + } + for i := range slug { + if slug[i] == '/' { + return slug[:i], slug[i+1:], true + } + } + return "", "", false +} + +func (j *Jenkins) EnrichHeader(h http.Header) { + if v := os.Getenv("BUILD_NUMBER"); v != "" { + h.Set("X-Nullify-CI-Run-ID", v) + } + if v := os.Getenv("GIT_COMMIT"); v != "" { + h.Set("X-Nullify-CI-Commit", v) + } + h.Set("X-Nullify-CI-Provider", j.Platform()) +} diff --git a/internal/ci/local.go b/internal/ci/local.go new file mode 100644 index 0000000..1a0de4a --- /dev/null +++ b/internal/ci/local.go @@ -0,0 +1,61 @@ +package ci + +import ( + "context" + "net/http" + "os" +) + +// Local is the fallback when no CI env vars match — useful when +// developers run `nullify deps analyze` on their laptop before pushing. +// Always returns true from Detect(), so it MUST be the last entry in +// the registry list. +// +// Defaults: +// BaseRef = origin/HEAD (the remote's default-branch tip) +// HeadRef = HEAD +// +// Operators can override either via NULLIFY_BASE_REF / NULLIFY_HEAD_REF +// env vars if their local-dev workflow needs something specific. +type Local struct{} + +func NewLocal() Provider { return &Local{} } + +func (l *Local) Platform() string { return "OTHER" } + +func (l *Local) Detect() bool { return true } + +func (l *Local) BaseRef(ctx context.Context) (string, error) { + if v := os.Getenv("NULLIFY_BASE_REF"); v != "" { + return resolveRef(ctx, v) + } + return resolveRef(ctx, "origin/HEAD") +} + +func (l *Local) HeadRef(ctx context.Context) (string, error) { + if v := os.Getenv("NULLIFY_HEAD_REF"); v != "" { + return resolveRef(ctx, v) + } + return resolveRef(ctx, "HEAD") +} + +func (l *Local) PRNumber() (int, bool) { + return 0, false +} + +func (l *Local) RepoSlug() (string, string, bool) { + slug := os.Getenv("NULLIFY_REPO_SLUG") + if slug == "" { + return "", "", false + } + for i := range slug { + if slug[i] == '/' { + return slug[:i], slug[i+1:], true + } + } + return "", "", false +} + +func (l *Local) EnrichHeader(h http.Header) { + h.Set("X-Nullify-CI-Provider", l.Platform()) +} diff --git a/internal/ci/registry.go b/internal/ci/registry.go new file mode 100644 index 0000000..ca20361 --- /dev/null +++ b/internal/ci/registry.go @@ -0,0 +1,40 @@ +package ci + +// Registry enumerates every known CI provider in priority order. First +// match wins. Local MUST be last — it always returns true from Detect() +// so the CLI still works when run from a developer's laptop. +// +// When adding a new provider: +// 1. Implement Provider in a new file under this package. +// 2. Append it to the list below (before Local). +// 3. Ensure its Platform() value matches a benchmarks.PipelinePlatform +// enum — `make -C cli lint-ci-providers` verifies this. + +// Default returns the full list of providers in detection priority. +// Exposed as a constructor (not a package-level var) so tests can build +// a truncated list without touching shared state. +func Default() []Provider { + return []Provider{ + NewGitHubActions(), + NewGitLabCI(), + NewCircleCI(), + NewBitbucketPipelines(), + NewJenkins(), + NewAzureDevOps(), + NewGoogleCloudBuild(), + NewAWSCodeBuild(), + NewLocal(), // always last — always matches + } +} + +// Detect walks providers in order and returns the first one whose +// Detect() matches. Returns ErrNoProvider only if the caller passed a +// custom list without Local. +func Detect(providers []Provider) (Provider, error) { + for _, p := range providers { + if p.Detect() { + return p, nil + } + } + return nil, ErrNoProvider +} diff --git a/internal/commands/deps_analyze.go b/internal/commands/deps_analyze.go new file mode 100644 index 0000000..c892022 --- /dev/null +++ b/internal/commands/deps_analyze.go @@ -0,0 +1,411 @@ +package commands + +// RegisterDepsAnalyzeCommand wires `nullify deps analyze` — a hand-written +// CI-pipeline workflow that detects changed dependencies between two +// commits and requests malware analysis for each via scpm's public API. +// +// Follows the RegisterContextPushCommand pattern (same file pkg, +// cobra-based, built on top of the generic api.Client for auth). The +// scpm calls are hand-rolled HTTP POSTs rather than generated API +// methods because scpm's OpenAPI regen happens in a separate pipeline +// — the generator will replace this file with a thinner version once +// the spec lands in the cli repo. + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "os" + "os/exec" + "strings" + "time" + + "github.com/nullify-platform/cli/internal/api" + "github.com/nullify-platform/cli/internal/ci" + "github.com/nullify-platform/cli/internal/scan" + "github.com/nullify-platform/cli/internal/scan/manifest" + "github.com/spf13/cobra" +) + +// ExitCodes — stable exit-code table so CI operators can gate merges. +const ( + exitNoFinding = 0 + exitVulnerableFound = 10 + exitSuspiciousFound = 20 + exitMaliciousFound = 30 + exitInvalidInvocation = 2 + exitTransientFailure = 1 +) + +// RegisterDepsAnalyzeCommand attaches `deps analyze` to the given +// parent command. api.Client is reused for auth (BaseURL + Token) — +// even though the scpm calls aren't in the generated surface yet, +// credentials and host come from the same source as every other +// nullify command. +func RegisterDepsAnalyzeCommand(parent *cobra.Command, getClient func() *api.Client) { + var depsCmd *cobra.Command + for _, c := range parent.Commands() { + if c.Name() == "deps" { + depsCmd = c + break + } + } + if depsCmd == nil { + depsCmd = &cobra.Command{ + Use: "deps", + Short: "Dependency analysis commands", + } + parent.AddCommand(depsCmd) + } + + var ( + baseRef string + headRef string + repoPath string + wait bool + failOn string + outputFormat string + idempotencySeed string + ) + + analyzeCmd := &cobra.Command{ + Use: "analyze", + Short: "Detect changed dependencies + request malware analysis", + Long: `Analyse dependencies that changed between two commits. + +Detects the current CI/CD platform, computes the dependency diff +between --base (default: CI-provided target branch) and --head (default +HEAD), and requests malware analysis for each new or bumped dep via +scpm /scpm/dependencies/analyze. + +Exit codes: + 0 no concerning findings + 10 vulnerable dependency detected + 20 suspicious malware signal + 30 confirmed malicious + 2 invalid invocation + 1 transient failure (retry) +`, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + return runDepsAnalyze(ctx, getClient(), depsAnalyzeOpts{ + BaseRef: baseRef, + HeadRef: headRef, + RepoPath: repoPath, + Wait: wait, + FailOn: failOn, + OutputFormat: outputFormat, + IdempotencySeed: idempotencySeed, + }) + }, + } + + analyzeCmd.Flags().StringVar(&baseRef, "base", "", "Base commit SHA or ref; defaults to the CI-provided target branch") + analyzeCmd.Flags().StringVar(&headRef, "head", "HEAD", "Head commit SHA or ref") + analyzeCmd.Flags().StringVar(&repoPath, "repo", ".", "Path to the git repository") + analyzeCmd.Flags().BoolVar(&wait, "wait", false, "Block until every analysis job reaches a terminal verdict") + analyzeCmd.Flags().StringVar(&failOn, "fail-on", "malicious", "Exit non-zero when any finding of this severity or worse: vulnerable|suspicious|malicious|none") + analyzeCmd.Flags().StringVar(&outputFormat, "format", "text", "Output format: text|json") + analyzeCmd.Flags().StringVar(&idempotencySeed, "idempotency-seed", "", "Prefix for the scpm idempotency key; default is CI-provider+run-id") + depsCmd.AddCommand(analyzeCmd) +} + +type depsAnalyzeOpts struct { + BaseRef string + HeadRef string + RepoPath string + Wait bool + FailOn string + OutputFormat string + IdempotencySeed string +} + +// scpmAnalyzeRequest / Response mirror the shape in +// scpm/internal/endpoints/dependencies_analyze_post.go. Kept in-file +// (not a shared client package) because until the OpenAPI +// regeneration cycle completes, these are the only structs the CLI +// needs. +type scpmAnalyzeRequest struct { + Ecosystem string `json:"ecosystem"` + Name string `json:"name"` + Version string `json:"version"` + PreviousVersion string `json:"previousVersion,omitempty"` + IdempotencyKey string `json:"idempotencyKey,omitempty"` +} + +type scpmAnalyzeResponse struct { + JobID string `json:"jobId"` + Status string `json:"status"` + CacheHit bool `json:"cacheHit"` + Verdict string `json:"verdict,omitempty"` +} + +func runDepsAnalyze(ctx context.Context, client *api.Client, opts depsAnalyzeOpts) error { + if client == nil || client.BaseURL == "" { + return exitError(exitInvalidInvocation, "no API client configured (run `nullify auth login` first)") + } + + // Detect provider so we can auto-fill BaseRef when omitted + stamp + // CI headers on every outbound request. + provider, err := ci.Detect(ci.Default()) + if err != nil { + return exitError(exitInvalidInvocation, "detect CI provider: %v", err) + } + + base := opts.BaseRef + if base == "" { + base, err = provider.BaseRef(ctx) + if err != nil { + return exitError(exitInvalidInvocation, "resolve base ref: %v", err) + } + } + head := opts.HeadRef + if head == "" { + head, err = provider.HeadRef(ctx) + if err != nil { + return exitError(exitInvalidInvocation, "resolve head ref: %v", err) + } + } + // If caller passed symbolic refs, resolve to commits so the scpm + // audit log shows the exact SHA. + base, err = resolveCommit(ctx, opts.RepoPath, base) + if err != nil { + return exitError(exitInvalidInvocation, "resolve base %q: %v", base, err) + } + head, err = resolveCommit(ctx, opts.RepoPath, head) + if err != nil { + return exitError(exitInvalidInvocation, "resolve head %q: %v", head, err) + } + + fmt.Fprintf(os.Stderr, "nullify deps analyze: %s (%s → %s)\n", provider.Platform(), short(base), short(head)) + + changed, err := scan.Diff(ctx, opts.RepoPath, base, head, manifest.DefaultParsers()) + if err != nil { + return exitError(exitTransientFailure, "compute dep diff: %v", err) + } + if len(changed) == 0 { + fmt.Fprintln(os.Stderr, "nullify deps analyze: no dependency changes") + return nil + } + + // Only "added" and "bumped" deps have a new version worth analysing. + actionable := make([]scan.ChangedDep, 0, len(changed)) + for _, d := range changed { + if d.Kind == "added" || d.Kind == "bumped" { + actionable = append(actionable, d) + } + } + fmt.Fprintf(os.Stderr, "nullify deps analyze: %d changed, %d actionable\n", len(changed), len(actionable)) + + results := []scpmAnalyzeResponse{} + worstVerdict := "" + for _, d := range actionable { + req := scpmAnalyzeRequest{ + Ecosystem: d.Ecosystem, + Name: d.Name, + Version: d.Version, + PreviousVersion: d.PreviousVersion, + IdempotencyKey: buildIdempotencyKey(opts.IdempotencySeed, provider, d), + } + resp, err := postSCPMAnalyze(ctx, client, provider, req) + if err != nil { + // Single-dep failure shouldn't sink the whole run — + // surface and continue. The exit code still reflects the + // worst observed verdict across successful calls. + fmt.Fprintf(os.Stderr, " %s/%s@%s: analyze failed: %v\n", d.Ecosystem, d.Name, d.Version, err) + continue + } + results = append(results, *resp) + if v := verdictRank(resp.Verdict); v > verdictRank(worstVerdict) { + worstVerdict = resp.Verdict + } + } + + if err := renderResults(opts.OutputFormat, actionable, results); err != nil { + return exitError(exitTransientFailure, "render: %v", err) + } + + return checkFailOn(opts.FailOn, worstVerdict) +} + +// buildIdempotencyKey combines the caller-supplied seed (or the CI +// provider's run ID) with the (ecosystem, name, version) tuple. This +// makes a second invocation of the same CI run return cached results +// even if the CI is re-triggered (common on flaky CI). +func buildIdempotencyKey(seed string, p ci.Provider, d scan.ChangedDep) string { + if seed == "" { + if v := os.Getenv("NULLIFY_IDEMPOTENCY_SEED"); v != "" { + seed = v + } + } + if seed == "" { + // Per-CI default: the run ID header the provider emits. + seed = fmt.Sprintf("ci-%s", p.Platform()) + } + return fmt.Sprintf("%s|%s|%s|%s", seed, d.Ecosystem, d.Name, d.Version) +} + +func postSCPMAnalyze(ctx context.Context, client *api.Client, provider ci.Provider, req scpmAnalyzeRequest) (*scpmAnalyzeResponse, error) { + body, err := json.Marshal(req) + if err != nil { + return nil, err + } + httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, client.BaseURL+"/scpm/dependencies/analyze", bytes.NewReader(body)) + if err != nil { + return nil, err + } + httpReq.Header.Set("Content-Type", "application/json") + if client.Token != "" { + httpReq.Header.Set("Authorization", "Bearer "+client.Token) + } + provider.EnrichHeader(httpReq.Header) + + httpClient := client.HTTPClient + if httpClient == nil { + httpClient = &http.Client{Timeout: 60 * time.Second} + } + resp, err := httpClient.Do(httpReq) + if err != nil { + return nil, err + } + defer resp.Body.Close() //nolint:errcheck + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + b, _ := io.ReadAll(io.LimitReader(resp.Body, 2048)) + return nil, fmt.Errorf("scpm %d: %s", resp.StatusCode, strings.TrimSpace(string(b))) + } + var out scpmAnalyzeResponse + if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { + return nil, err + } + return &out, nil +} + +// verdictRank maps verdict strings to a comparable int so we can +// compute "worst seen" across a batch. Unknown verdicts sort below +// benign so they don't accidentally trip --fail-on=vulnerable. +func verdictRank(v string) int { + switch v { + case "confirmed_malicious": + return 4 + case "suspicious": + return 3 + case "vulnerable": + return 2 + case "benign": + return 1 + default: + return 0 + } +} + +// checkFailOn maps (--fail-on, worst-verdict) to an exit code. +func checkFailOn(failOn, worst string) error { + threshold := verdictRank(verdictThreshold(failOn)) + actual := verdictRank(worst) + if actual < threshold { + return nil + } + switch worst { + case "confirmed_malicious": + return exitError(exitMaliciousFound, "malicious dependency detected") + case "suspicious": + return exitError(exitSuspiciousFound, "suspicious dependency detected") + default: + return exitError(exitVulnerableFound, "concerning verdict: %q", worst) + } +} + +func verdictThreshold(failOn string) string { + switch failOn { + case "none": + return "" // unreachable — verdictRank("") is 0, never crossed + case "vulnerable": + return "vulnerable" + case "suspicious": + return "suspicious" + case "malicious", "": + return "confirmed_malicious" + } + return "confirmed_malicious" +} + +func renderResults(format string, changed []scan.ChangedDep, results []scpmAnalyzeResponse) error { + switch format { + case "json": + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + return enc.Encode(map[string]any{"changed": changed, "results": results}) + case "text", "": + for i, d := range changed { + if i >= len(results) { + break + } + r := results[i] + prev := d.PreviousVersion + if prev == "" { + prev = "(new)" + } + verdict := r.Verdict + if verdict == "" { + verdict = r.Status + } + fmt.Fprintf(os.Stdout, " %s/%s %s → %s [%s] job=%s\n", + d.Ecosystem, d.Name, prev, d.Version, verdict, r.JobID) + } + return nil + default: + return fmt.Errorf("unknown --format %q", format) + } +} + +// --- small helpers --- + +func resolveCommit(ctx context.Context, repoPath, ref string) (string, error) { + cmd := exec.CommandContext(ctx, "git", "rev-parse", "--verify", ref+"^{commit}") + cmd.Dir = repoPath + out, err := cmd.Output() + if err != nil { + return "", err + } + return strings.TrimSpace(string(out)), nil +} + +func short(sha string) string { + if len(sha) > 10 { + return sha[:10] + } + return sha +} + +// exitErr wraps an error with an exit code so the top-level cobra +// handler can translate it to os.Exit(N). Declared locally so this +// command doesn't drag in the cli's main-pkg exit-code wiring. +type exitErr struct { + Code int + Msg string +} + +func (e exitErr) Error() string { return e.Msg } + +func exitError(code int, format string, args ...any) error { + return exitErr{Code: code, Msg: fmt.Sprintf(format, args...)} +} + +// ExitCodeFromError returns the exit code an exitErr wants, or 1 for +// any other error. Callers use this in place of the cli's standard +// error-to-exit logic. +func ExitCodeFromError(err error) int { + if err == nil { + return 0 + } + var ee exitErr + if errors.As(err, &ee) { + return ee.Code + } + return exitTransientFailure +} diff --git a/internal/scan/diff.go b/internal/scan/diff.go new file mode 100644 index 0000000..72a4926 --- /dev/null +++ b/internal/scan/diff.go @@ -0,0 +1,178 @@ +// Package scan orchestrates the CLI's "what changed between these +// two commits" logic. It wraps git ops, manifest parsing, and the +// set-difference that produces the (added, bumped, removed) lists +// deps-analyze consumes. +package scan + +import ( + "bytes" + "context" + "fmt" + "os/exec" + "strings" + + "github.com/nullify-platform/cli/internal/scan/manifest" +) + +// ChangedDep is one dependency whose (ecosystem, name, version) tuple +// changed between base and head. Kind distinguishes the three change +// modes so the workflow can prioritise "new version + bumped version" +// for analyze calls while skipping removals (removed deps don't have a +// new version to analyse). +type ChangedDep struct { + Ecosystem string + Name string + Version string + PreviousVersion string // Empty for "added". + Kind string // "added" | "bumped" | "removed" + File string // Lockfile path change was detected in +} + +// Diff compares two commits by reading their lockfiles via `git show` +// and diffing the parsed manifest entries. Returns the changed deps +// ordered by (file, name) for stable display. +func Diff(ctx context.Context, repoPath, baseRef, headRef string, parsers []manifest.Parser) ([]ChangedDep, error) { + // List files changed between the two revs that are lockfiles we + // know how to parse. Cheaper than reading every lockfile in the + // repo for both revs — a monorepo can have hundreds. + changedPaths, err := listChangedFiles(ctx, repoPath, baseRef, headRef) + if err != nil { + return nil, err + } + lockfilePaths := filterLockfiles(changedPaths, parsers) + if len(lockfilePaths) == 0 { + return nil, nil + } + + baseEntries, err := parseAtRef(ctx, repoPath, baseRef, lockfilePaths, parsers) + if err != nil { + return nil, fmt.Errorf("parse base lockfiles: %w", err) + } + headEntries, err := parseAtRef(ctx, repoPath, headRef, lockfilePaths, parsers) + if err != nil { + return nil, fmt.Errorf("parse head lockfiles: %w", err) + } + + return diffEntries(baseEntries, headEntries), nil +} + +// listChangedFiles runs `git diff --name-only base..head`. Missing +// lockfiles (e.g., deleted at head) are still returned — the parser +// just produces zero entries for that rev. +func listChangedFiles(ctx context.Context, repoPath, base, head string) ([]string, error) { + cmd := exec.CommandContext(ctx, "git", "diff", "--name-only", base+".."+head) + cmd.Dir = repoPath + out, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("git diff --name-only: %w", err) + } + var paths []string + for _, line := range strings.Split(string(out), "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + paths = append(paths, line) + } + return paths, nil +} + +func filterLockfiles(paths []string, parsers []manifest.Parser) []string { + out := []string{} + for _, p := range paths { + for _, parser := range parsers { + if parser.Matches(p) { + out = append(out, p) + break + } + } + } + return out +} + +// parseAtRef does `git show :` for each lockfile + feeds +// the bytes to the matching parser. Missing-at-this-ref is NOT an error +// — the file may have been added (not in base) or deleted (not in +// head); both are normal. +func parseAtRef( + ctx context.Context, + repoPath, ref string, + paths []string, + parsers []manifest.Parser, +) (map[entryKey]manifest.Entry, error) { + files := make([]manifest.File, 0, len(paths)) + for _, p := range paths { + data, err := gitShow(ctx, repoPath, ref, p) + if err != nil { + // File doesn't exist at this ref — skip silently. + continue + } + files = append(files, manifest.File{Path: p, Data: data}) + } + res := manifest.ParseAll(parsers, files) + out := map[entryKey]manifest.Entry{} + for _, e := range res.Entries { + out[entryKey{Ecosystem: e.Ecosystem, Name: e.Name}] = e + } + return out, nil +} + +type entryKey struct { + Ecosystem string + Name string +} + +// gitShow captures `git show :`. Returns an error that +// callers treat as "file absent at this ref." +func gitShow(ctx context.Context, repoPath, ref, path string) ([]byte, error) { + var stdout bytes.Buffer + cmd := exec.CommandContext(ctx, "git", "show", ref+":"+path) + cmd.Dir = repoPath + cmd.Stdout = &stdout + if err := cmd.Run(); err != nil { + return nil, err + } + return stdout.Bytes(), nil +} + +// diffEntries produces the (added, bumped, removed) set from the +// base→head entry maps. Keyed by (ecosystem, name) — so a package +// renamed mid-flight appears as one removed + one added entry, which +// is the right shape for analysis. +func diffEntries(base, head map[entryKey]manifest.Entry) []ChangedDep { + out := []ChangedDep{} + for k, headEntry := range head { + baseEntry, existsInBase := base[k] + switch { + case !existsInBase: + out = append(out, ChangedDep{ + Ecosystem: headEntry.Ecosystem, + Name: headEntry.Name, + Version: headEntry.Version, + Kind: "added", + File: headEntry.File, + }) + case baseEntry.Version != headEntry.Version: + out = append(out, ChangedDep{ + Ecosystem: headEntry.Ecosystem, + Name: headEntry.Name, + Version: headEntry.Version, + PreviousVersion: baseEntry.Version, + Kind: "bumped", + File: headEntry.File, + }) + } + } + for k, baseEntry := range base { + if _, stillInHead := head[k]; !stillInHead { + out = append(out, ChangedDep{ + Ecosystem: baseEntry.Ecosystem, + Name: baseEntry.Name, + PreviousVersion: baseEntry.Version, + Kind: "removed", + File: baseEntry.File, + }) + } + } + return out +} diff --git a/internal/scan/manifest/cargo_lock.go b/internal/scan/manifest/cargo_lock.go new file mode 100644 index 0000000..06a0611 --- /dev/null +++ b/internal/scan/manifest/cargo_lock.go @@ -0,0 +1,98 @@ +package manifest + +// Cargo.lock (Rust) — TOML format with repeated `[[package]]` tables. +// Each entry has `name`, `version`, and optionally `source`. Workspace +// members (no `source`) are the repo's own crates and not interesting +// for vuln lookup; we skip them. +// +// Rather than pull in a full TOML parser, we hand-roll a tiny scanner +// since the Cargo.lock shape is well-behaved and we only need two +// fields per package. + +import ( + "bufio" + "bytes" + "strings" +) + +type CargoLock struct{} + +func NewCargoLock() Parser { return &CargoLock{} } + +func (c *CargoLock) Name() string { return "Cargo.lock" } + +func (c *CargoLock) Matches(path string) bool { + return HasSuffixI(path, "Cargo.lock") +} + +func (c *CargoLock) Parse(data []byte, path string) ([]Entry, error) { + out := []Entry{} + scanner := bufio.NewScanner(bytes.NewReader(data)) + + inPackage := false + var name, version, source string + + flush := func() { + defer func() { name, version, source = "", "", "" }() + // Workspace member: no source → skip (the vuln-database has no + // record of the repo's own crates). + if source == "" || name == "" || version == "" { + return + } + out = append(out, Entry{ + Ecosystem: "crates.io", + Name: name, + Version: version, + File: path, + // Cargo.lock doesn't mark direct-vs-transitive; the root + // manifest would, but we're not reading it here. + Direct: false, + }) + } + + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + switch { + case line == "[[package]]": + if inPackage { + flush() + } + inPackage = true + case strings.HasPrefix(line, "[") && line != "[[package]]": + // New non-package section — flush any pending package. + if inPackage { + flush() + inPackage = false + } + case inPackage: + switch { + case strings.HasPrefix(line, "name = "): + name = stripTOMLString(line[len("name = "):]) + case strings.HasPrefix(line, "version = "): + version = stripTOMLString(line[len("version = "):]) + case strings.HasPrefix(line, "source = "): + source = stripTOMLString(line[len("source = "):]) + } + } + } + if inPackage { + flush() + } + if err := scanner.Err(); err != nil { + return out, err + } + return out, nil +} + +// stripTOMLString removes surrounding quotes from a TOML string value. +// Doesn't handle escapes — name/version are well-formed package +// identifiers, never contain quotes. +func stripTOMLString(s string) string { + s = strings.TrimSpace(s) + s = strings.TrimSuffix(s, ",") + s = strings.TrimSpace(s) + if len(s) >= 2 && s[0] == '"' && s[len(s)-1] == '"' { + return s[1 : len(s)-1] + } + return s +} diff --git a/internal/scan/manifest/gemfile_lock.go b/internal/scan/manifest/gemfile_lock.go new file mode 100644 index 0000000..93c1fe9 --- /dev/null +++ b/internal/scan/manifest/gemfile_lock.go @@ -0,0 +1,104 @@ +package manifest + +// Gemfile.lock (RubyGems) — bespoke line-oriented format. +// +// Shape (abbreviated): +// GEM +// remote: https://rubygems.org/ +// specs: +// activerecord (7.1.3) +// activemodel (= 7.1.3) +// activesupport (= 7.1.3) +// i18n (1.14.1) +// concurrent-ruby (~> 1.0) +// PLATFORMS +// x86_64-linux +// DEPENDENCIES +// rails +// +// We care about the GEM section, specifically lines under `specs:` +// where the name + pinned version sit at indent-level 4. Transitive +// deps listed under a parent spec are at indent-level 6; we skip them +// because they're NOT pinned (only version constraints), and we'll +// see their pinned version when they appear as top-level specs. + +import ( + "bufio" + "bytes" + "strings" +) + +type GemfileLock struct{} + +func NewGemfileLock() Parser { return &GemfileLock{} } + +func (g *GemfileLock) Name() string { return "Gemfile.lock" } + +func (g *GemfileLock) Matches(path string) bool { + return HasSuffixI(path, "Gemfile.lock") +} + +func (g *GemfileLock) Parse(data []byte, path string) ([]Entry, error) { + out := []Entry{} + scanner := bufio.NewScanner(bytes.NewReader(data)) + inGemSpecs := false + for scanner.Scan() { + raw := scanner.Text() + // Track section boundaries. GEM section holds spec entries; + // DEPENDENCIES / PLATFORMS / BUNDLED WITH don't. + if !strings.HasPrefix(raw, " ") && !strings.HasPrefix(raw, "\t") { + inGemSpecs = false + continue + } + trimmed := strings.TrimSpace(raw) + if trimmed == "specs:" { + inGemSpecs = true + continue + } + if !inGemSpecs { + continue + } + // Indent-4 lines are pinned spec entries: + // " activerecord (7.1.3)" + // Indent-6 lines are transitive-dep declarations: + // " activemodel (= 7.1.3)" + // Count leading spaces to distinguish. + leading := 0 + for i := 0; i < len(raw); i++ { + if raw[i] != ' ' { + break + } + leading++ + } + if leading != 4 { + continue + } + // Parse "name (version)". + openParen := strings.Index(trimmed, " (") + if openParen < 0 { + continue + } + closeParen := strings.LastIndex(trimmed, ")") + if closeParen <= openParen+2 { + continue + } + name := trimmed[:openParen] + version := trimmed[openParen+2 : closeParen] + if name == "" || version == "" { + continue + } + out = append(out, Entry{ + Ecosystem: "rubygems", + Name: name, + Version: version, + File: path, + // We can't tell direct-vs-transitive without cross-checking + // DEPENDENCIES section. Leave false. + Direct: false, + }) + } + if err := scanner.Err(); err != nil { + return out, err + } + return out, nil +} diff --git a/internal/scan/manifest/go_mod.go b/internal/scan/manifest/go_mod.go new file mode 100644 index 0000000..9b599c8 --- /dev/null +++ b/internal/scan/manifest/go_mod.go @@ -0,0 +1,69 @@ +package manifest + +// Go module: parse go.sum rather than go.mod. +// +// go.sum lists every version of every module the build ever saw, one +// module-version per line. go.mod lists the _declared_ requirements, +// which is a smaller set but doesn't include transitive resolutions. +// For malware-analysis purposes we want what actually got pulled in; +// go.sum is the right source. +// +// Line shape: " h1:" +// Dup lines per version (one for "h1:" hash of zip, one for "/go.mod" +// hash) — dedupe by (path, version). + +import ( + "bufio" + "bytes" + "strings" +) + +type GoMod struct{} + +func NewGoMod() Parser { return &GoMod{} } + +func (g *GoMod) Name() string { return "go.sum" } + +func (g *GoMod) Matches(path string) bool { + return HasSuffixI(path, "go.sum") +} + +func (g *GoMod) Parse(data []byte, path string) ([]Entry, error) { + seen := map[string]bool{} + out := []Entry{} + scanner := bufio.NewScanner(bytes.NewReader(data)) + // go.sum lines can be long; default buffer is 64k. Bump to 1MB so + // pathological monorepo go.sum files don't truncate. + scanner.Buffer(make([]byte, 1024*1024), 1024*1024) + for scanner.Scan() { + fields := strings.Fields(scanner.Text()) + if len(fields) < 3 { + continue + } + modPath := fields[0] + version := fields[1] + // Skip "/go.mod" hash rows — same (path, version) info as the + // zip row, dedupe would catch it but the check is cheap. + if strings.HasSuffix(version, "/go.mod") { + continue + } + key := modPath + "@" + version + if seen[key] { + continue + } + seen[key] = true + out = append(out, Entry{ + Ecosystem: "go", + Name: modPath, + Version: version, + File: path, + // go.sum doesn't distinguish direct vs transitive; leave + // Direct false + document the limitation. + Direct: false, + }) + } + if err := scanner.Err(); err != nil { + return out, err + } + return out, nil +} diff --git a/internal/scan/manifest/manifest.go b/internal/scan/manifest/manifest.go new file mode 100644 index 0000000..fe3a651 --- /dev/null +++ b/internal/scan/manifest/manifest.go @@ -0,0 +1,100 @@ +// Package manifest parses lockfiles + manifests to produce a flat +// (ecosystem, name, version) list that nullify deps analyze compares +// between two commits. +// +// Each ecosystem implementation lives in its own file. ParseAll +// dispatches by filename — callers pass a list of paths (usually +// computed from a git diff) and receive every parseable entry. +// Unknown paths are silently skipped; there's no "no parser for X" +// error because most repos contain files that aren't lockfiles. +package manifest + +import ( + "errors" + "path/filepath" + "strings" +) + +// Entry is one parsed dependency record. Ecosystem matches the +// vdb_ecosystem enum values so we don't transform names between the CLI +// and vuln-database. +type Entry struct { + Ecosystem string + Name string + Version string + // File is the repo-relative path this entry came from — useful for + // error reporting and for scpm's audit log. + File string + // Direct is true when the lockfile declares the package at the top + // level of its "dependencies" block. False for transitive deps. + // Some formats (go.sum, Cargo.lock) don't distinguish; in that + // case we leave it false and document the limitation per-parser. + Direct bool +} + +// Parser is the per-file-format interface. Implementations are +// registered in parsers.go. Parse is given the file's bytes + its +// repo-relative path; it returns a flat Entry slice or an error. +type Parser interface { + Name() string + Matches(repoRelPath string) bool + Parse(data []byte, repoRelPath string) ([]Entry, error) +} + +// ErrNoParser is returned when no registered parser matches a path. +// ParseAll uses errors.Is to filter it out silently — the CLI +// workflow doesn't need to surface "we don't know what this file is." +var ErrNoParser = errors.New("no parser matched") + +// ParseAll applies every registered parser to the given paths + data +// slice. The slice is (path, contents) pairs; missing entries are +// skipped. Returns a flat slice of Entry + a map of path→parser-error +// for entries the parser matched but couldn't parse (malformed +// lockfile, partial write, etc.). +type File struct { + Path string + Data []byte +} + +type Result struct { + Entries []Entry + Errors map[string]error +} + +func ParseAll(parsers []Parser, files []File) Result { + out := Result{Errors: map[string]error{}} + for _, f := range files { + for _, p := range parsers { + if !p.Matches(f.Path) { + continue + } + entries, err := p.Parse(f.Data, f.Path) + if err != nil { + out.Errors[f.Path] = err + continue + } + out.Entries = append(out.Entries, entries...) + break // first matching parser wins; don't double-count + } + } + return out +} + +// DefaultParsers returns the full set in a stable order. Sequence +// matters only for tiebreaking when two parsers claim the same path +// (shouldn't happen with the current set). +func DefaultParsers() []Parser { + return []Parser{ + NewNPMLock(), + NewPyPILock(), + NewGoMod(), + NewCargoLock(), + NewGemfileLock(), + } +} + +// HasSuffixI is a case-insensitive suffix match shared by every parser. +// Saves every Matches() from importing strings directly. +func HasSuffixI(path, suffix string) bool { + return strings.EqualFold(filepath.Base(path), suffix) +} diff --git a/internal/scan/manifest/npm_lock.go b/internal/scan/manifest/npm_lock.go new file mode 100644 index 0000000..4f99631 --- /dev/null +++ b/internal/scan/manifest/npm_lock.go @@ -0,0 +1,87 @@ +package manifest + +// package-lock.json (npm v2/v3) parser. +// +// Shape summary: +// { +// "lockfileVersion": 2 | 3, +// "packages": { +// "": {...root...}, +// "node_modules/lodash": {"version": "4.17.21", "dev": false, ...}, +// "node_modules/@foo/bar": {"version": "1.0.0", ...} +// }, +// "dependencies": {...v1-style tree...} // v2 only, ignored here +// } +// +// We read the "packages" map and skip entries whose key doesn't start +// with "node_modules/" (the empty-key root package isn't a dependency +// we care about). Name is derived from the key suffix. + +import ( + "encoding/json" + "strings" +) + +type NPMLock struct{} + +func NewNPMLock() Parser { return &NPMLock{} } + +func (n *NPMLock) Name() string { return "package-lock.json" } + +func (n *NPMLock) Matches(path string) bool { + return HasSuffixI(path, "package-lock.json") +} + +type npmLockFile struct { + LockfileVersion int `json:"lockfileVersion"` + Packages map[string]npmLockPackageEntry `json:"packages"` +} + +type npmLockPackageEntry struct { + Version string `json:"version"` + Dev bool `json:"dev"` +} + +func (n *NPMLock) Parse(data []byte, path string) ([]Entry, error) { + var lock npmLockFile + if err := json.Unmarshal(data, &lock); err != nil { + return nil, err + } + out := make([]Entry, 0, len(lock.Packages)) + for key, pkg := range lock.Packages { + if key == "" { + continue // root package metadata, not a dependency + } + name := stripNodeModulesPrefix(key) + if name == "" || pkg.Version == "" { + continue + } + out = append(out, Entry{ + Ecosystem: "npm", + Name: name, + Version: pkg.Version, + File: path, + // Lockfile doesn't reliably expose "declared by root vs + // transitive"; leave Direct false. The deps-analyze + // workflow treats every change as worth inspecting. + Direct: false, + }) + } + return out, nil +} + +// stripNodeModulesPrefix turns the lockfile key shape into the npm +// package name. Handles nested node_modules (transitive conflicts) by +// taking only the last node_modules/ segment: +// "node_modules/lodash" → "lodash" +// "node_modules/a/node_modules/lodash" → "lodash" +// "node_modules/@scope/pkg" → "@scope/pkg" +// "node_modules/a/node_modules/@scope/pkg" → "@scope/pkg" +func stripNodeModulesPrefix(key string) string { + const prefix = "node_modules/" + idx := strings.LastIndex(key, prefix) + if idx < 0 { + return "" + } + return key[idx+len(prefix):] +} diff --git a/internal/scan/manifest/pypi_lock.go b/internal/scan/manifest/pypi_lock.go new file mode 100644 index 0000000..81fa81b --- /dev/null +++ b/internal/scan/manifest/pypi_lock.go @@ -0,0 +1,99 @@ +package manifest + +// PyPI: requirements.txt parser. Handles the PEP 440 "foo==1.2.3" shape +// + common comment / editable-install forms by skipping them. +// +// We intentionally target requirements.txt (rather than poetry.lock / +// uv.lock) because: +// - Most security-scanning pipelines emit pinned requirements.txt +// regardless of the dev tooling, so parsing this covers the +// widest surface with the least code. +// - poetry.lock / uv.lock / Pipfile.lock all have richer formats +// (TOML / custom) that are follow-up work. +// +// Lines we recognise as pinned deps: +// foo==1.2.3 +// foo==1.2.3 # comment +// foo ~= 1.2 # skipped (non-exact pin — not useful for analysis) +// Skipped: comments, blank lines, -r/-e directives, git+ URLs, wheels. + +import ( + "bufio" + "bytes" + "strings" +) + +type PyPILock struct{} + +func NewPyPILock() Parser { return &PyPILock{} } + +func (p *PyPILock) Name() string { return "requirements.txt" } + +func (p *PyPILock) Matches(path string) bool { + return HasSuffixI(path, "requirements.txt") || + HasSuffixI(path, "requirements-dev.txt") || + HasSuffixI(path, "requirements_prod.txt") +} + +func (p *PyPILock) Parse(data []byte, path string) ([]Entry, error) { + out := []Entry{} + scanner := bufio.NewScanner(bytes.NewReader(data)) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + // Strip inline comments. + if idx := strings.Index(line, "#"); idx >= 0 { + line = strings.TrimSpace(line[:idx]) + } + // Skip directive lines (-r, -e, --index-url, etc.). + if strings.HasPrefix(line, "-") { + continue + } + // Skip URL-based deps (git+, https://). + if strings.HasPrefix(line, "git+") || strings.HasPrefix(line, "http") { + continue + } + // Only exact pins. `==` is the PEP 440 equality operator; we + // want the left side as name, right side as version. + eq := strings.Index(line, "==") + if eq < 0 { + continue + } + name := strings.TrimSpace(line[:eq]) + ver := strings.TrimSpace(line[eq+2:]) + // Handle `name==1.2.3; python_version >= "3.8"` environment + // markers — strip everything after the first semicolon. + if semi := strings.Index(ver, ";"); semi >= 0 { + ver = strings.TrimSpace(ver[:semi]) + } + // Strip wheel/hash suffixes like `==1.2.3 --hash=sha256:...`. + if sp := strings.IndexByte(ver, ' '); sp >= 0 { + ver = ver[:sp] + } + // Normalise the PyPI name per PEP 503 — lower-case, hyphens + // and underscores interchangeable. We store the raw name, but + // downstream vuln-database treats "requests" and "Requests" + // as identical, so exact-casing matters less than completeness. + name = strings.TrimSpace(name) + if name == "" || ver == "" { + continue + } + // Strip extras: `requests[security]==2.32.3` → "requests". + if bracket := strings.Index(name, "["); bracket >= 0 { + name = name[:bracket] + } + out = append(out, Entry{ + Ecosystem: "pypi", + Name: name, + Version: ver, + File: path, + Direct: true, // requirements.txt entries are all explicitly declared + }) + } + if err := scanner.Err(); err != nil { + return out, err + } + return out, nil +} From 357a2555d0a69dd4ac6a915732c61207993917f9 Mon Sep 17 00:00:00 2001 From: Tim Thacker Date: Mon, 25 May 2026 20:22:54 +1000 Subject: [PATCH 2/4] style(cli): gofmt doc-comment blocks in CI provider files golangci-lint v2.12.0 gofmt flagged the space-indented env-var tables in the AWS CodeBuild / Azure DevOps / Bitbucket provider doc comments; reflow to the Go 1.19 preformatted form (blank // + tab indent). Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/ci/aws_codebuild.go | 9 +++++---- internal/ci/azure_devops.go | 13 +++++++------ internal/ci/bitbucket_pipelines.go | 11 ++++++----- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/internal/ci/aws_codebuild.go b/internal/ci/aws_codebuild.go index bce81a4..a10ca53 100644 --- a/internal/ci/aws_codebuild.go +++ b/internal/ci/aws_codebuild.go @@ -10,10 +10,11 @@ import ( // AWSCodeBuild — https://docs.aws.amazon.com/codebuild/latest/userguide/build-env-ref-env-vars.html // // Key envs: -// CODEBUILD_BUILD_ID signature -// CODEBUILD_RESOLVED_SOURCE_VERSION head -// CODEBUILD_WEBHOOK_BASE_REF base (webhook-triggered PRs) -// CODEBUILD_SOURCE_REPO_URL https://github.com/org/repo.git +// +// CODEBUILD_BUILD_ID signature +// CODEBUILD_RESOLVED_SOURCE_VERSION head +// CODEBUILD_WEBHOOK_BASE_REF base (webhook-triggered PRs) +// CODEBUILD_SOURCE_REPO_URL https://github.com/org/repo.git type AWSCodeBuild struct{} func NewAWSCodeBuild() Provider { return &AWSCodeBuild{} } diff --git a/internal/ci/azure_devops.go b/internal/ci/azure_devops.go index 57a35a6..7d4d5b7 100644 --- a/internal/ci/azure_devops.go +++ b/internal/ci/azure_devops.go @@ -10,12 +10,13 @@ import ( // AzureDevOps — https://learn.microsoft.com/en-us/azure/devops/pipelines/build/variables // // Key envs: -// TF_BUILD=True signature -// BUILD_SOURCEVERSION head commit -// SYSTEM_PULLREQUEST_TARGETBRANCHNAME target branch (PR builds) -// SYSTEM_PULLREQUEST_PULLREQUESTNUMBER PR number -// BUILD_REPOSITORY_NAME repo name (no owner) -// BUILD_BUILDID run id +// +// TF_BUILD=True signature +// BUILD_SOURCEVERSION head commit +// SYSTEM_PULLREQUEST_TARGETBRANCHNAME target branch (PR builds) +// SYSTEM_PULLREQUEST_PULLREQUESTNUMBER PR number +// BUILD_REPOSITORY_NAME repo name (no owner) +// BUILD_BUILDID run id type AzureDevOps struct{} func NewAzureDevOps() Provider { return &AzureDevOps{} } diff --git a/internal/ci/bitbucket_pipelines.go b/internal/ci/bitbucket_pipelines.go index 6fcc996..729ee5a 100644 --- a/internal/ci/bitbucket_pipelines.go +++ b/internal/ci/bitbucket_pipelines.go @@ -10,11 +10,12 @@ import ( // BitbucketPipelines — https://support.atlassian.com/bitbucket-cloud/docs/variables-and-secrets/ // // Key envs: -// BITBUCKET_BUILD_NUMBER signature -// BITBUCKET_COMMIT head -// BITBUCKET_PR_DESTINATION_BRANCH target branch (PR builds) -// BITBUCKET_PR_ID PR number -// BITBUCKET_REPO_OWNER / REPO_SLUG +// +// BITBUCKET_BUILD_NUMBER signature +// BITBUCKET_COMMIT head +// BITBUCKET_PR_DESTINATION_BRANCH target branch (PR builds) +// BITBUCKET_PR_ID PR number +// BITBUCKET_REPO_OWNER / REPO_SLUG type BitbucketPipelines struct{} func NewBitbucketPipelines() Provider { return &BitbucketPipelines{} } From ceff9696c877583ba5277da4227760632bec4c30 Mon Sep 17 00:00:00 2001 From: Tim Thacker Date: Mon, 25 May 2026 20:25:52 +1000 Subject: [PATCH 3/4] style(cli): gofmt remaining deps-analyze files Reflow the rest of the CI-provider doc-comment tables to Go 1.19 preformatted form, and fix const/var/struct-tag alignment in deps_analyze.go and npm_lock.go. golangci-lint caps gofmt reports at 3 per run, so these surfaced only after the first batch was fixed. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/ci/circleci.go | 19 ++++++++++--------- internal/ci/github_actions.go | 11 ++++++----- internal/ci/gitlab_ci.go | 15 ++++++++------- internal/ci/jenkins.go | 11 ++++++----- internal/ci/local.go | 5 +++-- internal/commands/deps_analyze.go | 24 ++++++++++++------------ internal/scan/manifest/npm_lock.go | 11 ++++++----- 7 files changed, 51 insertions(+), 45 deletions(-) diff --git a/internal/ci/circleci.go b/internal/ci/circleci.go index 2b3136a..d61e96e 100644 --- a/internal/ci/circleci.go +++ b/internal/ci/circleci.go @@ -12,15 +12,16 @@ import ( // CircleCI — https://circleci.com/docs/variables/ // // Key envs: -// CIRCLECI=true -// CIRCLE_SHA1 head -// CIRCLE_PR_NUMBER PR number (only when triggered from a -// forked-PR webhook) -// CIRCLE_PULL_REQUEST "https://github.com/org/repo/pull/N" -// — parse to recover the PR number -// for non-forked PRs too -// CIRCLE_PROJECT_USERNAME/REPONAME -// CIRCLE_WORKFLOW_ID run id +// +// CIRCLECI=true +// CIRCLE_SHA1 head +// CIRCLE_PR_NUMBER PR number (only when triggered from a +// forked-PR webhook) +// CIRCLE_PULL_REQUEST "https://github.com/org/repo/pull/N" +// — parse to recover the PR number +// for non-forked PRs too +// CIRCLE_PROJECT_USERNAME/REPONAME +// CIRCLE_WORKFLOW_ID run id type CircleCI struct{} func NewCircleCI() Provider { return &CircleCI{} } diff --git a/internal/ci/github_actions.go b/internal/ci/github_actions.go index 11bb176..a46a59f 100644 --- a/internal/ci/github_actions.go +++ b/internal/ci/github_actions.go @@ -13,11 +13,12 @@ import ( // GitHubActions — https://docs.github.com/en/actions/learn-github-actions/variables // // Key envs we rely on: -// GITHUB_ACTIONS=true signature -// GITHUB_SHA head commit -// GITHUB_EVENT_NAME=pull_request + GITHUB_BASE_REF (target branch name) -// GITHUB_REPOSITORY owner/name -// GITHUB_RUN_ID + GITHUB_RUN_ATTEMPT +// +// GITHUB_ACTIONS=true signature +// GITHUB_SHA head commit +// GITHUB_EVENT_NAME=pull_request + GITHUB_BASE_REF (target branch name) +// GITHUB_REPOSITORY owner/name +// GITHUB_RUN_ID + GITHUB_RUN_ATTEMPT // // PR base is tricky: GITHUB_BASE_REF is just the target branch NAME, not // a commit. BaseRef() resolves it by running `git rev-parse diff --git a/internal/ci/gitlab_ci.go b/internal/ci/gitlab_ci.go index 954f8f2..9e6e953 100644 --- a/internal/ci/gitlab_ci.go +++ b/internal/ci/gitlab_ci.go @@ -11,13 +11,14 @@ import ( // GitLabCI — https://docs.gitlab.com/ci/variables/predefined_variables.html // // Key envs: -// GITLAB_CI=true -// CI_COMMIT_SHA head -// CI_MERGE_REQUEST_TARGET_BRANCH_SHA base (MR builds) -// CI_COMMIT_BEFORE_SHA base (push builds) -// CI_MERGE_REQUEST_IID PR number -// CI_PROJECT_PATH owner/name -// CI_PIPELINE_ID run id +// +// GITLAB_CI=true +// CI_COMMIT_SHA head +// CI_MERGE_REQUEST_TARGET_BRANCH_SHA base (MR builds) +// CI_COMMIT_BEFORE_SHA base (push builds) +// CI_MERGE_REQUEST_IID PR number +// CI_PROJECT_PATH owner/name +// CI_PIPELINE_ID run id type GitLabCI struct{} func NewGitLabCI() Provider { return &GitLabCI{} } diff --git a/internal/ci/jenkins.go b/internal/ci/jenkins.go index 71c8924..06767b7 100644 --- a/internal/ci/jenkins.go +++ b/internal/ci/jenkins.go @@ -15,11 +15,12 @@ import ( // the operator prefers to set by hand. // // Key envs: -// JENKINS_URL signature -// GIT_COMMIT head -// CHANGE_ID PR number (Multibranch) -// CHANGE_TARGET PR target branch -// BUILD_NUMBER run id +// +// JENKINS_URL signature +// GIT_COMMIT head +// CHANGE_ID PR number (Multibranch) +// CHANGE_TARGET PR target branch +// BUILD_NUMBER run id type Jenkins struct{} func NewJenkins() Provider { return &Jenkins{} } diff --git a/internal/ci/local.go b/internal/ci/local.go index 1a0de4a..5f54a91 100644 --- a/internal/ci/local.go +++ b/internal/ci/local.go @@ -12,8 +12,9 @@ import ( // the registry list. // // Defaults: -// BaseRef = origin/HEAD (the remote's default-branch tip) -// HeadRef = HEAD +// +// BaseRef = origin/HEAD (the remote's default-branch tip) +// HeadRef = HEAD // // Operators can override either via NULLIFY_BASE_REF / NULLIFY_HEAD_REF // env vars if their local-dev workflow needs something specific. diff --git a/internal/commands/deps_analyze.go b/internal/commands/deps_analyze.go index c892022..eb7fdac 100644 --- a/internal/commands/deps_analyze.go +++ b/internal/commands/deps_analyze.go @@ -33,12 +33,12 @@ import ( // ExitCodes — stable exit-code table so CI operators can gate merges. const ( - exitNoFinding = 0 - exitVulnerableFound = 10 - exitSuspiciousFound = 20 - exitMaliciousFound = 30 - exitInvalidInvocation = 2 - exitTransientFailure = 1 + exitNoFinding = 0 + exitVulnerableFound = 10 + exitSuspiciousFound = 20 + exitMaliciousFound = 30 + exitInvalidInvocation = 2 + exitTransientFailure = 1 ) // RegisterDepsAnalyzeCommand attaches `deps analyze` to the given @@ -63,12 +63,12 @@ func RegisterDepsAnalyzeCommand(parent *cobra.Command, getClient func() *api.Cli } var ( - baseRef string - headRef string - repoPath string - wait bool - failOn string - outputFormat string + baseRef string + headRef string + repoPath string + wait bool + failOn string + outputFormat string idempotencySeed string ) diff --git a/internal/scan/manifest/npm_lock.go b/internal/scan/manifest/npm_lock.go index 4f99631..c6b1ff8 100644 --- a/internal/scan/manifest/npm_lock.go +++ b/internal/scan/manifest/npm_lock.go @@ -33,7 +33,7 @@ func (n *NPMLock) Matches(path string) bool { } type npmLockFile struct { - LockfileVersion int `json:"lockfileVersion"` + LockfileVersion int `json:"lockfileVersion"` Packages map[string]npmLockPackageEntry `json:"packages"` } @@ -73,10 +73,11 @@ func (n *NPMLock) Parse(data []byte, path string) ([]Entry, error) { // stripNodeModulesPrefix turns the lockfile key shape into the npm // package name. Handles nested node_modules (transitive conflicts) by // taking only the last node_modules/ segment: -// "node_modules/lodash" → "lodash" -// "node_modules/a/node_modules/lodash" → "lodash" -// "node_modules/@scope/pkg" → "@scope/pkg" -// "node_modules/a/node_modules/@scope/pkg" → "@scope/pkg" +// +// "node_modules/lodash" → "lodash" +// "node_modules/a/node_modules/lodash" → "lodash" +// "node_modules/@scope/pkg" → "@scope/pkg" +// "node_modules/a/node_modules/@scope/pkg" → "@scope/pkg" func stripNodeModulesPrefix(key string) string { const prefix = "node_modules/" idx := strings.LastIndex(key, prefix) From 74c27aa79333bcbffff5320061afb0c52a4429fb Mon Sep 17 00:00:00 2001 From: Tim Thacker Date: Mon, 25 May 2026 23:25:30 +1000 Subject: [PATCH 4/4] fix(cli): make deps-analyze gate functional + add tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses review of the deps-analyze workflow. Blockers: - Wire ExitCodeFromError in main.go so the severity exit codes (10/20/30) actually fire — previously every result exited 1, so the CI merge-gate was inert. Set SilenceUsage so a verdict failure doesn't print cobra usage. - Fix --fail-on=none, which always exited 10: checkFailOn now treats a zero-rank threshold as "never fail". Correctness: - renderResults no longer index-zips two parallel slices (a mid-list analyze failure misattributed verdicts to the wrong package and dropped the tail). One analyzedDep{dep,resp,err} entry per dep now. - Unknown/unrecognised non-empty verdicts fail closed: warn and treat as malicious so a server-side rename can't silently bypass the gate. - Provider BaseRef/HeadRef + resolveRef take repoPath and set cmd.Dir, so --repo is honoured (ref resolution no longer runs in the CWD). Feature: - Implement --wait (was parsed but unused): poll by re-POSTing the idempotent analyze request until the job is terminal or --wait-timeout elapses; adds --wait-timeout/--wait-interval. An incomplete analysis now exits transiently (1) instead of greenlighting the merge. Cleanup: - Typed enums replace magic strings: ci.Platform, scan.ChangeKind, manifest.Ecosystem, commands.Verdict. - Remove the `var _ = fmt.Sprintf` hack and the //nolint:errcheck. - Correct the doc comments that claimed a benchmarks.PipelinePlatform enum and a `make lint-ci-providers` target that don't exist. - Fix the misleading idempotency-key comment; handle SSH repo URLs in AWSCodeBuild.RepoSlug; note fetch-depth:0 for GitHub push diffs. Tests: - Add unit tests for the five lockfile parsers, diffEntries, CI provider detection/parsing, and the verdict/gate/render/wait/exit-code logic. Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/cli/main.go | 6 +- internal/ci/aws_codebuild.go | 22 +- internal/ci/azure_devops.go | 14 +- internal/ci/bitbucket_pipelines.go | 14 +- internal/ci/ci.go | 52 ++-- internal/ci/ci_test.go | 131 ++++++++++ internal/ci/circleci.go | 19 +- internal/ci/github_actions.go | 29 ++- internal/ci/gitlab_ci.go | 12 +- internal/ci/google_cloud_build.go | 14 +- internal/ci/jenkins.go | 16 +- internal/ci/local.go | 16 +- internal/ci/registry.go | 5 +- internal/commands/deps_analyze.go | 324 ++++++++++++++++++------ internal/commands/deps_analyze_test.go | 219 ++++++++++++++++ internal/scan/diff.go | 28 +- internal/scan/diff_test.go | 53 ++++ internal/scan/manifest/cargo_lock.go | 2 +- internal/scan/manifest/gemfile_lock.go | 2 +- internal/scan/manifest/go_mod.go | 2 +- internal/scan/manifest/manifest.go | 22 +- internal/scan/manifest/manifest_test.go | 200 +++++++++++++++ internal/scan/manifest/npm_lock.go | 2 +- internal/scan/manifest/pypi_lock.go | 2 +- 24 files changed, 1007 insertions(+), 199 deletions(-) create mode 100644 internal/ci/ci_test.go create mode 100644 internal/commands/deps_analyze_test.go create mode 100644 internal/scan/diff_test.go create mode 100644 internal/scan/manifest/manifest_test.go diff --git a/cmd/cli/main.go b/cmd/cli/main.go index b9876ca..714ae03 100644 --- a/cmd/cli/main.go +++ b/cmd/cli/main.go @@ -4,10 +4,14 @@ import ( "os" "github.com/nullify-platform/cli/cmd/cli/cmd" + "github.com/nullify-platform/cli/internal/commands" ) func main() { + // commands.ExitCodeFromError maps the deps-analyze workflow's + // exit-coded errors (severity gate: 10/20/30, invalid: 2, transient: + // 1) onto the process exit code; any other error falls back to 1. if err := cmd.Execute(); err != nil { - os.Exit(1) + os.Exit(commands.ExitCodeFromError(err)) } } diff --git a/internal/ci/aws_codebuild.go b/internal/ci/aws_codebuild.go index a10ca53..791d30b 100644 --- a/internal/ci/aws_codebuild.go +++ b/internal/ci/aws_codebuild.go @@ -19,24 +19,24 @@ type AWSCodeBuild struct{} func NewAWSCodeBuild() Provider { return &AWSCodeBuild{} } -func (a *AWSCodeBuild) Platform() string { return "AWS_CODEBUILD" } +func (a *AWSCodeBuild) Platform() Platform { return PlatformAWSCodeBuild } func (a *AWSCodeBuild) Detect() bool { return os.Getenv("CODEBUILD_BUILD_ID") != "" } -func (a *AWSCodeBuild) BaseRef(ctx context.Context) (string, error) { +func (a *AWSCodeBuild) BaseRef(ctx context.Context, repoPath string) (string, error) { // CODEBUILD_WEBHOOK_BASE_REF is "refs/heads/main" shape — strip the // prefix + resolve. if v := os.Getenv("CODEBUILD_WEBHOOK_BASE_REF"); v != "" { - return resolveRef(ctx, "origin/"+strings.TrimPrefix(v, "refs/heads/")) + return resolveRef(ctx, repoPath, "origin/"+strings.TrimPrefix(v, "refs/heads/")) } - return resolveRef(ctx, "origin/HEAD") + return resolveRef(ctx, repoPath, "origin/HEAD") } -func (a *AWSCodeBuild) HeadRef(ctx context.Context) (string, error) { +func (a *AWSCodeBuild) HeadRef(ctx context.Context, repoPath string) (string, error) { if v := os.Getenv("CODEBUILD_RESOLVED_SOURCE_VERSION"); v != "" { return v, nil } - return resolveRef(ctx, "HEAD") + return resolveRef(ctx, repoPath, "HEAD") } func (a *AWSCodeBuild) PRNumber() (int, bool) { @@ -60,9 +60,13 @@ func (a *AWSCodeBuild) RepoSlug() (string, string, bool) { if repoURL == "" { return "", "", false } - // Strip trailing .git + any protocol; keep the final "owner/name" - // segment. + // Strip trailing .git. Handle both HTTPS + // ("https://github.com/owner/repo.git") and SSH + // ("git@github.com:owner/repo.git") forms: in the SSH shape the + // owner is separated from the host by ':', so normalise that to '/' + // before taking the final "owner/name" segments. trimmed := strings.TrimSuffix(repoURL, ".git") + trimmed = strings.ReplaceAll(trimmed, ":", "/") parts := strings.Split(trimmed, "/") if len(parts) < 2 { return "", "", false @@ -77,5 +81,5 @@ func (a *AWSCodeBuild) EnrichHeader(h http.Header) { if v := os.Getenv("CODEBUILD_RESOLVED_SOURCE_VERSION"); v != "" { h.Set("X-Nullify-CI-Commit", v) } - h.Set("X-Nullify-CI-Provider", a.Platform()) + h.Set("X-Nullify-CI-Provider", a.Platform().String()) } diff --git a/internal/ci/azure_devops.go b/internal/ci/azure_devops.go index 7d4d5b7..e7de273 100644 --- a/internal/ci/azure_devops.go +++ b/internal/ci/azure_devops.go @@ -21,22 +21,22 @@ type AzureDevOps struct{} func NewAzureDevOps() Provider { return &AzureDevOps{} } -func (a *AzureDevOps) Platform() string { return "AZURE_DEVOPS" } +func (a *AzureDevOps) Platform() Platform { return PlatformAzureDevOps } func (a *AzureDevOps) Detect() bool { return os.Getenv("TF_BUILD") == "True" } -func (a *AzureDevOps) BaseRef(ctx context.Context) (string, error) { +func (a *AzureDevOps) BaseRef(ctx context.Context, repoPath string) (string, error) { if v := os.Getenv("SYSTEM_PULLREQUEST_TARGETBRANCHNAME"); v != "" { - return resolveRef(ctx, "origin/"+v) + return resolveRef(ctx, repoPath, "origin/"+v) } - return resolveRef(ctx, "HEAD^") + return resolveRef(ctx, repoPath, "HEAD^") } -func (a *AzureDevOps) HeadRef(ctx context.Context) (string, error) { +func (a *AzureDevOps) HeadRef(ctx context.Context, repoPath string) (string, error) { if v := os.Getenv("BUILD_SOURCEVERSION"); v != "" { return v, nil } - return resolveRef(ctx, "HEAD") + return resolveRef(ctx, repoPath, "HEAD") } func (a *AzureDevOps) PRNumber() (int, bool) { @@ -69,5 +69,5 @@ func (a *AzureDevOps) EnrichHeader(h http.Header) { if v := os.Getenv("BUILD_SOURCEVERSION"); v != "" { h.Set("X-Nullify-CI-Commit", v) } - h.Set("X-Nullify-CI-Provider", a.Platform()) + h.Set("X-Nullify-CI-Provider", a.Platform().String()) } diff --git a/internal/ci/bitbucket_pipelines.go b/internal/ci/bitbucket_pipelines.go index 729ee5a..980372a 100644 --- a/internal/ci/bitbucket_pipelines.go +++ b/internal/ci/bitbucket_pipelines.go @@ -20,24 +20,24 @@ type BitbucketPipelines struct{} func NewBitbucketPipelines() Provider { return &BitbucketPipelines{} } -func (b *BitbucketPipelines) Platform() string { return "BITBUCKET_PIPELINES" } +func (b *BitbucketPipelines) Platform() Platform { return PlatformBitbucketPipelines } func (b *BitbucketPipelines) Detect() bool { return os.Getenv("BITBUCKET_BUILD_NUMBER") != "" } -func (b *BitbucketPipelines) BaseRef(ctx context.Context) (string, error) { +func (b *BitbucketPipelines) BaseRef(ctx context.Context, repoPath string) (string, error) { if v := os.Getenv("BITBUCKET_PR_DESTINATION_BRANCH"); v != "" { - return resolveRef(ctx, "origin/"+v) + return resolveRef(ctx, repoPath, "origin/"+v) } - return resolveRef(ctx, "HEAD^") + return resolveRef(ctx, repoPath, "HEAD^") } -func (b *BitbucketPipelines) HeadRef(ctx context.Context) (string, error) { +func (b *BitbucketPipelines) HeadRef(ctx context.Context, repoPath string) (string, error) { if v := os.Getenv("BITBUCKET_COMMIT"); v != "" { return v, nil } - return resolveRef(ctx, "HEAD") + return resolveRef(ctx, repoPath, "HEAD") } func (b *BitbucketPipelines) PRNumber() (int, bool) { @@ -68,5 +68,5 @@ func (b *BitbucketPipelines) EnrichHeader(h http.Header) { if v := os.Getenv("BITBUCKET_COMMIT"); v != "" { h.Set("X-Nullify-CI-Commit", v) } - h.Set("X-Nullify-CI-Provider", b.Platform()) + h.Set("X-Nullify-CI-Provider", b.Platform().String()) } diff --git a/internal/ci/ci.go b/internal/ci/ci.go index 323912e..1c7e30d 100644 --- a/internal/ci/ci.go +++ b/internal/ci/ci.go @@ -7,10 +7,9 @@ // the Local fallback is last so an unrecognised CI still works off // `git rev-parse` defaults. // -// This package deliberately uses the PipelinePlatform enum from -// benchmarks/models — the canonical set of platforms Nullify supports -// lives there, and any new provider added to this package must have a -// matching enum value (lint check in cli/Makefile enforces this). +// Each provider reports a Platform value from the locally-defined set of +// constants below. When adding a provider, define its constant here and +// return it from Platform(). package ci import ( @@ -19,15 +18,32 @@ import ( "net/http" ) +// Platform is the canonical identifier for a CI/CD platform. Values are +// stamped onto the X-Nullify-CI-Provider header so scpm's audit log can +// attribute a CLI run to its CI environment. +type Platform string + +const ( + PlatformGitHubActions Platform = "GITHUB_ACTIONS" + PlatformGitLabCI Platform = "GITLAB_CI" + PlatformCircleCI Platform = "CIRCLECI" + PlatformBitbucketPipelines Platform = "BITBUCKET_PIPELINES" + PlatformJenkins Platform = "JENKINS" + PlatformAzureDevOps Platform = "AZURE_DEVOPS" + PlatformGoogleCloudBuild Platform = "GOOGLE_CLOUD_BUILD" + PlatformAWSCodeBuild Platform = "AWS_CODEBUILD" + PlatformOther Platform = "OTHER" +) + +func (p Platform) String() string { return string(p) } + // Provider identifies one CI/CD platform and exposes the information // the CLI's deps-analyze + containers-analyze workflows need. All // methods are expected to be cheap — provider detection happens on // every CLI invocation. type Provider interface { - // Platform returns the canonical PipelinePlatform enum value (from - // benchmarks/models). Must match the string in the Makefile's - // provider-coverage lint target. - Platform() string + // Platform returns this provider's Platform constant. + Platform() Platform // Detect returns true when the current process env matches this // provider's signature. Detect MUST NOT touch the network or @@ -35,17 +51,17 @@ type Provider interface { Detect() bool // BaseRef returns the commit or ref (short sha, full sha, or - // branch name) the CI declared as the base of the current build. - // For PR builds, this is the PR target branch's HEAD at PR open - // time; for push builds, it's the previous HEAD of the pushed - // branch. Fall back to "origin/" when CI doesn't - // expose a specific base. - BaseRef(ctx context.Context) (string, error) + // branch name) the CI declared as the base of the current build, + // resolved against the git repository at repoPath. For PR builds, + // this is the PR target branch's HEAD at PR open time; for push + // builds, it's the previous HEAD of the pushed branch. Fall back to + // "origin/" when CI doesn't expose a specific base. + BaseRef(ctx context.Context, repoPath string) (string, error) - // HeadRef returns the commit the current build is running against. - // For PR builds, this is the PR's head commit; for push builds, - // the pushed commit. - HeadRef(ctx context.Context) (string, error) + // HeadRef returns the commit the current build is running against, + // resolved against the git repository at repoPath. For PR builds, + // this is the PR's head commit; for push builds, the pushed commit. + HeadRef(ctx context.Context, repoPath string) (string, error) // PRNumber returns the pull-request number if the current build is // a PR, and (0, false) otherwise. Used for diagnostic logging + as diff --git a/internal/ci/ci_test.go b/internal/ci/ci_test.go new file mode 100644 index 0000000..519962e --- /dev/null +++ b/internal/ci/ci_test.go @@ -0,0 +1,131 @@ +package ci + +import "testing" + +// signatureEnvs are every env var any provider's Detect() keys on. Tests +// clear them all, then set only the ones under test, so the host's own +// environment can't leak into detection. +var signatureEnvs = []string{ + "GITHUB_ACTIONS", "GITLAB_CI", "CIRCLECI", "BITBUCKET_BUILD_NUMBER", + "JENKINS_URL", "TF_BUILD", "BUILD_ID", "PROJECT_ID", "CODEBUILD_BUILD_ID", +} + +func clearSignatureEnvs(t *testing.T) { + t.Helper() + for _, k := range signatureEnvs { + t.Setenv(k, "") + } +} + +func TestDetectPriority(t *testing.T) { + cases := []struct { + name string + env map[string]string + want Platform + }{ + {"github", map[string]string{"GITHUB_ACTIONS": "true"}, PlatformGitHubActions}, + {"gitlab", map[string]string{"GITLAB_CI": "true"}, PlatformGitLabCI}, + {"circleci", map[string]string{"CIRCLECI": "true"}, PlatformCircleCI}, + {"bitbucket", map[string]string{"BITBUCKET_BUILD_NUMBER": "42"}, PlatformBitbucketPipelines}, + {"jenkins", map[string]string{"JENKINS_URL": "https://ci"}, PlatformJenkins}, + {"azure", map[string]string{"TF_BUILD": "True"}, PlatformAzureDevOps}, + {"gcb", map[string]string{"BUILD_ID": "1", "PROJECT_ID": "p"}, PlatformGoogleCloudBuild}, + {"aws", map[string]string{"CODEBUILD_BUILD_ID": "x"}, PlatformAWSCodeBuild}, + {"fallback", map[string]string{}, PlatformOther}, + // GitHub wins when several signatures match (priority order). + {"github_over_gitlab", map[string]string{"GITHUB_ACTIONS": "true", "GITLAB_CI": "true"}, PlatformGitHubActions}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + clearSignatureEnvs(t) + for k, v := range c.env { + t.Setenv(k, v) + } + p, err := Detect(Default()) + if err != nil { + t.Fatalf("Detect: %v", err) + } + if p.Platform() != c.want { + t.Errorf("Platform() = %q, want %q", p.Platform(), c.want) + } + }) + } +} + +func TestDetect_NoLocalFallback(t *testing.T) { + clearSignatureEnvs(t) + if _, err := Detect([]Provider{NewGitHubActions()}); err != ErrNoProvider { + t.Fatalf("err = %v, want ErrNoProvider", err) + } +} + +func TestLocalAlwaysLast(t *testing.T) { + list := Default() + if _, ok := list[len(list)-1].(*Local); !ok { + t.Fatalf("last provider = %T, want *Local", list[len(list)-1]) + } +} + +func TestRepoSlug(t *testing.T) { + cases := []struct { + name string + provider Provider + env map[string]string + owner, repo string + ok bool + }{ + {"github", NewGitHubActions(), map[string]string{"GITHUB_REPOSITORY": "octocat/hello"}, "octocat", "hello", true}, + {"gitlab_nested", NewGitLabCI(), map[string]string{"CI_PROJECT_PATH": "group/sub/proj"}, "group/sub", "proj", true}, + {"circleci", NewCircleCI(), map[string]string{"CIRCLE_PROJECT_USERNAME": "acme", "CIRCLE_PROJECT_REPONAME": "widget"}, "acme", "widget", true}, + {"aws_https", NewAWSCodeBuild(), map[string]string{"CODEBUILD_SOURCE_REPO_URL": "https://github.com/acme/widget.git"}, "acme", "widget", true}, + {"aws_ssh", NewAWSCodeBuild(), map[string]string{"CODEBUILD_SOURCE_REPO_URL": "git@github.com:acme/widget.git"}, "acme", "widget", true}, + {"github_missing", NewGitHubActions(), map[string]string{"GITHUB_REPOSITORY": ""}, "", "", false}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + for k, v := range c.env { + t.Setenv(k, v) + } + owner, repo, ok := c.provider.RepoSlug() + if ok != c.ok || owner != c.owner || repo != c.repo { + t.Errorf("RepoSlug() = (%q, %q, %v), want (%q, %q, %v)", owner, repo, ok, c.owner, c.repo, c.ok) + } + }) + } +} + +func TestPRNumber(t *testing.T) { + cases := []struct { + name string + provider Provider + env map[string]string + want int + ok bool + }{ + {"github_pr", NewGitHubActions(), map[string]string{"GITHUB_REF": "refs/pull/123/merge"}, 123, true}, + {"github_push", NewGitHubActions(), map[string]string{"GITHUB_REF": "refs/heads/main"}, 0, false}, + {"circle_url", NewCircleCI(), map[string]string{"CIRCLE_PULL_REQUEST": "https://github.com/org/repo/pull/77"}, 77, true}, + {"gitlab_iid", NewGitLabCI(), map[string]string{"CI_MERGE_REQUEST_IID": "9"}, 9, true}, + {"aws_trigger", NewAWSCodeBuild(), map[string]string{"CODEBUILD_WEBHOOK_TRIGGER": "pr/55"}, 55, true}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + for k, v := range c.env { + t.Setenv(k, v) + } + n, ok := c.provider.PRNumber() + if n != c.want || ok != c.ok { + t.Errorf("PRNumber() = (%d, %v), want (%d, %v)", n, ok, c.want, c.ok) + } + }) + } +} + +func TestEnrichHeaderStampsProvider(t *testing.T) { + clearSignatureEnvs(t) + h := make(map[string][]string) + NewLocal().EnrichHeader(h) + if got := h["X-Nullify-Ci-Provider"]; len(got) != 1 || got[0] != string(PlatformOther) { + t.Fatalf("X-Nullify-CI-Provider = %v, want [%q]", got, PlatformOther) + } +} diff --git a/internal/ci/circleci.go b/internal/ci/circleci.go index d61e96e..2ed2391 100644 --- a/internal/ci/circleci.go +++ b/internal/ci/circleci.go @@ -2,7 +2,6 @@ package ci import ( "context" - "fmt" "net/http" "os" "strconv" @@ -26,25 +25,25 @@ type CircleCI struct{} func NewCircleCI() Provider { return &CircleCI{} } -func (c *CircleCI) Platform() string { return "CIRCLECI" } +func (c *CircleCI) Platform() Platform { return PlatformCircleCI } func (c *CircleCI) Detect() bool { return os.Getenv("CIRCLECI") == "true" } -func (c *CircleCI) BaseRef(ctx context.Context) (string, error) { +func (c *CircleCI) BaseRef(ctx context.Context, repoPath string) (string, error) { // Circle doesn't expose a base commit directly. Best available: // origin/. Callers wanting PR-diff semantics should // set NULLIFY_BASE_REF explicitly. if v := os.Getenv("NULLIFY_BASE_REF"); v != "" { - return resolveRef(ctx, v) + return resolveRef(ctx, repoPath, v) } - return resolveRef(ctx, "origin/HEAD") + return resolveRef(ctx, repoPath, "origin/HEAD") } -func (c *CircleCI) HeadRef(ctx context.Context) (string, error) { +func (c *CircleCI) HeadRef(ctx context.Context, repoPath string) (string, error) { if v := os.Getenv("CIRCLE_SHA1"); v != "" { return v, nil } - return resolveRef(ctx, "HEAD") + return resolveRef(ctx, repoPath, "HEAD") } func (c *CircleCI) PRNumber() (int, bool) { @@ -81,9 +80,5 @@ func (c *CircleCI) EnrichHeader(h http.Header) { if v := os.Getenv("CIRCLE_SHA1"); v != "" { h.Set("X-Nullify-CI-Commit", v) } - h.Set("X-Nullify-CI-Provider", c.Platform()) + h.Set("X-Nullify-CI-Provider", c.Platform().String()) } - -// _ is a tiny type guard that keeps fmt imported for future error-wrapping -// additions without a noisy linter warning in the meantime. -var _ = fmt.Sprintf diff --git a/internal/ci/github_actions.go b/internal/ci/github_actions.go index a46a59f..0341057 100644 --- a/internal/ci/github_actions.go +++ b/internal/ci/github_actions.go @@ -28,29 +28,30 @@ type GitHubActions struct{} func NewGitHubActions() Provider { return &GitHubActions{} } -func (g *GitHubActions) Platform() string { return "GITHUB_ACTIONS" } +func (g *GitHubActions) Platform() Platform { return PlatformGitHubActions } func (g *GitHubActions) Detect() bool { return os.Getenv("GITHUB_ACTIONS") == "true" } -func (g *GitHubActions) BaseRef(ctx context.Context) (string, error) { +func (g *GitHubActions) BaseRef(ctx context.Context, repoPath string) (string, error) { // PR build: GITHUB_BASE_REF is populated; resolve to commit. if base := os.Getenv("GITHUB_BASE_REF"); base != "" { - return resolveRef(ctx, "origin/"+base) + return resolveRef(ctx, repoPath, "origin/"+base) } // Push build: use the previous commit on the pushed ref (HEAD^). - // Falls back to the default branch's tip when HEAD has no parent - // (first push). - if sha, err := resolveRef(ctx, "HEAD^"); err == nil { + // Requires a non-shallow checkout — the default actions/checkout is + // depth-1, so HEAD^ is absent and we fall back to the default + // branch's tip. Set `fetch-depth: 0` for true previous-HEAD diffs. + if sha, err := resolveRef(ctx, repoPath, "HEAD^"); err == nil { return sha, nil } - return resolveRef(ctx, "origin/HEAD") + return resolveRef(ctx, repoPath, "origin/HEAD") } -func (g *GitHubActions) HeadRef(ctx context.Context) (string, error) { +func (g *GitHubActions) HeadRef(ctx context.Context, repoPath string) (string, error) { if sha := os.Getenv("GITHUB_SHA"); sha != "" { return sha, nil } - return resolveRef(ctx, "HEAD") + return resolveRef(ctx, repoPath, "HEAD") } func (g *GitHubActions) PRNumber() (int, bool) { @@ -89,13 +90,15 @@ func (g *GitHubActions) EnrichHeader(h http.Header) { if v := os.Getenv("GITHUB_SHA"); v != "" { h.Set("X-Nullify-CI-Commit", v) } - h.Set("X-Nullify-CI-Provider", g.Platform()) + h.Set("X-Nullify-CI-Provider", g.Platform().String()) } -// resolveRef runs `git rev-parse` to turn a symbolic ref into a commit. -// Package-level helper so every provider reuses the same shell-out. -func resolveRef(ctx context.Context, ref string) (string, error) { +// resolveRef runs `git rev-parse` in repoPath to turn a symbolic ref into +// a commit. Package-level helper so every provider reuses the same +// shell-out, scoped to the repository the workflow is analysing. +func resolveRef(ctx context.Context, repoPath, ref string) (string, error) { cmd := exec.CommandContext(ctx, "git", "rev-parse", "--verify", ref+"^{commit}") + cmd.Dir = repoPath out, err := cmd.Output() if err != nil { return "", fmt.Errorf("git rev-parse %s: %w", ref, err) diff --git a/internal/ci/gitlab_ci.go b/internal/ci/gitlab_ci.go index 9e6e953..06039a5 100644 --- a/internal/ci/gitlab_ci.go +++ b/internal/ci/gitlab_ci.go @@ -23,11 +23,11 @@ type GitLabCI struct{} func NewGitLabCI() Provider { return &GitLabCI{} } -func (g *GitLabCI) Platform() string { return "GITLAB_CI" } +func (g *GitLabCI) Platform() Platform { return PlatformGitLabCI } func (g *GitLabCI) Detect() bool { return os.Getenv("GITLAB_CI") == "true" } -func (g *GitLabCI) BaseRef(ctx context.Context) (string, error) { +func (g *GitLabCI) BaseRef(ctx context.Context, repoPath string) (string, error) { // MR build preferred (exact PR target sha) if v := os.Getenv("CI_MERGE_REQUEST_TARGET_BRANCH_SHA"); v != "" { return v, nil @@ -37,14 +37,14 @@ func (g *GitLabCI) BaseRef(ctx context.Context) (string, error) { if v := os.Getenv("CI_COMMIT_BEFORE_SHA"); v != "" && !strings.HasPrefix(v, "00000000") { return v, nil } - return resolveRef(ctx, "origin/HEAD") + return resolveRef(ctx, repoPath, "origin/HEAD") } -func (g *GitLabCI) HeadRef(ctx context.Context) (string, error) { +func (g *GitLabCI) HeadRef(ctx context.Context, repoPath string) (string, error) { if v := os.Getenv("CI_COMMIT_SHA"); v != "" { return v, nil } - return resolveRef(ctx, "HEAD") + return resolveRef(ctx, repoPath, "HEAD") } func (g *GitLabCI) PRNumber() (int, bool) { @@ -81,5 +81,5 @@ func (g *GitLabCI) EnrichHeader(h http.Header) { if v := os.Getenv("CI_COMMIT_SHA"); v != "" { h.Set("X-Nullify-CI-Commit", v) } - h.Set("X-Nullify-CI-Provider", g.Platform()) + h.Set("X-Nullify-CI-Provider", g.Platform().String()) } diff --git a/internal/ci/google_cloud_build.go b/internal/ci/google_cloud_build.go index 31729f0..f12073a 100644 --- a/internal/ci/google_cloud_build.go +++ b/internal/ci/google_cloud_build.go @@ -16,7 +16,7 @@ type GoogleCloudBuild struct{} func NewGoogleCloudBuild() Provider { return &GoogleCloudBuild{} } -func (g *GoogleCloudBuild) Platform() string { return "GOOGLE_CLOUD_BUILD" } +func (g *GoogleCloudBuild) Platform() Platform { return PlatformGoogleCloudBuild } // Detect: Google Cloud Build doesn't expose a single CI=true-style flag; // BUILD_ID + PROJECT_ID together are a reasonable signature. @@ -25,18 +25,18 @@ func (g *GoogleCloudBuild) Detect() bool { os.Getenv("GITLAB_CI") == "" && os.Getenv("GITHUB_ACTIONS") != "true" } -func (g *GoogleCloudBuild) BaseRef(ctx context.Context) (string, error) { +func (g *GoogleCloudBuild) BaseRef(ctx context.Context, repoPath string) (string, error) { if v := os.Getenv("NULLIFY_BASE_REF"); v != "" { - return resolveRef(ctx, v) + return resolveRef(ctx, repoPath, v) } - return resolveRef(ctx, "origin/HEAD") + return resolveRef(ctx, repoPath, "origin/HEAD") } -func (g *GoogleCloudBuild) HeadRef(ctx context.Context) (string, error) { +func (g *GoogleCloudBuild) HeadRef(ctx context.Context, repoPath string) (string, error) { if v := os.Getenv("COMMIT_SHA"); v != "" { return v, nil } - return resolveRef(ctx, "HEAD") + return resolveRef(ctx, repoPath, "HEAD") } func (g *GoogleCloudBuild) PRNumber() (int, bool) { @@ -61,5 +61,5 @@ func (g *GoogleCloudBuild) EnrichHeader(h http.Header) { if v := os.Getenv("COMMIT_SHA"); v != "" { h.Set("X-Nullify-CI-Commit", v) } - h.Set("X-Nullify-CI-Provider", g.Platform()) + h.Set("X-Nullify-CI-Provider", g.Platform().String()) } diff --git a/internal/ci/jenkins.go b/internal/ci/jenkins.go index 06767b7..3bb08e0 100644 --- a/internal/ci/jenkins.go +++ b/internal/ci/jenkins.go @@ -25,25 +25,25 @@ type Jenkins struct{} func NewJenkins() Provider { return &Jenkins{} } -func (j *Jenkins) Platform() string { return "JENKINS" } +func (j *Jenkins) Platform() Platform { return PlatformJenkins } func (j *Jenkins) Detect() bool { return os.Getenv("JENKINS_URL") != "" } -func (j *Jenkins) BaseRef(ctx context.Context) (string, error) { +func (j *Jenkins) BaseRef(ctx context.Context, repoPath string) (string, error) { if v := os.Getenv("NULLIFY_BASE_REF"); v != "" { - return resolveRef(ctx, v) + return resolveRef(ctx, repoPath, v) } if v := os.Getenv("CHANGE_TARGET"); v != "" { - return resolveRef(ctx, "origin/"+v) + return resolveRef(ctx, repoPath, "origin/"+v) } - return resolveRef(ctx, "origin/HEAD") + return resolveRef(ctx, repoPath, "origin/HEAD") } -func (j *Jenkins) HeadRef(ctx context.Context) (string, error) { +func (j *Jenkins) HeadRef(ctx context.Context, repoPath string) (string, error) { if v := os.Getenv("GIT_COMMIT"); v != "" { return v, nil } - return resolveRef(ctx, "HEAD") + return resolveRef(ctx, repoPath, "HEAD") } func (j *Jenkins) PRNumber() (int, bool) { @@ -82,5 +82,5 @@ func (j *Jenkins) EnrichHeader(h http.Header) { if v := os.Getenv("GIT_COMMIT"); v != "" { h.Set("X-Nullify-CI-Commit", v) } - h.Set("X-Nullify-CI-Provider", j.Platform()) + h.Set("X-Nullify-CI-Provider", j.Platform().String()) } diff --git a/internal/ci/local.go b/internal/ci/local.go index 5f54a91..a03b722 100644 --- a/internal/ci/local.go +++ b/internal/ci/local.go @@ -22,22 +22,22 @@ type Local struct{} func NewLocal() Provider { return &Local{} } -func (l *Local) Platform() string { return "OTHER" } +func (l *Local) Platform() Platform { return PlatformOther } func (l *Local) Detect() bool { return true } -func (l *Local) BaseRef(ctx context.Context) (string, error) { +func (l *Local) BaseRef(ctx context.Context, repoPath string) (string, error) { if v := os.Getenv("NULLIFY_BASE_REF"); v != "" { - return resolveRef(ctx, v) + return resolveRef(ctx, repoPath, v) } - return resolveRef(ctx, "origin/HEAD") + return resolveRef(ctx, repoPath, "origin/HEAD") } -func (l *Local) HeadRef(ctx context.Context) (string, error) { +func (l *Local) HeadRef(ctx context.Context, repoPath string) (string, error) { if v := os.Getenv("NULLIFY_HEAD_REF"); v != "" { - return resolveRef(ctx, v) + return resolveRef(ctx, repoPath, v) } - return resolveRef(ctx, "HEAD") + return resolveRef(ctx, repoPath, "HEAD") } func (l *Local) PRNumber() (int, bool) { @@ -58,5 +58,5 @@ func (l *Local) RepoSlug() (string, string, bool) { } func (l *Local) EnrichHeader(h http.Header) { - h.Set("X-Nullify-CI-Provider", l.Platform()) + h.Set("X-Nullify-CI-Provider", l.Platform().String()) } diff --git a/internal/ci/registry.go b/internal/ci/registry.go index ca20361..7a5ec1d 100644 --- a/internal/ci/registry.go +++ b/internal/ci/registry.go @@ -6,9 +6,8 @@ package ci // // When adding a new provider: // 1. Implement Provider in a new file under this package. -// 2. Append it to the list below (before Local). -// 3. Ensure its Platform() value matches a benchmarks.PipelinePlatform -// enum — `make -C cli lint-ci-providers` verifies this. +// 2. Define its Platform constant in ci.go and return it from Platform(). +// 3. Append it to the list below (before Local). // Default returns the full list of providers in detection priority. // Exposed as a constructor (not a package-level var) so tests can build diff --git a/internal/commands/deps_analyze.go b/internal/commands/deps_analyze.go index eb7fdac..7a630af 100644 --- a/internal/commands/deps_analyze.go +++ b/internal/commands/deps_analyze.go @@ -31,9 +31,13 @@ import ( "github.com/spf13/cobra" ) -// ExitCodes — stable exit-code table so CI operators can gate merges. +// Stable exit-code table so CI operators can gate merges. These mirror +// the global codes in cmd/cli/cmd/exitcodes.go where they overlap (1 = +// transient/retry, 2 = invalid invocation) and add the deps-specific +// severity codes 10/20/30. They live here rather than in the cmd package +// because cmd imports commands — referencing the cmd constants would be +// an import cycle. main.go maps these onto os.Exit via ExitCodeFromError. const ( - exitNoFinding = 0 exitVulnerableFound = 10 exitSuspiciousFound = 20 exitMaliciousFound = 30 @@ -41,6 +45,38 @@ const ( exitTransientFailure = 1 ) +// Verdict is a dependency malware-analysis verdict as returned by +// vuln-database (passed through scpm). The recognised set is below; any +// other non-empty verdict is treated as unknown and fails closed (see +// classifyVerdict) so a server-side rename can't silently bypass the gate. +type Verdict string + +const ( + VerdictBenign Verdict = "benign" + VerdictVulnerable Verdict = "vulnerable" + VerdictSuspicious Verdict = "suspicious" + VerdictMalicious Verdict = "confirmed_malicious" +) + +// Default polling cadence/cap for --wait. Analysis is asynchronous, so +// without --wait the first response usually carries an empty verdict. +const ( + defaultWaitTimeout = 5 * time.Minute + defaultWaitInterval = 5 * time.Second +) + +// terminalStatuses are the job statuses we treat as "done" when polling. +// The exact strings originate in vuln-database; this set is intentionally +// broad so an unanticipated terminal label still ends the poll rather than +// spinning to the --wait timeout. A populated verdict or a cache hit is +// also treated as terminal regardless of status. +var terminalStatuses = map[string]bool{ + "completed": true, "complete": true, + "succeeded": true, "success": true, + "failed": true, "failure": true, + "error": true, "cancelled": true, "canceled": true, +} + // RegisterDepsAnalyzeCommand attaches `deps analyze` to the given // parent command. api.Client is reused for auth (BaseURL + Token) — // even though the scpm calls aren't in the generated surface yet, @@ -67,6 +103,8 @@ func RegisterDepsAnalyzeCommand(parent *cobra.Command, getClient func() *api.Cli headRef string repoPath string wait bool + waitTimeout time.Duration + waitInterval time.Duration failOn string outputFormat string idempotencySeed string @@ -82,14 +120,21 @@ between --base (default: CI-provided target branch) and --head (default HEAD), and requests malware analysis for each new or bumped dep via scpm /scpm/dependencies/analyze. +Analysis is asynchronous: by default each dep is enqueued and the command +returns immediately with whatever verdict is already cached. Pass --wait +to block (per --wait-timeout) until every job reaches a terminal verdict. + Exit codes: - 0 no concerning findings + 0 no concerning findings (or --fail-on=none) 10 vulnerable dependency detected 20 suspicious malware signal - 30 confirmed malicious + 30 confirmed (or unrecognised) malicious verdict 2 invalid invocation - 1 transient failure (retry) + 1 transient failure / incomplete analysis (retry) `, + // A verdict-based failure is a legitimate gate result, not a + // usage error — don't dump cobra usage text on it. + SilenceUsage: true, RunE: func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() return runDepsAnalyze(ctx, getClient(), depsAnalyzeOpts{ @@ -97,6 +142,8 @@ Exit codes: HeadRef: headRef, RepoPath: repoPath, Wait: wait, + WaitTimeout: waitTimeout, + WaitInterval: waitInterval, FailOn: failOn, OutputFormat: outputFormat, IdempotencySeed: idempotencySeed, @@ -108,9 +155,11 @@ Exit codes: analyzeCmd.Flags().StringVar(&headRef, "head", "HEAD", "Head commit SHA or ref") analyzeCmd.Flags().StringVar(&repoPath, "repo", ".", "Path to the git repository") analyzeCmd.Flags().BoolVar(&wait, "wait", false, "Block until every analysis job reaches a terminal verdict") + analyzeCmd.Flags().DurationVar(&waitTimeout, "wait-timeout", defaultWaitTimeout, "With --wait, give up polling a job after this long") + analyzeCmd.Flags().DurationVar(&waitInterval, "wait-interval", defaultWaitInterval, "With --wait, delay between poll attempts") analyzeCmd.Flags().StringVar(&failOn, "fail-on", "malicious", "Exit non-zero when any finding of this severity or worse: vulnerable|suspicious|malicious|none") analyzeCmd.Flags().StringVar(&outputFormat, "format", "text", "Output format: text|json") - analyzeCmd.Flags().StringVar(&idempotencySeed, "idempotency-seed", "", "Prefix for the scpm idempotency key; default is CI-provider+run-id") + analyzeCmd.Flags().StringVar(&idempotencySeed, "idempotency-seed", "", "Prefix for the scpm idempotency key; default is the CI provider name") depsCmd.AddCommand(analyzeCmd) } @@ -119,6 +168,8 @@ type depsAnalyzeOpts struct { HeadRef string RepoPath string Wait bool + WaitTimeout time.Duration + WaitInterval time.Duration FailOn string OutputFormat string IdempotencySeed string @@ -130,18 +181,28 @@ type depsAnalyzeOpts struct { // regeneration cycle completes, these are the only structs the CLI // needs. type scpmAnalyzeRequest struct { - Ecosystem string `json:"ecosystem"` - Name string `json:"name"` - Version string `json:"version"` - PreviousVersion string `json:"previousVersion,omitempty"` - IdempotencyKey string `json:"idempotencyKey,omitempty"` + Ecosystem manifest.Ecosystem `json:"ecosystem"` + Name string `json:"name"` + Version string `json:"version"` + PreviousVersion string `json:"previousVersion,omitempty"` + IdempotencyKey string `json:"idempotencyKey,omitempty"` } type scpmAnalyzeResponse struct { - JobID string `json:"jobId"` - Status string `json:"status"` - CacheHit bool `json:"cacheHit"` - Verdict string `json:"verdict,omitempty"` + JobID string `json:"jobId"` + Status string `json:"status"` + CacheHit bool `json:"cacheHit"` + Verdict Verdict `json:"verdict,omitempty"` +} + +// analyzedDep pairs an actionable dependency with the outcome of its +// analyze call. One entry exists per actionable dep — including failures +// (Resp nil, Err set) — so rendering and gating never rely on two +// index-aligned slices staying in sync. +type analyzedDep struct { + Dep scan.ChangedDep `json:"dep"` + Resp *scpmAnalyzeResponse `json:"resp,omitempty"` + Err string `json:"error,omitempty"` } func runDepsAnalyze(ctx context.Context, client *api.Client, opts depsAnalyzeOpts) error { @@ -158,14 +219,14 @@ func runDepsAnalyze(ctx context.Context, client *api.Client, opts depsAnalyzeOpt base := opts.BaseRef if base == "" { - base, err = provider.BaseRef(ctx) + base, err = provider.BaseRef(ctx, opts.RepoPath) if err != nil { return exitError(exitInvalidInvocation, "resolve base ref: %v", err) } } head := opts.HeadRef if head == "" { - head, err = provider.HeadRef(ctx) + head, err = provider.HeadRef(ctx, opts.RepoPath) if err != nil { return exitError(exitInvalidInvocation, "resolve head ref: %v", err) } @@ -192,17 +253,18 @@ func runDepsAnalyze(ctx context.Context, client *api.Client, opts depsAnalyzeOpt return nil } - // Only "added" and "bumped" deps have a new version worth analysing. + // Only added and bumped deps have a new version worth analysing. actionable := make([]scan.ChangedDep, 0, len(changed)) for _, d := range changed { - if d.Kind == "added" || d.Kind == "bumped" { + if d.Kind == scan.KindAdded || d.Kind == scan.KindBumped { actionable = append(actionable, d) } } fmt.Fprintf(os.Stderr, "nullify deps analyze: %d changed, %d actionable\n", len(changed), len(actionable)) - results := []scpmAnalyzeResponse{} - worstVerdict := "" + analyzed := make([]analyzedDep, 0, len(actionable)) + worst := gateOutcome{} + errCount := 0 for _, d := range actionable { req := scpmAnalyzeRequest{ Ecosystem: d.Ecosystem, @@ -211,31 +273,78 @@ func runDepsAnalyze(ctx context.Context, client *api.Client, opts depsAnalyzeOpt PreviousVersion: d.PreviousVersion, IdempotencyKey: buildIdempotencyKey(opts.IdempotencySeed, provider, d), } - resp, err := postSCPMAnalyze(ctx, client, provider, req) + resp, err := analyzeDep(ctx, client, provider, req, opts) if err != nil { - // Single-dep failure shouldn't sink the whole run — - // surface and continue. The exit code still reflects the - // worst observed verdict across successful calls. + // A single failed/incomplete analysis shouldn't be silently + // greenlit — record it and fail transiently at the end unless + // a worse verdict already gates the run. fmt.Fprintf(os.Stderr, " %s/%s@%s: analyze failed: %v\n", d.Ecosystem, d.Name, d.Version, err) + analyzed = append(analyzed, analyzedDep{Dep: d, Err: err.Error()}) + errCount++ continue } - results = append(results, *resp) - if v := verdictRank(resp.Verdict); v > verdictRank(worstVerdict) { - worstVerdict = resp.Verdict + analyzed = append(analyzed, analyzedDep{Dep: d, Resp: resp}) + if o := classifyVerdict(resp.Verdict, d); o.rank > worst.rank { + worst = o } } - if err := renderResults(opts.OutputFormat, actionable, results); err != nil { + if err := renderResults(opts.OutputFormat, analyzed); err != nil { return exitError(exitTransientFailure, "render: %v", err) } - return checkFailOn(opts.FailOn, worstVerdict) + if err := checkFailOn(opts.FailOn, worst); err != nil { + return err + } + // Gate passed on observed verdicts, but if any analysis didn't + // complete we can't certify the run — exit transiently so CI retries + // rather than merging on incomplete data (unless the operator opted + // out of gating entirely). + if errCount > 0 && opts.FailOn != "none" { + return exitError(exitTransientFailure, "%d of %d dependency analyses did not complete; re-run", errCount, len(actionable)) + } + return nil } -// buildIdempotencyKey combines the caller-supplied seed (or the CI -// provider's run ID) with the (ecosystem, name, version) tuple. This -// makes a second invocation of the same CI run return cached results -// even if the CI is re-triggered (common on flaky CI). +// gateOutcome is the worst result seen across analysed deps: a comparable +// rank, a human label for messaging, and the exit code to use if it ends +// up being the worst. +type gateOutcome struct { + rank int + label string + code int +} + +// classifyVerdict maps a verdict + its dependency to a gateOutcome. +// Recognised verdicts use their normal rank; a non-empty verdict the CLI +// doesn't recognise fails closed — it warns and is ranked as malicious so +// it crosses any --fail-on threshold except "none". An empty verdict (job +// still pending, no --wait) is not a verdict and contributes nothing. +func classifyVerdict(v Verdict, d scan.ChangedDep) gateOutcome { + switch v { + case "": + return gateOutcome{} + case VerdictBenign: + return gateOutcome{rank: 1, label: string(v), code: exitVulnerableFound} + case VerdictVulnerable: + return gateOutcome{rank: 2, label: string(v), code: exitVulnerableFound} + case VerdictSuspicious: + return gateOutcome{rank: 3, label: string(v), code: exitSuspiciousFound} + case VerdictMalicious: + return gateOutcome{rank: 4, label: string(v), code: exitMaliciousFound} + default: + fmt.Fprintf(os.Stderr, + " WARNING: %s/%s@%s returned unrecognised verdict %q — failing closed (treating as malicious)\n", + d.Ecosystem, d.Name, d.Version, v) + return gateOutcome{rank: 4, label: string(v) + " (unrecognised)", code: exitMaliciousFound} + } +} + +// buildIdempotencyKey combines the caller-supplied seed (or, by default, +// the CI provider name) with the (ecosystem, name, version) tuple. The +// default seed is stable across runs of the same provider on purpose: +// re-runs and re-triggers of a flaky pipeline then coalesce onto the same +// scpm job and reuse its cached verdict rather than re-queuing analysis. func buildIdempotencyKey(seed string, p ci.Provider, d scan.ChangedDep) string { if seed == "" { if v := os.Getenv("NULLIFY_IDEMPOTENCY_SEED"); v != "" { @@ -243,12 +352,66 @@ func buildIdempotencyKey(seed string, p ci.Provider, d scan.ChangedDep) string { } } if seed == "" { - // Per-CI default: the run ID header the provider emits. seed = fmt.Sprintf("ci-%s", p.Platform()) } return fmt.Sprintf("%s|%s|%s|%s", seed, d.Ecosystem, d.Name, d.Version) } +// analyzeDep POSTs one analyze request and, when --wait is set, polls the +// same idempotent request until the job reaches a terminal state or the +// wait timeout elapses. Re-POSTing with the same idempotency key coalesces +// onto the existing job (idempotent within 24h) and returns its updated +// status/verdict. +func analyzeDep(ctx context.Context, client *api.Client, provider ci.Provider, req scpmAnalyzeRequest, opts depsAnalyzeOpts) (*scpmAnalyzeResponse, error) { + resp, err := postSCPMAnalyze(ctx, client, provider, req) + if err != nil { + return nil, err + } + if !opts.Wait || isTerminal(resp) { + return resp, nil + } + + interval := opts.WaitInterval + if interval <= 0 { + interval = defaultWaitInterval + } + timeout := opts.WaitTimeout + if timeout <= 0 { + timeout = defaultWaitTimeout + } + deadline := time.Now().Add(timeout) + for { + wait := interval + if remaining := time.Until(deadline); remaining < wait { + wait = remaining + } + if wait <= 0 { + return nil, fmt.Errorf("analysis still %q after %s wait timeout", resp.Status, timeout) + } + select { + case <-ctx.Done(): + return nil, ctx.Err() + case <-time.After(wait): + } + resp, err = postSCPMAnalyze(ctx, client, provider, req) + if err != nil { + return nil, err + } + if isTerminal(resp) { + return resp, nil + } + } +} + +// isTerminal reports whether an analyze response represents a finished +// job: a cache hit, a populated verdict, or a terminal status string. +func isTerminal(resp *scpmAnalyzeResponse) bool { + if resp.CacheHit || resp.Verdict != "" { + return true + } + return terminalStatuses[strings.ToLower(strings.TrimSpace(resp.Status))] +} + func postSCPMAnalyze(ctx context.Context, client *api.Client, provider ci.Provider, req scpmAnalyzeRequest) (*scpmAnalyzeResponse, error) { body, err := json.Marshal(req) if err != nil { @@ -272,7 +435,7 @@ func postSCPMAnalyze(ctx context.Context, client *api.Client, provider ci.Provid if err != nil { return nil, err } - defer resp.Body.Close() //nolint:errcheck + defer func() { _ = resp.Body.Close() }() if resp.StatusCode < 200 || resp.StatusCode >= 300 { b, _ := io.ReadAll(io.LimitReader(resp.Body, 2048)) @@ -285,77 +448,72 @@ func postSCPMAnalyze(ctx context.Context, client *api.Client, provider ci.Provid return &out, nil } -// verdictRank maps verdict strings to a comparable int so we can -// compute "worst seen" across a batch. Unknown verdicts sort below -// benign so they don't accidentally trip --fail-on=vulnerable. -func verdictRank(v string) int { +// checkFailOn maps (--fail-on, worst-outcome) to an exit code. A +// zero-rank threshold means --fail-on=none, which never fails. +func checkFailOn(failOn string, worst gateOutcome) error { + threshold := verdictRank(verdictThreshold(failOn)) + if threshold == 0 || worst.rank < threshold { + return nil + } + return exitError(worst.code, "dependency gate failed: %s", worst.label) +} + +// verdictRank maps a recognised verdict to the same comparable rank used +// by classifyVerdict, for translating the --fail-on threshold. Unknown +// values (including "") rank 0 here; unknown *response* verdicts are +// handled by classifyVerdict (fail-closed) before they reach the gate. +func verdictRank(v Verdict) int { switch v { - case "confirmed_malicious": + case VerdictMalicious: return 4 - case "suspicious": + case VerdictSuspicious: return 3 - case "vulnerable": + case VerdictVulnerable: return 2 - case "benign": + case VerdictBenign: return 1 default: return 0 } } -// checkFailOn maps (--fail-on, worst-verdict) to an exit code. -func checkFailOn(failOn, worst string) error { - threshold := verdictRank(verdictThreshold(failOn)) - actual := verdictRank(worst) - if actual < threshold { - return nil - } - switch worst { - case "confirmed_malicious": - return exitError(exitMaliciousFound, "malicious dependency detected") - case "suspicious": - return exitError(exitSuspiciousFound, "suspicious dependency detected") - default: - return exitError(exitVulnerableFound, "concerning verdict: %q", worst) - } -} - -func verdictThreshold(failOn string) string { +func verdictThreshold(failOn string) Verdict { switch failOn { case "none": - return "" // unreachable — verdictRank("") is 0, never crossed + return "" // rank 0 — checkFailOn never crosses it case "vulnerable": - return "vulnerable" + return VerdictVulnerable case "suspicious": - return "suspicious" - case "malicious", "": - return "confirmed_malicious" + return VerdictSuspicious + default: // "malicious" and the default + return VerdictMalicious } - return "confirmed_malicious" } -func renderResults(format string, changed []scan.ChangedDep, results []scpmAnalyzeResponse) error { +func renderResults(format string, analyzed []analyzedDep) error { switch format { case "json": enc := json.NewEncoder(os.Stdout) enc.SetIndent("", " ") - return enc.Encode(map[string]any{"changed": changed, "results": results}) + return enc.Encode(analyzed) case "text", "": - for i, d := range changed { - if i >= len(results) { - break - } - r := results[i] + for _, a := range analyzed { + d := a.Dep prev := d.PreviousVersion if prev == "" { prev = "(new)" } - verdict := r.Verdict + if a.Resp == nil { + fmt.Fprintf(os.Stdout, " %s/%s %s → %s [error: %s]\n", + d.Ecosystem, d.Name, prev, d.Version, a.Err) + continue + } + verdict := string(a.Resp.Verdict) if verdict == "" { - verdict = r.Status + verdict = a.Resp.Status } fmt.Fprintf(os.Stdout, " %s/%s %s → %s [%s] job=%s\n", - d.Ecosystem, d.Name, prev, d.Version, verdict, r.JobID) + d.Ecosystem, d.Name, prev, d.Version, verdict, a.Resp.JobID) } return nil default: @@ -382,8 +540,8 @@ func short(sha string) string { return sha } -// exitErr wraps an error with an exit code so the top-level cobra -// handler can translate it to os.Exit(N). Declared locally so this +// exitErr wraps an error with an exit code so the top-level handler in +// main.go can translate it to os.Exit(N). Declared locally so this // command doesn't drag in the cli's main-pkg exit-code wiring. type exitErr struct { Code int @@ -396,9 +554,9 @@ func exitError(code int, format string, args ...any) error { return exitErr{Code: code, Msg: fmt.Sprintf(format, args...)} } -// ExitCodeFromError returns the exit code an exitErr wants, or 1 for -// any other error. Callers use this in place of the cli's standard -// error-to-exit logic. +// ExitCodeFromError returns the exit code an exitErr wants, or +// exitTransientFailure (1) for any other non-nil error. main.go calls +// this to translate the workflow's result into a process exit code. func ExitCodeFromError(err error) int { if err == nil { return 0 diff --git a/internal/commands/deps_analyze_test.go b/internal/commands/deps_analyze_test.go new file mode 100644 index 0000000..3d0059e --- /dev/null +++ b/internal/commands/deps_analyze_test.go @@ -0,0 +1,219 @@ +package commands + +import ( + "context" + "net/http" + "net/http/httptest" + "sync/atomic" + "testing" + "time" + + "github.com/nullify-platform/cli/internal/api" + "github.com/nullify-platform/cli/internal/ci" + "github.com/nullify-platform/cli/internal/scan" + "github.com/nullify-platform/cli/internal/scan/manifest" +) + +func dep(name string) scan.ChangedDep { + return scan.ChangedDep{Ecosystem: manifest.EcosystemNPM, Name: name, Version: "1.0.0", Kind: scan.KindAdded} +} + +func TestCheckFailOn(t *testing.T) { + cases := []struct { + name string + failOn string + verdict Verdict + wantErr bool + wantExit int + }{ + {"none_benign", "none", VerdictBenign, false, 0}, + {"none_malicious", "none", VerdictMalicious, false, 0}, // none never fails + {"none_empty", "none", "", false, 0}, + {"default_benign", "malicious", VerdictBenign, false, 0}, + {"default_vulnerable", "malicious", VerdictVulnerable, false, 0}, + {"default_suspicious", "malicious", VerdictSuspicious, false, 0}, + {"default_malicious", "malicious", VerdictMalicious, true, exitMaliciousFound}, + {"vuln_threshold_vuln", "vulnerable", VerdictVulnerable, true, exitVulnerableFound}, + {"vuln_threshold_benign", "vulnerable", VerdictBenign, false, 0}, + {"susp_threshold_susp", "suspicious", VerdictSuspicious, true, exitSuspiciousFound}, + {"susp_threshold_vuln", "suspicious", VerdictVulnerable, false, 0}, + {"empty_verdict_never_fails", "vulnerable", "", false, 0}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + worst := classifyVerdict(c.verdict, dep("x")) + err := checkFailOn(c.failOn, worst) + if (err != nil) != c.wantErr { + t.Fatalf("checkFailOn(%q, %q) err = %v, wantErr %v", c.failOn, c.verdict, err, c.wantErr) + } + if c.wantErr && ExitCodeFromError(err) != c.wantExit { + t.Errorf("exit code = %d, want %d", ExitCodeFromError(err), c.wantExit) + } + }) + } +} + +func TestClassifyVerdict_UnknownFailsClosed(t *testing.T) { + worst := classifyVerdict(Verdict("brand_new_label"), dep("evil")) + if worst.rank != 4 { + t.Fatalf("unknown verdict rank = %d, want 4 (malicious)", worst.rank) + } + // Crosses every threshold except none. + if err := checkFailOn("malicious", worst); err == nil { + t.Error("unknown verdict should fail --fail-on=malicious") + } + if err := checkFailOn("vulnerable", worst); err == nil { + t.Error("unknown verdict should fail --fail-on=vulnerable") + } + if err := checkFailOn("none", worst); err != nil { + t.Errorf("unknown verdict should NOT fail --fail-on=none, got %v", err) + } + if ExitCodeFromError(checkFailOn("malicious", worst)) != exitMaliciousFound { + t.Error("unknown verdict should exit with malicious code") + } +} + +func TestExitCodeFromError(t *testing.T) { + if got := ExitCodeFromError(nil); got != 0 { + t.Errorf("nil → %d, want 0", got) + } + if got := ExitCodeFromError(exitError(exitMaliciousFound, "boom")); got != exitMaliciousFound { + t.Errorf("exitErr → %d, want %d", got, exitMaliciousFound) + } + if got := ExitCodeFromError(context.Canceled); got != exitTransientFailure { + t.Errorf("plain error → %d, want %d", got, exitTransientFailure) + } +} + +func TestBuildIdempotencyKey(t *testing.T) { + t.Setenv("NULLIFY_IDEMPOTENCY_SEED", "") + clearLocalEnv(t) + p := ci.NewLocal() + d := dep("lodash") + + // Default seed is provider-name scoped (stable across runs). + got := buildIdempotencyKey("", p, d) + want := "ci-OTHER|npm|lodash|1.0.0" + if got != want { + t.Errorf("default key = %q, want %q", got, want) + } + // Explicit seed wins. + if got := buildIdempotencyKey("myseed", p, d); got != "myseed|npm|lodash|1.0.0" { + t.Errorf("explicit seed key = %q", got) + } +} + +func clearLocalEnv(t *testing.T) { + t.Helper() + for _, k := range []string{"GITHUB_ACTIONS", "GITLAB_CI", "CIRCLECI", "BITBUCKET_BUILD_NUMBER", "JENKINS_URL", "TF_BUILD", "BUILD_ID", "PROJECT_ID", "CODEBUILD_BUILD_ID"} { + t.Setenv(k, "") + } +} + +func TestRenderResults_AlignmentWithFailure(t *testing.T) { + // A middle failure must not shift verdicts onto the wrong dep — the + // JSON path encodes one entry per dep with its own outcome. + analyzed := []analyzedDep{ + {Dep: dep("a"), Resp: &scpmAnalyzeResponse{JobID: "j1", Verdict: VerdictBenign}}, + {Dep: dep("b"), Err: "scpm 500"}, + {Dep: dep("c"), Resp: &scpmAnalyzeResponse{JobID: "j3", Verdict: VerdictMalicious}}, + } + // text + json render without panicking / mismatching lengths. + if err := renderResults("text", analyzed); err != nil { + t.Fatalf("text render: %v", err) + } + if err := renderResults("json", analyzed); err != nil { + t.Fatalf("json render: %v", err) + } + if err := renderResults("bogus", analyzed); err == nil { + t.Error("expected error for unknown format") + } +} + +func TestIsTerminal(t *testing.T) { + cases := []struct { + resp scpmAnalyzeResponse + want bool + }{ + {scpmAnalyzeResponse{CacheHit: true}, true}, + {scpmAnalyzeResponse{Verdict: VerdictBenign}, true}, + {scpmAnalyzeResponse{Status: "completed"}, true}, + {scpmAnalyzeResponse{Status: "FAILED"}, true}, + {scpmAnalyzeResponse{Status: "queued"}, false}, + {scpmAnalyzeResponse{Status: "running"}, false}, + {scpmAnalyzeResponse{}, false}, + } + for _, c := range cases { + if got := isTerminal(&c.resp); got != c.want { + t.Errorf("isTerminal(%+v) = %v, want %v", c.resp, got, c.want) + } + } +} + +func TestAnalyzeDep_CacheHitNoWait(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(`{"jobId":"j1","status":"completed","cacheHit":true,"verdict":"benign"}`)) + })) + defer srv.Close() + + client := &api.Client{BaseURL: srv.URL, HTTPClient: srv.Client()} + resp, err := analyzeDep(context.Background(), client, ci.NewLocal(), scpmAnalyzeRequest{Name: "x"}, depsAnalyzeOpts{}) + if err != nil { + t.Fatalf("analyzeDep: %v", err) + } + if resp.Verdict != VerdictBenign { + t.Errorf("verdict = %q, want benign", resp.Verdict) + } +} + +func TestAnalyzeDep_WaitPollsUntilTerminal(t *testing.T) { + var calls atomic.Int32 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + n := calls.Add(1) + if n < 3 { + _, _ = w.Write([]byte(`{"jobId":"j1","status":"running"}`)) + return + } + _, _ = w.Write([]byte(`{"jobId":"j1","status":"completed","verdict":"confirmed_malicious"}`)) + })) + defer srv.Close() + + client := &api.Client{BaseURL: srv.URL, HTTPClient: srv.Client()} + opts := depsAnalyzeOpts{Wait: true, WaitInterval: time.Millisecond, WaitTimeout: 5 * time.Second} + resp, err := analyzeDep(context.Background(), client, ci.NewLocal(), scpmAnalyzeRequest{Name: "x"}, opts) + if err != nil { + t.Fatalf("analyzeDep: %v", err) + } + if resp.Verdict != VerdictMalicious { + t.Errorf("verdict = %q, want confirmed_malicious", resp.Verdict) + } + if calls.Load() < 3 { + t.Errorf("expected >=3 polls, got %d", calls.Load()) + } +} + +func TestAnalyzeDep_WaitTimeout(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(`{"jobId":"j1","status":"running"}`)) + })) + defer srv.Close() + + client := &api.Client{BaseURL: srv.URL, HTTPClient: srv.Client()} + opts := depsAnalyzeOpts{Wait: true, WaitInterval: time.Millisecond, WaitTimeout: 20 * time.Millisecond} + if _, err := analyzeDep(context.Background(), client, ci.NewLocal(), scpmAnalyzeRequest{Name: "x"}, opts); err == nil { + t.Fatal("expected wait-timeout error") + } +} + +func TestPostSCPMAnalyze_HTTPError(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte(`{"error":"boom"}`)) + })) + defer srv.Close() + + client := &api.Client{BaseURL: srv.URL, HTTPClient: srv.Client()} + if _, err := postSCPMAnalyze(context.Background(), client, ci.NewLocal(), scpmAnalyzeRequest{Name: "x"}); err == nil { + t.Fatal("expected error on 500") + } +} diff --git a/internal/scan/diff.go b/internal/scan/diff.go index 72a4926..c0dd900 100644 --- a/internal/scan/diff.go +++ b/internal/scan/diff.go @@ -14,18 +14,30 @@ import ( "github.com/nullify-platform/cli/internal/scan/manifest" ) +// ChangeKind distinguishes the three ways a dependency can change +// between base and head. +type ChangeKind string + +const ( + KindAdded ChangeKind = "added" + KindBumped ChangeKind = "bumped" + KindRemoved ChangeKind = "removed" +) + +func (k ChangeKind) String() string { return string(k) } + // ChangedDep is one dependency whose (ecosystem, name, version) tuple // changed between base and head. Kind distinguishes the three change // modes so the workflow can prioritise "new version + bumped version" // for analyze calls while skipping removals (removed deps don't have a // new version to analyse). type ChangedDep struct { - Ecosystem string + Ecosystem manifest.Ecosystem Name string Version string - PreviousVersion string // Empty for "added". - Kind string // "added" | "bumped" | "removed" - File string // Lockfile path change was detected in + PreviousVersion string // Empty for KindAdded. + Kind ChangeKind // KindAdded | KindBumped | KindRemoved + File string // Lockfile path change was detected in } // Diff compares two commits by reading their lockfiles via `git show` @@ -118,7 +130,7 @@ func parseAtRef( } type entryKey struct { - Ecosystem string + Ecosystem manifest.Ecosystem Name string } @@ -149,7 +161,7 @@ func diffEntries(base, head map[entryKey]manifest.Entry) []ChangedDep { Ecosystem: headEntry.Ecosystem, Name: headEntry.Name, Version: headEntry.Version, - Kind: "added", + Kind: KindAdded, File: headEntry.File, }) case baseEntry.Version != headEntry.Version: @@ -158,7 +170,7 @@ func diffEntries(base, head map[entryKey]manifest.Entry) []ChangedDep { Name: headEntry.Name, Version: headEntry.Version, PreviousVersion: baseEntry.Version, - Kind: "bumped", + Kind: KindBumped, File: headEntry.File, }) } @@ -169,7 +181,7 @@ func diffEntries(base, head map[entryKey]manifest.Entry) []ChangedDep { Ecosystem: baseEntry.Ecosystem, Name: baseEntry.Name, PreviousVersion: baseEntry.Version, - Kind: "removed", + Kind: KindRemoved, File: baseEntry.File, }) } diff --git a/internal/scan/diff_test.go b/internal/scan/diff_test.go new file mode 100644 index 0000000..6f4e014 --- /dev/null +++ b/internal/scan/diff_test.go @@ -0,0 +1,53 @@ +package scan + +import ( + "sort" + "testing" + + "github.com/nullify-platform/cli/internal/scan/manifest" +) + +func mkMap(entries ...manifest.Entry) map[entryKey]manifest.Entry { + m := map[entryKey]manifest.Entry{} + for _, e := range entries { + m[entryKey{Ecosystem: e.Ecosystem, Name: e.Name}] = e + } + return m +} + +func TestDiffEntries(t *testing.T) { + base := mkMap( + manifest.Entry{Ecosystem: manifest.EcosystemNPM, Name: "stable", Version: "1.0.0", File: "package-lock.json"}, + manifest.Entry{Ecosystem: manifest.EcosystemNPM, Name: "bumped", Version: "1.0.0", File: "package-lock.json"}, + manifest.Entry{Ecosystem: manifest.EcosystemNPM, Name: "removed", Version: "1.0.0", File: "package-lock.json"}, + ) + head := mkMap( + manifest.Entry{Ecosystem: manifest.EcosystemNPM, Name: "stable", Version: "1.0.0", File: "package-lock.json"}, + manifest.Entry{Ecosystem: manifest.EcosystemNPM, Name: "bumped", Version: "2.0.0", File: "package-lock.json"}, + manifest.Entry{Ecosystem: manifest.EcosystemNPM, Name: "added", Version: "0.1.0", File: "package-lock.json"}, + ) + + got := diffEntries(base, head) + sort.Slice(got, func(i, j int) bool { return got[i].Name < got[j].Name }) + + want := []ChangedDep{ + {Ecosystem: manifest.EcosystemNPM, Name: "added", Version: "0.1.0", Kind: KindAdded, File: "package-lock.json"}, + {Ecosystem: manifest.EcosystemNPM, Name: "bumped", Version: "2.0.0", PreviousVersion: "1.0.0", Kind: KindBumped, File: "package-lock.json"}, + {Ecosystem: manifest.EcosystemNPM, Name: "removed", PreviousVersion: "1.0.0", Kind: KindRemoved, File: "package-lock.json"}, + } + if len(got) != len(want) { + t.Fatalf("got %d changes, want %d: %+v", len(got), len(want), got) + } + for i := range want { + if got[i] != want[i] { + t.Errorf("change[%d] = %+v, want %+v", i, got[i], want[i]) + } + } +} + +func TestDiffEntries_NoChange(t *testing.T) { + m := mkMap(manifest.Entry{Ecosystem: manifest.EcosystemGo, Name: "x", Version: "v1.0.0"}) + if got := diffEntries(m, m); len(got) != 0 { + t.Fatalf("expected no changes, got %+v", got) + } +} diff --git a/internal/scan/manifest/cargo_lock.go b/internal/scan/manifest/cargo_lock.go index 06a0611..bc47949 100644 --- a/internal/scan/manifest/cargo_lock.go +++ b/internal/scan/manifest/cargo_lock.go @@ -40,7 +40,7 @@ func (c *CargoLock) Parse(data []byte, path string) ([]Entry, error) { return } out = append(out, Entry{ - Ecosystem: "crates.io", + Ecosystem: EcosystemCargo, Name: name, Version: version, File: path, diff --git a/internal/scan/manifest/gemfile_lock.go b/internal/scan/manifest/gemfile_lock.go index 93c1fe9..19b90bf 100644 --- a/internal/scan/manifest/gemfile_lock.go +++ b/internal/scan/manifest/gemfile_lock.go @@ -88,7 +88,7 @@ func (g *GemfileLock) Parse(data []byte, path string) ([]Entry, error) { continue } out = append(out, Entry{ - Ecosystem: "rubygems", + Ecosystem: EcosystemRubyGems, Name: name, Version: version, File: path, diff --git a/internal/scan/manifest/go_mod.go b/internal/scan/manifest/go_mod.go index 9b599c8..0971653 100644 --- a/internal/scan/manifest/go_mod.go +++ b/internal/scan/manifest/go_mod.go @@ -53,7 +53,7 @@ func (g *GoMod) Parse(data []byte, path string) ([]Entry, error) { } seen[key] = true out = append(out, Entry{ - Ecosystem: "go", + Ecosystem: EcosystemGo, Name: modPath, Version: version, File: path, diff --git a/internal/scan/manifest/manifest.go b/internal/scan/manifest/manifest.go index fe3a651..70f0518 100644 --- a/internal/scan/manifest/manifest.go +++ b/internal/scan/manifest/manifest.go @@ -15,11 +15,25 @@ import ( "strings" ) -// Entry is one parsed dependency record. Ecosystem matches the -// vdb_ecosystem enum values so we don't transform names between the CLI -// and vuln-database. +// Ecosystem is a package ecosystem identifier. Values match the +// vdb_ecosystem enum the vuln-database expects, so they travel from the +// CLI to scpm to vuln-database untransformed — keep them in sync with +// that enum when adding a parser. +type Ecosystem string + +const ( + EcosystemNPM Ecosystem = "npm" + EcosystemPyPI Ecosystem = "pypi" + EcosystemGo Ecosystem = "go" + EcosystemCargo Ecosystem = "crates.io" + EcosystemRubyGems Ecosystem = "rubygems" +) + +func (e Ecosystem) String() string { return string(e) } + +// Entry is one parsed dependency record. type Entry struct { - Ecosystem string + Ecosystem Ecosystem Name string Version string // File is the repo-relative path this entry came from — useful for diff --git a/internal/scan/manifest/manifest_test.go b/internal/scan/manifest/manifest_test.go new file mode 100644 index 0000000..f11875e --- /dev/null +++ b/internal/scan/manifest/manifest_test.go @@ -0,0 +1,200 @@ +package manifest + +import ( + "testing" +) + +// entriesEqual compares two Entry slices ignoring order — parsers that +// iterate maps (npm) don't guarantee a stable order. +func entriesEqual(t *testing.T, got, want []Entry) { + t.Helper() + if len(got) != len(want) { + t.Fatalf("entry count = %d, want %d\ngot: %+v\nwant: %+v", len(got), len(want), got, want) + } + index := func(es []Entry) map[string]Entry { + m := map[string]Entry{} + for _, e := range es { + m[string(e.Ecosystem)+"|"+e.Name+"|"+e.Version] = e + } + return m + } + gi, wi := index(got), index(want) + for k, w := range wi { + g, ok := gi[k] + if !ok { + t.Fatalf("missing entry %q\ngot: %+v", k, got) + } + if g != w { + t.Fatalf("entry %q = %+v, want %+v", k, g, w) + } + } +} + +func TestNPMLock(t *testing.T) { + // lockfileVersion 3 packages map: scoped, nested node_modules, empty + // root, and a missing-version entry that must be skipped. + data := []byte(`{ + "lockfileVersion": 3, + "packages": { + "": {"name": "root", "version": "1.0.0"}, + "node_modules/lodash": {"version": "4.17.21"}, + "node_modules/@scope/pkg": {"version": "2.0.0"}, + "node_modules/a/node_modules/lodash": {"version": "3.0.0"}, + "node_modules/noversion": {"dev": true} + } + }`) + got, err := NewNPMLock().Parse(data, "package-lock.json") + if err != nil { + t.Fatalf("parse: %v", err) + } + entriesEqual(t, got, []Entry{ + {Ecosystem: EcosystemNPM, Name: "lodash", Version: "4.17.21", File: "package-lock.json"}, + {Ecosystem: EcosystemNPM, Name: "@scope/pkg", Version: "2.0.0", File: "package-lock.json"}, + // nested node_modules/a/node_modules/lodash → "lodash" @ 3.0.0 + {Ecosystem: EcosystemNPM, Name: "lodash", Version: "3.0.0", File: "package-lock.json"}, + }) +} + +func TestNPMLock_InvalidJSON(t *testing.T) { + if _, err := NewNPMLock().Parse([]byte("{not json"), "package-lock.json"); err == nil { + t.Fatal("expected error on invalid JSON") + } +} + +func TestPyPILock(t *testing.T) { + data := []byte(`# a comment +requests==2.32.3 +flask == 3.0.0 # inline comment +django==4.2.11; python_version >= "3.8" +requests[security]==2.31.0 +numpy==1.26.4 --hash=sha256:abc +-r other.txt +-e . +git+https://github.com/foo/bar.git +https://example.com/pkg.whl +uvicorn~=0.29 +blank-name== +==1.2.3 +`) + got, err := NewPyPILock().Parse(data, "requirements.txt") + if err != nil { + t.Fatalf("parse: %v", err) + } + entriesEqual(t, got, []Entry{ + {Ecosystem: EcosystemPyPI, Name: "requests", Version: "2.32.3", File: "requirements.txt", Direct: true}, + {Ecosystem: EcosystemPyPI, Name: "flask", Version: "3.0.0", File: "requirements.txt", Direct: true}, + {Ecosystem: EcosystemPyPI, Name: "django", Version: "4.2.11", File: "requirements.txt", Direct: true}, + {Ecosystem: EcosystemPyPI, Name: "requests", Version: "2.31.0", File: "requirements.txt", Direct: true}, + {Ecosystem: EcosystemPyPI, Name: "numpy", Version: "1.26.4", File: "requirements.txt", Direct: true}, + }) +} + +func TestGoSum(t *testing.T) { + // go.sum: dedupe (zip + /go.mod rows), skip /go.mod-only rows. + data := []byte(`github.com/foo/bar v1.2.3 h1:abc= +github.com/foo/bar v1.2.3/go.mod h1:def= +github.com/baz/qux v0.1.0/go.mod h1:ghi= +github.com/baz/qux v0.1.0 h1:jkl= +golang.org/x/sys v0.18.0 h1:mno= +`) + got, err := NewGoMod().Parse(data, "go.sum") + if err != nil { + t.Fatalf("parse: %v", err) + } + entriesEqual(t, got, []Entry{ + {Ecosystem: EcosystemGo, Name: "github.com/foo/bar", Version: "v1.2.3", File: "go.sum"}, + {Ecosystem: EcosystemGo, Name: "github.com/baz/qux", Version: "v0.1.0", File: "go.sum"}, + {Ecosystem: EcosystemGo, Name: "golang.org/x/sys", Version: "v0.18.0", File: "go.sum"}, + }) +} + +func TestCargoLock(t *testing.T) { + // Workspace member (no source) must be skipped; registry crates kept. + data := []byte(`# auto-generated +[[package]] +name = "my-app" +version = "0.1.0" + +[[package]] +name = "serde" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "tokio" +version = "1.36.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +foo = "bar" +`) + got, err := NewCargoLock().Parse(data, "Cargo.lock") + if err != nil { + t.Fatalf("parse: %v", err) + } + entriesEqual(t, got, []Entry{ + {Ecosystem: EcosystemCargo, Name: "serde", Version: "1.0.197", File: "Cargo.lock"}, + {Ecosystem: EcosystemCargo, Name: "tokio", Version: "1.36.0", File: "Cargo.lock"}, + }) +} + +func TestGemfileLock(t *testing.T) { + // Only indent-4 spec lines under GEM/specs:; indent-6 transitive + // declarations are skipped. + data := []byte("GEM\n" + + " remote: https://rubygems.org/\n" + + " specs:\n" + + " activerecord (7.1.3)\n" + + " activemodel (= 7.1.3)\n" + + " activesupport (= 7.1.3)\n" + + " i18n (1.14.1)\n" + + " concurrent-ruby (~> 1.0)\n" + + "PLATFORMS\n" + + " x86_64-linux\n" + + "DEPENDENCIES\n" + + " rails\n") + got, err := NewGemfileLock().Parse(data, "Gemfile.lock") + if err != nil { + t.Fatalf("parse: %v", err) + } + entriesEqual(t, got, []Entry{ + {Ecosystem: EcosystemRubyGems, Name: "activerecord", Version: "7.1.3", File: "Gemfile.lock"}, + {Ecosystem: EcosystemRubyGems, Name: "i18n", Version: "1.14.1", File: "Gemfile.lock"}, + }) +} + +func TestMatches(t *testing.T) { + cases := []struct { + parser Parser + path string + want bool + }{ + {NewNPMLock(), "frontend/package-lock.json", true}, + {NewNPMLock(), "package.json", false}, + {NewPyPILock(), "requirements.txt", true}, + {NewPyPILock(), "requirements-dev.txt", true}, + {NewGoMod(), "go.sum", true}, + {NewGoMod(), "go.mod", false}, + {NewCargoLock(), "Cargo.lock", true}, + {NewGemfileLock(), "Gemfile.lock", true}, + } + for _, c := range cases { + if got := c.parser.Matches(c.path); got != c.want { + t.Errorf("%s.Matches(%q) = %v, want %v", c.parser.Name(), c.path, got, c.want) + } + } +} + +func TestParseAll_FirstMatchWins(t *testing.T) { + files := []File{ + {Path: "go.sum", Data: []byte("github.com/foo/bar v1.0.0 h1:x=\n")}, + {Path: "README.md", Data: []byte("# not a lockfile")}, + } + res := ParseAll(DefaultParsers(), files) + if len(res.Entries) != 1 { + t.Fatalf("entries = %d, want 1", len(res.Entries)) + } + if len(res.Errors) != 0 { + t.Fatalf("errors = %v, want none", res.Errors) + } +} diff --git a/internal/scan/manifest/npm_lock.go b/internal/scan/manifest/npm_lock.go index c6b1ff8..fdbec2d 100644 --- a/internal/scan/manifest/npm_lock.go +++ b/internal/scan/manifest/npm_lock.go @@ -57,7 +57,7 @@ func (n *NPMLock) Parse(data []byte, path string) ([]Entry, error) { continue } out = append(out, Entry{ - Ecosystem: "npm", + Ecosystem: EcosystemNPM, Name: name, Version: pkg.Version, File: path, diff --git a/internal/scan/manifest/pypi_lock.go b/internal/scan/manifest/pypi_lock.go index 81fa81b..65b9993 100644 --- a/internal/scan/manifest/pypi_lock.go +++ b/internal/scan/manifest/pypi_lock.go @@ -85,7 +85,7 @@ func (p *PyPILock) Parse(data []byte, path string) ([]Entry, error) { name = name[:bracket] } out = append(out, Entry{ - Ecosystem: "pypi", + Ecosystem: EcosystemPyPI, Name: name, Version: ver, File: path,