From 75daed8de681ac8e3f7818281d78d46d9f224887 Mon Sep 17 00:00:00 2001 From: Paul Johnston Date: Sat, 21 Mar 2026 17:47:18 -0600 Subject: [PATCH 1/3] Add extract command to retrieve files from image layers Adds a new "extract" subcommand that extracts file contents from container image layers. Layers are searched in reverse order so the result matches the effective filesystem of a running container. Supports writing to stdout (default) or to a directory via --output_dir. --- main.go | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/main.go b/main.go index eb0f646..613d97a 100644 --- a/main.go +++ b/main.go @@ -5,8 +5,10 @@ import ( "fmt" "io" "os" + "path/filepath" "sort" "strconv" + "strings" "text/tabwriter" "github.com/dustin/go-humanize" @@ -61,6 +63,27 @@ func main() { }, }, }, + { + Name: "extract", + Usage: "extract files from an image and print to stdout", + Action: func(c *cli.Context) error { + cfg := &config{ + ref: c.Args().First(), + files: c.Args().Tail(), + outputDir: c.String("output_dir"), + } + if err := extract(cfg); err != nil { + return cli.Exit(c.Command.Name+": "+err.Error(), 1) + } + return nil + }, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "output_dir", + Usage: "Write extracted files to this directory instead of stdout", + }, + }, + }, }, } @@ -75,6 +98,10 @@ type config struct { layerIDs []string // sort is true if the output should be sorted by size. sort bool + // files is the list of file paths to extract. + files []string + // outputDir is the directory to write extracted files to. + outputDir string } // makeOptions returns the options for crane. @@ -271,3 +298,110 @@ func files(cfg *config, layer v1.Layer) error { return tw.Flush() } + +// extract extracts files from an image and writes them to stdout or a directory. +func extract(cfg *config) error { + if len(cfg.files) == 0 { + return fmt.Errorf("no files specified") + } + + image, err := getImage(cfg.ref) + if err != nil { + return err + } + + layers, err := image.Layers() + if err != nil { + return fmt.Errorf("getting layers: %w", err) + } + + // Build a set of wanted files for quick lookup. + // Normalize by stripping leading slash. + wanted := make(map[string]bool, len(cfg.files)) + for _, f := range cfg.files { + wanted[strings.TrimPrefix(f, "/")] = true + } + + found := make(map[string]bool, len(cfg.files)) + + // Search layers in reverse order (last wins) to match container runtime behavior. + for i := len(layers) - 1; i >= 0; i-- { + layer := layers[i] + + uncompressed, err := layer.Uncompressed() + if err != nil { + return fmt.Errorf("getting layer: %w", err) + } + + tarReader := tar.NewReader(uncompressed) + for { + header, err := tarReader.Next() + if err == io.EOF { + break + } + if err != nil { + uncompressed.Close() + return fmt.Errorf("reading tar: %w", err) + } + + name := strings.TrimPrefix(header.Name, "./") + name = strings.TrimPrefix(name, "/") + + if !wanted[name] || found[name] { + continue + } + + if err := extractFile(cfg, name, tarReader); err != nil { + uncompressed.Close() + return err + } + found[name] = true + + // Stop early if all files found. + if len(found) == len(wanted) { + uncompressed.Close() + return nil + } + } + uncompressed.Close() + } + + // Report any files not found. + var missing []string + for _, f := range cfg.files { + name := strings.TrimPrefix(f, "/") + if !found[name] { + missing = append(missing, f) + } + } + if len(missing) > 0 { + return fmt.Errorf("files not found: %s", strings.Join(missing, ", ")) + } + + return nil +} + +// extractFile writes the contents of a tar entry to stdout or to a file under outputDir. +func extractFile(cfg *config, name string, r io.Reader) error { + if cfg.outputDir == "" { + _, err := io.Copy(os.Stdout, r) + return err + } + + outPath := filepath.Join(cfg.outputDir, name) + if err := os.MkdirAll(filepath.Dir(outPath), 0o755); err != nil { + return fmt.Errorf("creating directory for %s: %w", name, err) + } + + f, err := os.Create(outPath) + if err != nil { + return fmt.Errorf("creating file %s: %w", outPath, err) + } + defer f.Close() + + if _, err := io.Copy(f, r); err != nil { + return fmt.Errorf("writing file %s: %w", outPath, err) + } + + return nil +} From 145c5308d76c97a5314dfb747a6337970ef4fc11 Mon Sep 17 00:00:00 2001 From: Paul Johnston Date: Sat, 21 Mar 2026 17:54:04 -0600 Subject: [PATCH 2/3] Refactor for unit testing --- main.go | 26 ++++- main_test.go | 272 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 294 insertions(+), 4 deletions(-) create mode 100644 main_test.go diff --git a/main.go b/main.go index 613d97a..bc9f1ce 100644 --- a/main.go +++ b/main.go @@ -33,6 +33,7 @@ func main() { Action: func(c *cli.Context) error { cfg := &config{ ref: c.Args().First(), + out: os.Stdout, } if err := inspect(cfg); err != nil { return cli.Exit(c.Command.Name+": "+err.Error(), 1) @@ -48,6 +49,7 @@ func main() { ref: c.Args().First(), layerIDs: c.Args().Tail(), sort: c.Bool("sort"), + out: os.Stdout, } if err := ls(cfg); err != nil { @@ -71,6 +73,7 @@ func main() { ref: c.Args().First(), files: c.Args().Tail(), outputDir: c.String("output_dir"), + out: os.Stdout, } if err := extract(cfg); err != nil { return cli.Exit(c.Command.Name+": "+err.Error(), 1) @@ -102,6 +105,8 @@ type config struct { files []string // outputDir is the directory to write extracted files to. outputDir string + // out is the writer for output. + out io.Writer } // makeOptions returns the options for crane. @@ -175,13 +180,17 @@ func inspect(cfg *config) error { if err != nil { return err } + return inspectImage(cfg, image) +} +// inspectImage prints info about the layers of an image. +func inspectImage(cfg *config, image v1.Image) error { layers, err := image.Layers() if err != nil { return err } - tw := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) + tw := tabwriter.NewWriter(cfg.out, 0, 0, 2, ' ', 0) tw.Write([]byte("N\tLayer\tSize\n")) for i, layer := range layers { hash, err := layer.DiffID() @@ -205,7 +214,11 @@ func ls(cfg *config) error { if err != nil { return err } + return lsImage(cfg, image) +} +// lsImage lists files in the layers of an image. +func lsImage(cfg *config, image v1.Image) error { layers, err := image.Layers() if err != nil { return fmt.Errorf("getting layers: %w", err) @@ -264,8 +277,8 @@ func files(cfg *config, layer v1.Layer) error { tarReader := tar.NewReader(uncompressed) - fmt.Printf("\n--- %s ---\n", hash) - tw := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) + fmt.Fprintf(cfg.out, "\n--- %s ---\n", hash) + tw := tabwriter.NewWriter(cfg.out, 0, 0, 2, ' ', 0) tw.Write([]byte("Mode\tSize\tName\n")) headers := make([]*tar.Header, 0) @@ -310,6 +323,11 @@ func extract(cfg *config) error { return err } + return extractFromImage(cfg, image) +} + +// extractFromImage extracts files from the given image. +func extractFromImage(cfg *config, image v1.Image) error { layers, err := image.Layers() if err != nil { return fmt.Errorf("getting layers: %w", err) @@ -384,7 +402,7 @@ func extract(cfg *config) error { // extractFile writes the contents of a tar entry to stdout or to a file under outputDir. func extractFile(cfg *config, name string, r io.Reader) error { if cfg.outputDir == "" { - _, err := io.Copy(os.Stdout, r) + _, err := io.Copy(cfg.out, r) return err } diff --git a/main_test.go b/main_test.go new file mode 100644 index 0000000..3d2e261 --- /dev/null +++ b/main_test.go @@ -0,0 +1,272 @@ +package main + +import ( + "archive/tar" + "bytes" + "os" + "path/filepath" + "strings" + "testing" + + v1 "github.com/google/go-containerregistry/pkg/v1" + "github.com/google/go-containerregistry/pkg/v1/empty" + "github.com/google/go-containerregistry/pkg/v1/mutate" + "github.com/google/go-containerregistry/pkg/v1/tarball" +) + +// createTestLayer creates a v1.Layer from a map of filename to content. +func createTestLayer(t *testing.T, fileContents map[string]string) v1.Layer { + t.Helper() + var buf bytes.Buffer + tw := tar.NewWriter(&buf) + for name, content := range fileContents { + if err := tw.WriteHeader(&tar.Header{ + Name: name, + Mode: 0644, + Size: int64(len(content)), + Typeflag: tar.TypeReg, + }); err != nil { + t.Fatal(err) + } + if _, err := tw.Write([]byte(content)); err != nil { + t.Fatal(err) + } + } + if err := tw.Close(); err != nil { + t.Fatal(err) + } + layer, err := tarball.LayerFromReader(&buf) + if err != nil { + t.Fatal(err) + } + return layer +} + +// createTestImage creates a v1.Image with the given layers. +func createTestImage(t *testing.T, layers ...v1.Layer) v1.Image { + t.Helper() + img, err := mutate.AppendLayers(empty.Image, layers...) + if err != nil { + t.Fatal(err) + } + return img +} + +func TestInspectImage(t *testing.T) { + layer1 := createTestLayer(t, map[string]string{"a.txt": "hello"}) + layer2 := createTestLayer(t, map[string]string{"b.txt": "world"}) + img := createTestImage(t, layer1, layer2) + + var buf bytes.Buffer + cfg := &config{out: &buf} + + if err := inspectImage(cfg, img); err != nil { + t.Fatal(err) + } + + output := buf.String() + lines := strings.Split(strings.TrimSpace(output), "\n") + // Header + 2 layer lines + if len(lines) != 3 { + t.Fatalf("expected 3 lines, got %d:\n%s", len(lines), output) + } + if !strings.Contains(lines[0], "N") || !strings.Contains(lines[0], "Layer") || !strings.Contains(lines[0], "Size") { + t.Errorf("unexpected header: %s", lines[0]) + } + if !strings.HasPrefix(strings.TrimSpace(lines[1]), "1") { + t.Errorf("expected line 1 to start with '1': %s", lines[1]) + } + if !strings.HasPrefix(strings.TrimSpace(lines[2]), "2") { + t.Errorf("expected line 2 to start with '2': %s", lines[2]) + } +} + +func TestFiles(t *testing.T) { + layer := createTestLayer(t, map[string]string{ + "app/main.go": "package main", + "app/utils.go": "package utils", + "README.md": "# Project", + }) + + var buf bytes.Buffer + cfg := &config{out: &buf} + + if err := files(cfg, layer); err != nil { + t.Fatal(err) + } + + output := buf.String() + for _, name := range []string{"app/main.go", "app/utils.go", "README.md"} { + if !strings.Contains(output, name) { + t.Errorf("expected %q in output:\n%s", name, output) + } + } + if !strings.Contains(output, "Mode") || !strings.Contains(output, "Size") || !strings.Contains(output, "Name") { + t.Errorf("expected header in output:\n%s", output) + } +} + +func TestFilesSorted(t *testing.T) { + layer := createTestLayer(t, map[string]string{ + "small.txt": "x", + "medium.txt": "xxxx", + "large.txt": strings.Repeat("x", 100), + }) + + var buf bytes.Buffer + cfg := &config{out: &buf, sort: true} + + if err := files(cfg, layer); err != nil { + t.Fatal(err) + } + + output := buf.String() + largeIdx := strings.Index(output, "large.txt") + mediumIdx := strings.Index(output, "medium.txt") + smallIdx := strings.Index(output, "small.txt") + + if largeIdx < 0 || mediumIdx < 0 || smallIdx < 0 { + t.Fatalf("missing files in output:\n%s", output) + } + if !(largeIdx < mediumIdx && mediumIdx < smallIdx) { + t.Errorf("files not sorted by size (large=%d, medium=%d, small=%d):\n%s", + largeIdx, mediumIdx, smallIdx, output) + } +} + +func TestExtractToStdout(t *testing.T) { + layer := createTestLayer(t, map[string]string{ + "hello.txt": "hello world", + }) + img := createTestImage(t, layer) + + var buf bytes.Buffer + cfg := &config{ + files: []string{"hello.txt"}, + out: &buf, + } + + if err := extractFromImage(cfg, img); err != nil { + t.Fatal(err) + } + + if got := buf.String(); got != "hello world" { + t.Errorf("expected %q, got %q", "hello world", got) + } +} + +func TestExtractLastLayerWins(t *testing.T) { + layer1 := createTestLayer(t, map[string]string{ + "config.json": `{"version": 1}`, + }) + layer2 := createTestLayer(t, map[string]string{ + "config.json": `{"version": 2}`, + }) + img := createTestImage(t, layer1, layer2) + + var buf bytes.Buffer + cfg := &config{ + files: []string{"config.json"}, + out: &buf, + } + + if err := extractFromImage(cfg, img); err != nil { + t.Fatal(err) + } + + if got := buf.String(); got != `{"version": 2}` { + t.Errorf("expected version 2 from last layer, got %q", got) + } +} + +func TestExtractFileNotFound(t *testing.T) { + layer := createTestLayer(t, map[string]string{ + "exists.txt": "content", + }) + img := createTestImage(t, layer) + + var buf bytes.Buffer + cfg := &config{ + files: []string{"nonexistent.txt"}, + out: &buf, + } + + err := extractFromImage(cfg, img) + if err == nil { + t.Fatal("expected error for missing file") + } + if !strings.Contains(err.Error(), "not found") { + t.Errorf("expected 'not found' error, got: %v", err) + } +} + +func TestExtractToOutputDir(t *testing.T) { + layer := createTestLayer(t, map[string]string{ + "app/config.yaml": "key: value", + }) + img := createTestImage(t, layer) + + dir := t.TempDir() + cfg := &config{ + files: []string{"app/config.yaml"}, + outputDir: dir, + out: &bytes.Buffer{}, + } + + if err := extractFromImage(cfg, img); err != nil { + t.Fatal(err) + } + + data, err := os.ReadFile(filepath.Join(dir, "app", "config.yaml")) + if err != nil { + t.Fatal(err) + } + if string(data) != "key: value" { + t.Errorf("expected %q, got %q", "key: value", string(data)) + } +} + +func TestLsImage(t *testing.T) { + layer1 := createTestLayer(t, map[string]string{"layer1.txt": "a"}) + layer2 := createTestLayer(t, map[string]string{"layer2.txt": "b"}) + img := createTestImage(t, layer1, layer2) + + var buf bytes.Buffer + cfg := &config{out: &buf} + + if err := lsImage(cfg, img); err != nil { + t.Fatal(err) + } + + output := buf.String() + if !strings.Contains(output, "layer1.txt") { + t.Errorf("expected layer1.txt in output:\n%s", output) + } + if !strings.Contains(output, "layer2.txt") { + t.Errorf("expected layer2.txt in output:\n%s", output) + } +} + +func TestLsImageWithLayerID(t *testing.T) { + layer1 := createTestLayer(t, map[string]string{"layer1.txt": "a"}) + layer2 := createTestLayer(t, map[string]string{"layer2.txt": "b"}) + img := createTestImage(t, layer1, layer2) + + var buf bytes.Buffer + cfg := &config{ + layerIDs: []string{"1"}, + out: &buf, + } + + if err := lsImage(cfg, img); err != nil { + t.Fatal(err) + } + + output := buf.String() + if !strings.Contains(output, "layer1.txt") { + t.Errorf("expected layer1.txt in output:\n%s", output) + } + if strings.Contains(output, "layer2.txt") { + t.Errorf("did not expect layer2.txt for layer 1 only:\n%s", output) + } +} From 643468ada5b41c594c7f0573e8a8ccebe267cc6e Mon Sep 17 00:00:00 2001 From: Paul Johnston Date: Sat, 21 Mar 2026 17:54:38 -0600 Subject: [PATCH 3/3] add pr gh action --- .github/workflows/pr.yaml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/workflows/pr.yaml diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml new file mode 100644 index 0000000..a32cf7d --- /dev/null +++ b/.github/workflows/pr.yaml @@ -0,0 +1,30 @@ +name: PR + +on: + pull_request: + branches: + - master + +jobs: + check: + name: Vet, Build, Test + runs-on: ubuntu-latest + + steps: + - name: Check out code + uses: actions/checkout@v2 + + - name: Set up Go 1.16 + uses: actions/setup-go@v2 + with: + go-version: 1.16 + id: go + + - name: Vet + run: go vet ./... + + - name: Build + run: go build ./... + + - name: Test + run: go test ./...