diff --git a/go.mod b/go.mod
index 555605d..0b13e44 100644
--- a/go.mod
+++ b/go.mod
@@ -1,10 +1,11 @@
 module github.com/containerd/continuity
 
-go 1.21
+go 1.23
 
 require (
 	github.com/Microsoft/go-winio v0.6.2
 	github.com/containerd/log v0.1.0
+	github.com/erofs/go-erofs v0.3.1-0.20260531080512-069dc32d83e6
 	github.com/opencontainers/go-digest v1.0.0
 	golang.org/x/sync v0.8.0
 	golang.org/x/sys v0.26.0
diff --git a/go.sum b/go.sum
index 4cd8137..2410be8 100644
--- a/go.sum
+++ b/go.sum
@@ -5,6 +5,8 @@ github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/erofs/go-erofs v0.3.1-0.20260531080512-069dc32d83e6 h1:a9BU6HU86UHLPxkABcUIoLLClluURHpYLU6fM88VrjU=
+github.com/erofs/go-erofs v0.3.1-0.20260531080512-069dc32d83e6/go.mod h1:XkSeN9MHszGd4+3gcEjadJLYHCQpWzJ7/8yznzMuzJs=
 github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
 github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
diff --git a/tarconv/apply.go b/tarconv/apply.go
new file mode 100644
index 0000000..1704d5b
--- /dev/null
+++ b/tarconv/apply.go
@@ -0,0 +1,514 @@
+// Package tarconv ingests OCI/Docker tar layer streams into an [erofs.Writer]
+// via direct writer calls, without staging an intermediate fs.FS.
+//
+// The single entry point is [Apply]. It handles all tar entry types (regular
+// files, directories, symlinks, hard links, device nodes, FIFOs) and three
+// whiteout strategies selectable via options:
+//
+//   - Default (no option): translate AUFS/OCI whiteouts to overlayfs xattrs.
+//     Suitable for per-layer EROFS images that will be stacked at runtime.
+//   - [WithMerge]: resolve whiteouts structurally by removing entries.
+//     Suitable for flat merged images where all layers are applied in sequence.
+//   - [WithPreserveWhiteouts]: keep .wh.* entries as plain files.
+//     Suitable for tooling that needs the raw tar content.
+package tarconv
+
+import (
+	archivetar "archive/tar"
+	"errors"
+	"fmt"
+	"io"
+	"io/fs"
+	"path"
+	"strings"
+
+	erofs "github.com/erofs/go-erofs"
+)
+
+// Unix inode type bits (S_IF*), matching the values expected by erofs.Writer.Mknod.
+const (
+	sifChrdev uint16 = 0o020000 // S_IFCHR: character device
+	sifBlkdev uint16 = 0o060000 // S_IFBLK: block device
+	sifFifo   uint16 = 0o010000 // S_IFIFO: FIFO / named pipe
+)
+
+const (
+	whiteoutPrefix     = ".wh."                   // base-name prefix marking an AUFS/OCI whiteout entry
+	opaqueWhiteout     = ".wh..wh..opq"           // base name of an opaque-directory marker
+	overlayOpaqueXattr = "trusted.overlay.opaque" // set to "y" on opaque directories
+	overlayOriginXattr = "trusted.overlay.origin" // set to "" on directories that contain whiteouts
+	xattrPrefix        = "SCHILY.xattr."          // PAX record key prefix that carries an xattr
+)
+
+// whiteoutMode selects how AUFS/OCI whiteout entries are processed.
+type whiteoutMode int
+
+const (
+	// whiteoutConvert (the zero value, hence the default) rewrites whiteouts in overlayfs form.
+	whiteoutConvert whiteoutMode = iota
+	// whiteoutMerge resolves whiteouts by removing entries from the writer tree.
+	whiteoutMerge
+	// whiteoutPreserve skips whiteout detection, so .wh.* entries are stored like any other entry.
+	whiteoutPreserve
+)
+
+// config holds the parsed options for an Apply call.
+type config struct {
+	whiteouts whiteoutMode // whiteout strategy; the zero value is whiteoutConvert
+}
+
+// Option configures an [Apply] call; options run in order before any tar entry is read.
+type Option func(*config)
+
+// WithMerge switches Apply to structural whiteout resolution, which yields a
+// flat merged filesystem carrying no overlay xattrs:
+//   - .wh.<name> removes the sibling path from the writer's current tree; a
+//     target that does not exist (ErrNotExist) is silently ignored.
+//   - .wh..wh..opq removes every existing child of the containing directory
+//     but keeps the directory itself, so later entries can repopulate it.
+//
+// Pass WithMerge on every call when applying layers one after another to
+// build a single merged image.
+func WithMerge() Option {
+	return func(cfg *config) { cfg.whiteouts = whiteoutMerge }
+}
+
+// WithPreserveWhiteouts disables whiteout translation: .wh.* and .wh..wh..opq
+// entries bypass whiteout detection and are dispatched by their tar type like
+// any other entry, so the raw tar content is kept.
+func WithPreserveWhiteouts() Option {
+	return func(cfg *config) { cfg.whiteouts = whiteoutPreserve }
+}
+
+// pendingLink records a hard link whose target had not yet appeared when the
+// link entry was processed. Only header metadata is stored; no payload bytes.
+type pendingLink struct {
+	newname string            // path of the link to create, cleaned
+	oldname string            // target path, cleaned
+	hdr     archivetar.Header // copy of the link entry's header; its metadata is applied once linked
+}
+
+// Apply ingests the tar stream r into w, translating each entry into a direct
+// [erofs.Writer] call.
+//
+// By default (no options), AUFS/OCI whiteout entries are translated to
+// overlayfs-compatible representation:
+//   - .wh.<name> becomes a character device 0/0 at the sibling path, and the
+//     containing directory receives trusted.overlay.origin="".
+//   - .wh..wh..opq sets trusted.overlay.opaque=y on the containing directory.
+//
+// This matches the behaviour of mkfs.erofs --aufs and is appropriate for
+// single-layer EROFS images that will be stacked by an overlayfs consumer.
+//
+// Use [WithMerge] to resolve whiteouts structurally instead (flat merged image);
+// a whited-out directory is removed together with everything beneath it.
+// Use [WithPreserveWhiteouts] to keep whiteout entries as plain files.
+// In the other two modes an entry named exactly ".wh." is rejected as malformed.
+//
+// Hard links may appear in any order. Links whose targets have not yet appeared
+// are queued and resolved as subsequent entries are processed. An unresolved
+// hard link at EOF is returned as an error.
+func Apply(w *erofs.Writer, r io.Reader, opts ...Option) error {
+	var cfg config
+	for _, o := range opts {
+		o(&cfg)
+	}
+
+	tr := archivetar.NewReader(r)
+
+	// pending records hard links whose targets haven't appeared yet.
+	var pending []pendingLink
+
+	// pendingOrigin: dirs owed trusted.overlay.origin="" once their TypeDir entry appears.
+	var pendingOrigin map[string]bool
+
+	for {
+		hdr, err := tr.Next()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return fmt.Errorf("tarconv: %w", err)
+		}
+
+		p := cleanTarPath(hdr.Name)
+		base := path.Base(p)
+		dir := path.Dir(p)
+
+		// --- Whiteout detection ---
+		// Detect whiteouts by name prefix before the normal type switch so they are
+		// never added as real entries (unless WithPreserveWhiteouts is active).
+		if cfg.whiteouts != whiteoutPreserve && strings.HasPrefix(base, whiteoutPrefix) {
+			if base == opaqueWhiteout {
+				switch cfg.whiteouts {
+				case whiteoutMerge:
+					if err := removeChildren(w, dir); err != nil {
+						return fmt.Errorf("tarconv: opaque %s: %w", dir, err)
+					}
+				default: // whiteoutConvert
+					if err := setOpaqueXattr(w, dir, hdr); err != nil {
+						return fmt.Errorf("tarconv: opaque %s: %w", dir, err)
+					}
+				}
+			} else if base == whiteoutPrefix { // no target name: the target would be dir itself
+				return fmt.Errorf("tarconv: whiteout %s: missing target name", p)
+			} else {
+				target := path.Join(dir, base[len(whiteoutPrefix):])
+				switch cfg.whiteouts {
+				case whiteoutMerge:
+					if err := removeAll(w, target); err != nil { // recursive; a missing target is not an error
+						return fmt.Errorf("tarconv: whiteout %s: %w", target, err)
+					}
+				default: // whiteoutConvert
+					if err := emitWhiteout(w, target, hdr); err != nil {
+						return fmt.Errorf("tarconv: whiteout %s: %w", target, err)
+					}
+					// Mark the parent with trusted.overlay.origin="", as mkfs.erofs --aufs does.
+					if _, serr := w.Stat(dir); serr == nil {
+						if err := w.Setxattr(dir, overlayOriginXattr, ""); err != nil {
+							return fmt.Errorf("tarconv: whiteout origin %s: %w", dir, err)
+						}
+					} else {
+						// Dir not yet seen — queue for when it appears.
+						if pendingOrigin == nil {
+							pendingOrigin = make(map[string]bool)
+						}
+						pendingOrigin[dir] = true
+					}
+				}
+			}
+			// Drain any data bytes (whiteouts are zero-size in practice but be safe).
+			if _, err := io.Copy(io.Discard, tr); err != nil {
+				return fmt.Errorf("tarconv: drain %s: %w", p, err)
+			}
+			continue
+		}
+
+		// --- Normal entry dispatch ---
+		switch hdr.Typeflag {
+		case archivetar.TypeDir:
+			if err := addDir(w, p, hdr); err != nil {
+				return fmt.Errorf("tarconv: %s: %w", p, err)
+			}
+			if pendingOrigin[p] {
+				if err := w.Setxattr(p, overlayOriginXattr, ""); err != nil {
+					return fmt.Errorf("tarconv: whiteout origin %s: %w", p, err)
+				}
+				delete(pendingOrigin, p)
+			}
+
+		case archivetar.TypeReg, archivetar.TypeRegA: //nolint:staticcheck
+			// Remove any existing entry to handle tar overwrite semantics.
+			removeExisting(w, p)
+			if err := addFile(w, p, hdr, tr); err != nil {
+				return fmt.Errorf("tarconv: %s: %w", p, err)
+			}
+			pending = replayPending(w, pending)
+
+		case archivetar.TypeSymlink:
+			removeExisting(w, p)
+			if err := addSymlink(w, p, hdr); err != nil {
+				return fmt.Errorf("tarconv: %s: %w", p, err)
+			}
+			pending = replayPending(w, pending)
+
+		case archivetar.TypeLink:
+			oldname := cleanTarPath(hdr.Linkname)
+			err := w.Link(oldname, p)
+			if err == nil {
+				if err := applyMetadata(w, p, hdr); err != nil {
+					return fmt.Errorf("tarconv: %s metadata: %w", p, err)
+				}
+				pending = replayPending(w, pending)
+			} else if isNotExist(err) {
+				pending = append(pending, pendingLink{newname: p, oldname: oldname, hdr: *hdr})
+			} else {
+				return fmt.Errorf("tarconv: hardlink %s→%s: %w", p, oldname, err)
+			}
+
+		case archivetar.TypeChar, archivetar.TypeBlock:
+			removeExisting(w, p)
+			if err := addDevice(w, p, hdr); err != nil {
+				return fmt.Errorf("tarconv: %s: %w", p, err)
+			}
+			pending = replayPending(w, pending)
+
+		case archivetar.TypeFifo:
+			removeExisting(w, p)
+			if err := addFifo(w, p, hdr); err != nil {
+				return fmt.Errorf("tarconv: %s: %w", p, err)
+			}
+			pending = replayPending(w, pending)
+
+		case archivetar.TypeXGlobalHeader:
+			// archive/tar merges PAX global headers into subsequent entries automatically.
+
+		default:
+			// Skip unrecognised entry types so future tar extensions don't break consumers.
+		}
+	}
+
+	// Drain the remainder of the underlying stream to EOF (best effort). Tar archives
+	// end with zero-block padding, and callers may wrap r in a pipe or network stream
+	// that must be fully consumed before the writer side can detect a clean close.
+	_, _ = io.Copy(io.Discard, r)
+
+	if len(pending) > 0 {
+		return fmt.Errorf("tarconv: unresolved hard link %q → %q (target never appeared)",
+			pending[0].newname, pending[0].oldname)
+	}
+	return nil
+}
+
+// --- Entry creation helpers ---
+
+// addDir creates directory p with hdr's mode, or refreshes the metadata of an existing one.
+func addDir(w *erofs.Writer, p string, hdr *archivetar.Header) error {
+	mkErr := w.Mkdir(p, tarModeToGoMode(hdr.Mode))
+	if isDuplicatePath(mkErr) {
+		// Tar streams may list a directory repeatedly; a repeat is a metadata update.
+		if info, statErr := w.Stat(p); statErr == nil && info.IsDir() {
+			mkErr = nil
+		}
+	}
+	if mkErr != nil {
+		return mkErr
+	}
+	return applyMetadata(w, p, hdr)
+}
+
+// addFile creates regular file p from the current tar entry: payload first, then
+// mode and owner on the open handle, then the remaining metadata after Close.
+func addFile(w *erofs.Writer, p string, hdr *archivetar.Header, tr *archivetar.Reader) error {
+	f, err := w.Create(p)
+	if err != nil {
+		return err
+	}
+	// Each step runs only if the previous one succeeded; err keeps the first failure.
+	if _, err = io.Copy(f, tr); err != nil {
+		err = fmt.Errorf("copy data: %w", err)
+	} else if err = f.Chmod(tarModeToGoMode(hdr.Mode)); err == nil {
+		err = f.Chown(hdr.Uid, hdr.Gid)
+	}
+	if err != nil {
+		_ = f.Close() // best effort: the earlier error is the one reported
+		return err
+	}
+	if err := f.Close(); err != nil {
+		return err
+	}
+	return applyMetadata(w, p, hdr)
+}
+
+func addSymlink(w *erofs.Writer, p string, hdr *archivetar.Header) error {
+	linkErr := w.Symlink(hdr.Linkname, p) // the tar Linkname is passed through as-is
+	if linkErr == nil {
+		return applyMetadata(w, p, hdr)
+	}
+	return linkErr
+}
+
+func addDevice(w *erofs.Writer, p string, hdr *archivetar.Header) error {
+	kind := sifChrdev // TypeChar; anything else handled here is a block device
+	if hdr.Typeflag != archivetar.TypeChar {
+		kind = sifBlkdev
+	}
+	perm := uint16(tarModeToGoMode(hdr.Mode).Perm())
+	if err := w.Mknod(p, kind|perm, mkdev(hdr.Devmajor, hdr.Devminor)); err != nil {
+		return err
+	}
+	return applyMetadata(w, p, hdr)
+}
+
+func addFifo(w *erofs.Writer, p string, hdr *archivetar.Header) error {
+	perm := uint16(tarModeToGoMode(hdr.Mode).Perm())
+	if err := w.Mknod(p, sifFifo|perm, 0); err != nil { // a FIFO carries no device number
+		return err
+	}
+	return applyMetadata(w, p, hdr)
+}
+
+// emitWhiteout replaces target with an overlayfs whiteout (char device 0:0, mode 0) stamped with hdr's mtime.
+func emitWhiteout(w *erofs.Writer, target string, hdr *archivetar.Header) error {
+	removeExisting(w, target)
+	err := w.Mknod(target, sifChrdev, 0)
+	if err == nil {
+		err = w.Chtimes(target, hdr.ModTime, hdr.ModTime)
+	}
+	return err
+}
+
+// setOpaqueXattr marks dir opaque (trusted.overlay.opaque=y) for a .wh..wh..opq entry. A missing
+// dir is first created as a 0o755 placeholder that a later TypeDir entry can update.
+func setOpaqueXattr(w *erofs.Writer, dir string, hdr *archivetar.Header) error {
+	_, statErr := w.Stat(dir)
+	if errors.Is(statErr, fs.ErrNotExist) {
+		if err := w.Mkdir(dir, 0o755); err != nil {
+			return err
+		}
+		_ = w.Chtimes(dir, hdr.ModTime, hdr.ModTime) // best effort on the placeholder
+	}
+	return w.Setxattr(dir, overlayOpaqueXattr, "y")
+}
+
+// removeChildren removes all direct and indirect descendants of dir from w.
+// The directory itself is kept. Used by WithMerge for opaque directories.
+func removeChildren(w *erofs.Writer, dir string) error {
+	f, err := w.Open(dir)
+	if err != nil {
+		if errors.Is(err, fs.ErrNotExist) {
+			return nil
+		}
+		return err
+	}
+	defer f.Close()
+	rdf, ok := f.(fs.ReadDirFile)
+	if !ok {
+		return nil
+	}
+	children, err := rdf.ReadDir(-1)
+	if err != nil {
+		return err
+	}
+	for _, child := range children {
+		childPath := path.Join(dir, child.Name())
+		if child.IsDir() {
+			if err := removeAll(w, childPath); err != nil {
+				return err
+			}
+		} else {
+			if err := w.Remove(childPath); err != nil && !errors.Is(err, fs.ErrNotExist) {
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+// removeAll removes p and everything beneath it from w; a missing p is not an error.
+func removeAll(w *erofs.Writer, p string) error {
+	info, err := w.Stat(p)
+	switch {
+	case errors.Is(err, fs.ErrNotExist):
+		return nil
+	case err != nil:
+		return err
+	}
+	if info.IsDir() {
+		if err := removeChildren(w, p); err != nil {
+			return err
+		}
+	}
+	if err := w.Remove(p); err != nil && !errors.Is(err, fs.ErrNotExist) {
+		return err
+	}
+	return nil
+}
+
+// removeExisting clears p (and anything beneath it) before the path is re-created,
+// giving tar overwrite semantics. Best effort: a removal error is discarded.
+func removeExisting(w *erofs.Writer, p string) {
+	_ = removeAll(w, p)
+}
+
+// applyMetadata copies ownership, the full mode (special bits included), the
+// modification time (when set) and PAX xattrs from hdr onto path p.
+func applyMetadata(w *erofs.Writer, p string, hdr *archivetar.Header) error {
+	if chownErr := w.Chown(p, hdr.Uid, hdr.Gid); chownErr != nil {
+		return chownErr
+	}
+	if chmodErr := w.Chmod(p, tarModeToGoMode(hdr.Mode)); chmodErr != nil {
+		return chmodErr
+	}
+	if mtime := hdr.ModTime; !mtime.IsZero() {
+		if err := w.Chtimes(p, mtime, mtime); err != nil {
+			return err
+		}
+	}
+	for name, value := range extractXattrs(hdr) {
+		if err := w.Setxattr(p, name, value); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// extractXattrs collects hdr's PAX records whose key starts with SCHILY.xattr.,
+// keyed by the name after the prefix. It returns nil when no record matches.
+func extractXattrs(hdr *archivetar.Header) map[string]string {
+	var xattrs map[string]string
+	for key, value := range hdr.PAXRecords {
+		name, isXattr := strings.CutPrefix(key, xattrPrefix)
+		if !isXattr {
+			continue
+		}
+		if xattrs == nil {
+			xattrs = make(map[string]string)
+		}
+		xattrs[name] = value
+	}
+	return xattrs
+}
+
+// replayPending retries every queued hard link and returns the ones that still
+// cannot be created. Passes repeat while at least one link was resolved, so a
+// link whose target is itself a queued link is resolved by a later pass.
+func replayPending(w *erofs.Writer, pending []pendingLink) []pendingLink {
+	for resolved := true; resolved; {
+		resolved = false
+		var unresolved []pendingLink
+		for i := range pending {
+			link := &pending[i]
+			if w.Link(link.oldname, link.newname) != nil {
+				unresolved = append(unresolved, *link)
+				continue
+			}
+			// The link exists now; a metadata failure is ignored (best effort).
+			_ = applyMetadata(w, link.newname, &link.hdr)
+			resolved = true
+		}
+		pending = unresolved
+	}
+	return pending
+}
+
+// isNotExist reports whether err indicates a path does not exist; a nil err reports false.
+func isNotExist(err error) bool {
+	return err != nil && (errors.Is(err, fs.ErrNotExist) || strings.Contains(err.Error(), "not found"))
+}
+
+// isDuplicatePath reports whether err is the "duplicate path" error from erofs.Writer.
+func isDuplicatePath(err error) bool {
+	return err != nil && strings.Contains(err.Error(), "duplicate path") // NOTE(review): matches on message text; confirm the writer's wording is stable
+}
+
+// tarModeToGoMode converts a tar header Mode (unix mode bits) to fs.FileMode,
+// mapping setuid/setgid/sticky onto their fs.Mode* counterparts.
+func tarModeToGoMode(mode int64) fs.FileMode {
+	special := [...]struct {
+		unixBit int64
+		goBit   fs.FileMode
+	}{{0o4000, fs.ModeSetuid}, {0o2000, fs.ModeSetgid}, {0o1000, fs.ModeSticky}}
+	goMode := fs.FileMode(mode & 0o777)
+	for _, s := range special {
+		if mode&s.unixBit != 0 {
+			goMode |= s.goBit
+		}
+	}
+	return goMode
+}
+
+// cleanTarPath converts a tar header name to a cleaned absolute path.
+//
+// Rooting the name before cleaning handles every input uniformly: "" and "."
+// become "/", and path.Clean merges the doubled slash that results when the
+// name was already absolute.
+func cleanTarPath(name string) string {
+	// A rooted path cannot climb above "/": Clean rewrites a leading "/.." as "/".
+	rooted := "/" + name
+	return path.Clean(rooted)
+}
+
+// mkdev packs major/minor into a Linux device number: minor's low 8 bits at bits 0-7, major from bit 8, minor's remaining bits from bit 20.
+func mkdev(major, minor int64) uint32 {
+	return uint32(major<<8 | minor&0xff | (minor&^0xff)<<12)
+}
diff --git a/tarconv/bench_test.go b/tarconv/bench_test.go
new file mode 100644
index 0000000..1b2008e
--- /dev/null
+++ b/tarconv/bench_test.go
@@ -0,0 +1,384 @@
+package tarconv_test
+
+// Benchmarks for tarconv.Apply in its default and WithMerge modes, with optional
+// comparison against mkfs.erofs when it is present in PATH.
+//
+// Four synthetic workloads model real container image shapes:
+//
+//   Small  – ~150 entries, ~100KB data. Typical Alpine base layer.
+//   Medium – ~950 entries, ~3MB data.   Python/Node package install layer.
+//   Large  – ~4400 entries, ~32MB data. Large app or source-tree layer.
+//   Huge   – ~450 entries, ~100MB data. A few large binary files; exercises
+//             raw I/O throughput where per-process spawn overhead is ~0%.
+//
+// Each workload runs:
+//   BenchmarkConvert/<size>       – tarconv.Apply (default convert mode)
+//   BenchmarkMerge/<size>         – tarconv.Apply(WithMerge) (2-layer scenario)
+//   BenchmarkMkfsConvert/<size>   – mkfs.erofs --tar=f (skipped if not in PATH)
+//
+// Run with (recommended for stable numbers):
+//   go test ./tarconv/... -bench=. -benchtime=10s -count=3 -benchmem
+
+import (
+	"archive/tar"
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"testing"
+	"time"
+
+	erofs "github.com/erofs/go-erofs"
+	"github.com/containerd/continuity/tarconv"
+)
+
+// workload describes a set of synthetic tar entries to benchmark.
+type workload struct {
+	name    string            // sub-benchmark name passed to b.Run
+	entries func() []tarEntry // generates the entries on each call
+}
+
+// tarEntry is a single entry to write into a tar.
+type tarEntry struct {
+	hdr  tar.Header // header written for the entry
+	data []byte     // payload written after the header; skipped when empty
+}
+
+var benchEpoch = time.Unix(1700000000, 0) // fixed ModTime stamped on every synthetic entry
+
+// smallWorkload models a small base layer: 20 package directories holding
+// five 1 KiB files each, plus 10 symlinks and hard links for ~5% of the files.
+func smallWorkload() []tarEntry {
+	spec := layerSpec{dirs: 20, filesPerDir: 5, fileSize: 1024, symlinks: 10, hardLinkFraction: 0.05}
+	return syntheticLayer(spec)
+}
+
+// mediumWorkload models a package install layer: 50 package directories holding
+// fifteen 4 KiB files each, plus 50 symlinks and hard links for ~10% of the files.
+func mediumWorkload() []tarEntry {
+	spec := layerSpec{dirs: 50, filesPerDir: 15, fileSize: 4096, symlinks: 50, hardLinkFraction: 0.1}
+	return syntheticLayer(spec)
+}
+
+// largeWorkload models a source-tree or large-app layer: 100 package directories
+// holding forty 8 KiB files each, plus 100 symlinks and hard links for ~5% of the files.
+func largeWorkload() []tarEntry {
+	spec := layerSpec{dirs: 100, filesPerDir: 40, fileSize: 8192, symlinks: 100, hardLinkFraction: 0.05}
+	return syntheticLayer(spec)
+}
+
+// hugeWorkload models a layer dominated by large files: 20 package directories
+// holding twenty 256 KiB files each (~100MB of data), plus 20 symlinks and hard
+// links for ~2% of the files. The data volume makes per-process spawn overhead
+// negligible in the mkfs comparison, isolating raw I/O throughput.
+func hugeWorkload() []tarEntry {
+	spec := layerSpec{dirs: 20, filesPerDir: 20, fileSize: 256 * 1024, symlinks: 20, hardLinkFraction: 0.02}
+	return syntheticLayer(spec)
+}
+
+type layerSpec struct { // parameters consumed by syntheticLayer
+	dirs             int     // number of pkgNNNN/ directories
+	filesPerDir      int     // regular files created in each package directory
+	fileSize         int     // payload size of every regular file, in bytes
+	symlinks         int     // number of symlink entries under symlinks/
+	hardLinkFraction float64 // share of the regular files that also get a hard link under links/
+}
+
+// syntheticLayer generates a synthetic tar layer from s: a root and skeleton
+// directories, s.dirs package directories of regular files, hard links, and symlinks.
+func syntheticLayer(s layerSpec) []tarEntry {
+	var entries []tarEntry
+
+	// Root.
+	entries = append(entries, tarEntry{hdr: tar.Header{
+		Typeflag: tar.TypeDir, Name: "./", Mode: 0o755,
+		Uid: 0, Gid: 0, ModTime: benchEpoch,
+	}})
+
+	// Standard directory skeleton.
+	skeletonDirs := []string{"usr/", "usr/bin/", "usr/lib/", "usr/share/", "etc/", "var/", "var/log/", "tmp/"}
+	for _, d := range skeletonDirs {
+		entries = append(entries, tarEntry{hdr: tar.Header{
+			Typeflag: tar.TypeDir, Name: d, Mode: 0o755,
+			Uid: 0, Gid: 0, ModTime: benchEpoch,
+		}})
+	}
+
+	// Generate payload data (one buffer shared by every file entry to avoid huge allocations).
+	fileData := make([]byte, s.fileSize)
+	for i := range fileData {
+		fileData[i] = byte(i % 251)
+	}
+
+	var regularFiles []string
+
+	for d := 0; d < s.dirs; d++ {
+		dirName := fmt.Sprintf("pkg%04d/", d)
+		entries = append(entries, tarEntry{hdr: tar.Header{
+			Typeflag: tar.TypeDir, Name: dirName, Mode: 0o755,
+			Uid: 1000, Gid: 1000, ModTime: benchEpoch,
+		}})
+
+		for f := 0; f < s.filesPerDir; f++ {
+			name := fmt.Sprintf("%sfile%04d.dat", dirName, f)
+			regularFiles = append(regularFiles, name)
+
+			var pax map[string]string
+			if f%10 == 0 {
+				// Every tenth file in a directory carries a security.capability xattr.
+				pax = map[string]string{"SCHILY.xattr.security.capability": "\x01\x00\x00\x02\x00 \x00\x00"}
+			}
+
+			e := tarEntry{
+				hdr: tar.Header{
+					Typeflag:   tar.TypeReg,
+					Name:       name,
+					Size:       int64(s.fileSize),
+					Mode:       0o644,
+					Uid:        1000,
+					Gid:        1000,
+					ModTime:    benchEpoch,
+					PAXRecords: pax,
+				},
+				data: fileData,
+			}
+			entries = append(entries, e)
+		}
+	}
+
+	// Add hard links to the first hlCount regular files.
+	hlCount := int(float64(len(regularFiles)) * s.hardLinkFraction)
+	for i := 0; i < hlCount && i < len(regularFiles); i++ {
+		target := regularFiles[i]
+		linkName := fmt.Sprintf("links/hardlink%04d", i)
+		// Ensure the links/ directory exists (add it once).
+		if i == 0 {
+			entries = append(entries, tarEntry{hdr: tar.Header{
+				Typeflag: tar.TypeDir, Name: "links/", Mode: 0o755,
+				Uid: 0, Gid: 0, ModTime: benchEpoch,
+			}})
+		}
+		entries = append(entries, tarEntry{hdr: tar.Header{
+			Typeflag: tar.TypeLink, Name: linkName, Linkname: target,
+			Uid: 0, Gid: 0, ModTime: benchEpoch,
+		}})
+	}
+
+	// Add symlinks, cycling through the regular files as absolute targets.
+	if len(regularFiles) > 0 {
+		for i := 0; i < s.symlinks; i++ {
+			target := regularFiles[i%len(regularFiles)]
+			entries = append(entries, tarEntry{hdr: tar.Header{
+				Typeflag: tar.TypeSymlink,
+				Name:     fmt.Sprintf("symlinks/link%04d", i),
+				Linkname: "/" + target,
+				Mode:     0o777,
+				ModTime:  benchEpoch,
+			}})
+		}
+		// The symlinks/ directory entry must precede the symlink entries.
+		symlinkDir := tarEntry{hdr: tar.Header{
+			Typeflag: tar.TypeDir, Name: "symlinks/", Mode: 0o755,
+			Uid: 0, Gid: 0, ModTime: benchEpoch,
+		}}
+		// Splice it in just before the first symlink entry (the last s.symlinks entries).
+		firstSym := len(entries) - s.symlinks
+		if firstSym < 0 {
+			firstSym = 0
+		}
+		rest := make([]tarEntry, len(entries)-firstSym)
+		copy(rest, entries[firstSym:])
+		entries = append(entries[:firstSym], symlinkDir)
+		entries = append(entries, rest...)
+	}
+
+	return entries
+}
+
+// buildTarBytes serialises entries to an in-memory tar.
+func buildTarBytes(t testing.TB, entries []tarEntry) []byte {
+	t.Helper()
+	var out bytes.Buffer
+	tw := tar.NewWriter(&out)
+	for _, e := range entries {
+		hdr := e.hdr // copy so we don't mutate
+		if err := tw.WriteHeader(&hdr); err != nil {
+			t.Fatalf("WriteHeader %s: %v", e.hdr.Name, err)
+		}
+		if len(e.data) > 0 {
+			if _, err := tw.Write(e.data); err != nil {
+				t.Fatalf("Write %s: %v", e.hdr.Name, err)
+			}
+		}
+	}
+	if err := tw.Close(); err != nil {
+		t.Fatalf("tar Close: %v", err)
+	}
+	return out.Bytes()
+}
+
+// discardWriter is an io.WriteSeeker that drops all data and only tracks the current offset.
+type discardWriter struct{ pos int64 }
+
+func (d *discardWriter) Write(p []byte) (int, error) {
+	d.pos += int64(len(p))
+	return len(p), nil
+}
+func (d *discardWriter) Seek(offset int64, whence int) (int64, error) {
+	switch whence {
+	case io.SeekStart, io.SeekEnd: // no size is tracked, so SeekEnd is treated like SeekStart
+		d.pos = offset
+	case io.SeekCurrent:
+		d.pos += offset
+	}
+	return d.pos, nil
+}
+
+// --- Benchmarks ---
+
+var workloads = []workload{ // every benchmark in this file iterates these, in this order
+	{"Small", smallWorkload},
+	{"Medium", mediumWorkload},
+	{"Large", largeWorkload},
+	{"Huge", hugeWorkload},
+}
+
+// makeMergeLayer2 builds a second tar layer over the given base entries:
+// 20% new files + whiteouts for every 20th regular file in the base.
+func makeMergeLayer2(b testing.TB, base []tarEntry) []byte {
+	b.Helper()
+	var layer2 []tarEntry
+	layer2 = append(layer2, tarEntry{hdr: tar.Header{
+		Typeflag: tar.TypeDir, Name: "layer2/", Mode: 0o755, ModTime: benchEpoch,
+	}})
+	fileData := make([]byte, 512)
+	for i := 0; i < len(base)/5; i++ {
+		layer2 = append(layer2, tarEntry{
+			hdr: tar.Header{
+				Typeflag: tar.TypeReg,
+				Name:     fmt.Sprintf("layer2/newfile%04d", i),
+				Size:     int64(len(fileData)), Mode: 0o644, ModTime: benchEpoch,
+			},
+			data: fileData,
+		})
+	}
+	for i, e := range base {
+		if i%20 == 1 && e.hdr.Typeflag == tar.TypeReg {
+			// Construct whiteout path: same directory, .wh. prefix on filename.
+			p := e.hdr.Name
+			slash := len(p) - 1
+			for slash >= 0 && p[slash] != '/' {
+				slash--
+			}
+			dir, name := p[:slash+1], p[slash+1:]
+			layer2 = append(layer2, tarEntry{hdr: tar.Header{
+				Typeflag: tar.TypeReg,
+				Name:     dir + ".wh." + name,
+				ModTime:  benchEpoch,
+			}})
+		}
+	}
+	return buildTarBytes(b, layer2)
+}
+
+// BenchmarkConvert benchmarks tarconv.Apply (default mode) across all workload sizes.
+// b.SetBytes is given the tar size, so throughput is reported per tar input byte.
+func BenchmarkConvert(b *testing.B) {
+	for _, wl := range workloads {
+		wl := wl
+		b.Run(wl.name, func(b *testing.B) {
+			tarData := buildTarBytes(b, wl.entries())
+			b.SetBytes(int64(len(tarData)))
+			// Validate the image before the timed loop (on each invocation, outside the timer).
+			if b.N > 0 {
+				dw := &buf{}
+				w := erofs.Create(dw)
+				if err := tarconv.Apply(w, bytes.NewReader(tarData)); err != nil {
+					b.Fatalf("Convert (validation): %v", err)
+				}
+				if err := w.Close(); err != nil {
+					b.Fatalf("Close (validation): %v", err)
+				}
+				fsckErofsBytes(b, dw.b)
+			}
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				dw := &discardWriter{}
+				w := erofs.Create(dw)
+				if err := tarconv.Apply(w, bytes.NewReader(tarData)); err != nil {
+					b.Fatalf("Convert: %v", err)
+				}
+				if err := w.Close(); err != nil {
+					b.Fatalf("Close: %v", err)
+				}
+			}
+		})
+	}
+}
+
+// BenchmarkMerge benchmarks a two-layer merge: tarconv.Apply(WithMerge) is called
+// twice on one writer. Layer 1 is the base; layer 2 adds new files and whiteouts.
+func BenchmarkMerge(b *testing.B) {
+	for _, wl := range workloads {
+		wl := wl
+		b.Run(wl.name, func(b *testing.B) {
+			base := wl.entries()
+			layer1 := buildTarBytes(b, base)
+			layer2 := makeMergeLayer2(b, base)
+			b.SetBytes(int64(len(layer1) + len(layer2)))
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				dw := &discardWriter{}
+				w := erofs.Create(dw)
+				if err := tarconv.Apply(w, bytes.NewReader(layer1), tarconv.WithMerge()); err != nil {
+					b.Fatalf("Apply(WithMerge) layer1: %v", err)
+				}
+				if err := tarconv.Apply(w, bytes.NewReader(layer2), tarconv.WithMerge()); err != nil {
+					b.Fatalf("Apply(WithMerge) layer2: %v", err)
+				}
+				if err := w.Close(); err != nil {
+					b.Fatalf("Close: %v", err)
+				}
+			}
+		})
+	}
+}
+
+// BenchmarkMkfsConvert benchmarks mkfs.erofs --tar=f as a reference point.
+// Skipped if mkfs.erofs is not in PATH. Uses the same fixed timestamp as
+// other tests for fair comparison. Reports throughput in MB/s of tar input.
+//
+// Note: mkfs.erofs writes to a real file on disk and has fork/exec overhead
+// per iteration. The throughput figures will be lower than tarconv.Apply for
+// small inputs due to spawn cost, but converge as tar size grows.
+func BenchmarkMkfsConvert(b *testing.B) {
+	if _, err := exec.LookPath("mkfs.erofs"); err != nil {
+		b.Skip("mkfs.erofs not found in PATH")
+	}
+	for _, wl := range workloads {
+		wl := wl
+		b.Run(wl.name, func(b *testing.B) {
+			tarData := buildTarBytes(b, wl.entries())
+			outFile, err := os.CreateTemp("", "mkfs-bench-*.erofs")
+			if err != nil {
+				b.Fatal(err)
+			}
+			outPath := outFile.Name()
+			_ = outFile.Close()
+			defer os.Remove(outPath)
+
+			b.SetBytes(int64(len(tarData)))
+			b.ResetTimer()
+
+			ctx := context.Background()
+			for i := 0; i < b.N; i++ {
+				if err := convertTarMkfs(ctx, b, tarData, outPath, nil); err != nil {
+					b.Fatalf("mkfs.erofs: %v", err)
+				}
+				_ = os.Remove(outPath) // start each iteration without a leftover image
+			}
+		})
+	}
+}
diff --git a/tarconv/compare_test.go b/tarconv/compare_test.go
new file mode 100644
index 0000000..ea39dab
--- /dev/null
+++ b/tarconv/compare_test.go
@@ -0,0 +1,1217 @@
+package tarconv_test
+
+// Image comparison tests.
+//
+// These tests build the same tar with both tarconv.Apply and mkfs.erofs, then
+// walk both images with the go-erofs reader and assert that every entry has
+// identical: type, permissions (rawMode), uid, gid, mtime, size, file
+// content, symlink target, rdev, and xattrs.
+//
+// Inode numbers (nid) and block layout are deliberately excluded: they are
+// implementation-specific and will legitimately differ.
+//
+// All builds use a fixed timestamp (-T / WithBuildTime) so mtime values are
+// deterministic.
+
+import (
+	"archive/tar"
+	"bytes"
+	"context"
+	"io"
+	"io/fs"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"sort"
+	"strings"
+	"testing"
+	"time"
+
+	erofs "github.com/erofs/go-erofs"
+
+	"github.com/containerd/continuity/tarconv"
+)
+
+// fixedBuildTime is the build time used for all comparison builds so mtime is deterministic.
+var fixedBuildTime = time.Unix(1700000000, 0)
+var fixedBuildTimeStr = "1700000000" // the same instant as decimal seconds, appended to mkfs.erofs -T; keep in sync
+
+// lstater is the Lstat capability that collectImage requires of an image; a missing method is fatal.
+type lstater interface {
+	Lstat(name string) (fs.FileInfo, error)
+}
+
+// readLinker is the optional ReadLink capability; collectImage records symlink targets only if it is present.
+type readLinker interface {
+	ReadLink(name string) (string, error)
+}
+
+// readDirer is the ReadDir capability that collectImage requires to list directory children; a missing method is fatal.
+type readDirer interface {
+	ReadDir(name string) ([]fs.DirEntry, error)
+}
+
+// buildGoImage builds an EROFS image using tarconv.Apply (default convert-whiteouts mode).
+// The build time is set to fixedBuildTime so compact inodes match mkfs.erofs -T output.
+func buildGoImage(t testing.TB, tarData []byte) []byte {
+	t.Helper()
+	out := &buf{} // in-memory sink for the image bytes
+	w := erofs.Create(out,
+		erofs.WithBuildTime(uint64(fixedBuildTime.Unix()), 0),
+	)
+	if err := tarconv.Apply(w, bytes.NewReader(tarData)); err != nil {
+		t.Fatalf("tarconv.Apply: %v", err) // name the call that actually failed
+	}
+	if err := w.Close(); err != nil {
+		t.Fatalf("Writer.Close: %v", err)
+	}
+	return out.b
+}
+
+// buildMkfsImage builds an EROFS image using mkfs.erofs.
+// Skips the test if mkfs.erofs is not in PATH.
+// Uses -T (fixed build time), --aufs, --tar=f, --quiet, -Enoinline_data.
+func buildMkfsImage(t testing.TB, tarData []byte) []byte {
+	t.Helper()
+	if _, err := exec.LookPath("mkfs.erofs"); err != nil {
+		t.Skip("mkfs.erofs not found in PATH")
+	}
+
+	// Write the tar to a temp file; it is rewound below and handed to mkfs.erofs as stdin.
+	tarFile, err := os.CreateTemp("", "compare-*.tar")
+	if err != nil {
+		t.Fatalf("create tar temp: %v", err)
+	}
+	defer os.Remove(tarFile.Name())
+	if _, err := tarFile.Write(tarData); err != nil {
+		tarFile.Close()
+		t.Fatalf("write tar temp: %v", err)
+	}
+	if _, err := tarFile.Seek(0, io.SeekStart); err != nil {
+		tarFile.Close()
+		t.Fatalf("seek tar temp: %v", err)
+	}
+
+	outFile, err := os.CreateTemp("", "compare-*.erofs")
+	if err != nil {
+		tarFile.Close()
+		t.Fatalf("create img temp: %v", err)
+	}
+	outPath := outFile.Name()
+	outFile.Close() // only the reserved path is passed to mkfs.erofs
+	defer os.Remove(outPath)
+
+	// -T sets the EROFS image build time (compact-inode threshold).
+	// Do NOT pass --all-time: that would override per-entry mtimes from the tar,
+	// causing all entries to show the build time instead of their own mtime.
+	args := []string{
+		"--tar=f",
+		"--aufs",
+		"--quiet",
+		"-Enoinline_data",
+		"-T" + fixedBuildTimeStr,
+		outPath,
+	}
+	cmd := exec.CommandContext(context.Background(), "mkfs.erofs", args...)
+	cmd.Stdin = tarFile
+	out, err := cmd.CombinedOutput()
+	tarFile.Close() // the command has finished, so its stdin can be released before err is examined
+	if err != nil {
+		t.Fatalf("mkfs.erofs: %v\n%s", err, out)
+	}
+
+	imgBytes, err := os.ReadFile(outPath)
+	if err != nil {
+		t.Fatalf("read mkfs image: %v", err)
+	}
+	return imgBytes
+}
+
+// fsckImageBytes writes data to a temp file and runs fsck.erofs on it. It
+// returns silently (no skip) if fsck.erofs is not in PATH, and reports
+// failures with t.Errorf rather than Fatal so other checks can still run.
+func fsckImageBytes(t testing.TB, label string, data []byte) {
+	t.Helper()
+	if _, err := exec.LookPath("fsck.erofs"); err != nil {
+		return
+	}
+	f, err := os.CreateTemp("", "fsck-*.erofs")
+	if err != nil {
+		t.Errorf("%s fsck: create temp: %v", label, err)
+		return
+	}
+	defer os.Remove(f.Name())
+	if _, err := f.Write(data); err != nil {
+		f.Close()
+		t.Errorf("%s fsck: write: %v", label, err)
+		return
+	}
+	f.Close() // NOTE(review): the Close error is discarded before fsck reads the file
+	out, err := exec.Command("fsck.erofs", f.Name()).CombinedOutput()
+	if err != nil {
+		t.Errorf("%s fsck.erofs FAILED: %v\n%s", label, err, out)
+	}
+}
+
+// imageEntry is a fully normalized representation of one filesystem entry,
+// collected via the go-erofs reader. Every field that can be compared between
+// two images derived from the same tar source is stored here.
+type imageEntry struct {
+	path    string
+	rawMode uint16 // Unix mode bits (type + perms + special) from erofs.Stat
+	uid     uint32
+	gid     uint32
+	mtime   uint64
+	mtimeNs uint32
+	size    int64
+	rdev    uint32 // device number; diffEntries compares it for char/block devices only
+	nlink   int    // exact nlink value — must match between images
+	symlink string // target for symlinks, empty otherwise
+	xattrs  map[string]string
+	// dirChildren holds the ordered list of child names as returned by ReadDir.
+	// Order matters: EROFS stores directory entries sorted, so both images
+	// should report identical order for the same directory contents.
+	dirChildren []string
+	// content holds the full file data for non-empty regular files and stays
+	// nil for everything else; it is never truncated, whatever the file size.
+	content []byte
+}
+
+// collectImage walks an EROFS image opened with erofs.Open and returns a
+// slice of imageEntry, sorted by path, for every path including the root (".").
+//
+// It uses Lstat (not Stat) for every entry so symlinks are captured as-is.
+// It reads every non-empty regular file in full so content is always compared.
+// It records the ReadDir order of every directory so ordering is compared.
+func collectImage(t testing.TB, img fs.FS, label string) []imageEntry {
+	t.Helper()
+
+	ls, ok := img.(lstater)
+	if !ok {
+		t.Fatalf("%s: image does not implement Lstat", label)
+	}
+	rl, _ := img.(readLinker)
+	rd, ok := img.(readDirer)
+	if !ok {
+		t.Fatalf("%s: image does not implement ReadDir", label)
+	}
+
+	var entries []imageEntry
+
+	// Collect one entry. path is the fs.FS path (relative, no leading slash).
+	// "." refers to the root directory.
+	collect := func(p string) {
+		// The root is looked up as "." exactly like any other path, so a
+		// single Lstat call covers every entry; the former if/else issued
+		// the same call on both branches.
+		fi, err := ls.Lstat(p)
+		if err != nil {
+			t.Errorf("%s Lstat %q: %v", label, p, err)
+			return
+		}
+		st, ok := fi.Sys().(*erofs.Stat)
+		if !ok {
+			t.Errorf("%s %q: Sys() is %T not *erofs.Stat", label, p, fi.Sys())
+			return
+		}
+
+		e := imageEntry{
+			path:    p,
+			rawMode: goModeToRaw(st.Mode),
+			uid:     st.UID,
+			gid:     st.GID,
+			mtime:   st.Mtime,
+			mtimeNs: st.MtimeNs,
+			size:    fi.Size(),
+			rdev:    st.Rdev,
+			nlink:   st.Nlink,
+			xattrs:  st.Xattrs,
+		}
+
+		// Symlink target; skipped when the image has no ReadLink method.
+		if fi.Mode()&fs.ModeSymlink != 0 && rl != nil {
+			target, err := rl.ReadLink(p)
+			if err != nil {
+				t.Errorf("%s ReadLink %q: %v", label, p, err)
+			}
+			e.symlink = target
+		}
+
+		// Child names, in exactly the order ReadDir reports them.
+		if fi.Mode().IsDir() {
+			des, err := rd.ReadDir(p)
+			if err != nil {
+				t.Errorf("%s ReadDir %q: %v", label, p, err)
+			} else {
+				e.dirChildren = make([]string, len(des))
+				for i, de := range des {
+					e.dirChildren[i] = de.Name()
+				}
+			}
+		}
+
+		// Non-empty regular files are read in full; empty ones keep nil content.
+		if fi.Mode().IsRegular() && fi.Size() > 0 {
+			f, err := img.Open(p)
+			if err != nil {
+				t.Errorf("%s Open %q: %v", label, p, err)
+			} else {
+				data, err := io.ReadAll(f)
+				f.Close()
+				if err != nil {
+					t.Errorf("%s ReadAll %q: %v", label, p, err)
+				} else {
+					e.content = data
+				}
+			}
+		}
+
+		entries = append(entries, e)
+	}
+
+	// A manual recursive walk is used instead of fs.WalkDir: every entry is
+	// collected through Lstat, so symlinks are recorded as entries in their own
+	// right, and only entries whose ReadDir type is a directory are descended.
+	var walk func(dir string)
+	walk = func(dir string) {
+		des, err := rd.ReadDir(dir)
+		if err != nil {
+			t.Errorf("%s ReadDir %q: %v", label, dir, err)
+			return
+		}
+		for _, de := range des {
+			var p string
+			if dir == "." {
+				p = de.Name()
+			} else {
+				p = dir + "/" + de.Name()
+			}
+			collect(p)
+			// Recurse into real directories only (not symlinks to dirs).
+			if de.Type().IsDir() {
+				walk(p)
+			}
+		}
+	}
+
+	// Include the root itself.
+	collect(".")
+	walk(".")
+
+	sort.Slice(entries, func(i, j int) bool { return entries[i].path < entries[j].path })
+	return entries
+}
+
+// goModeToRaw rebuilds the Unix mode word (file type, setuid/setgid/sticky
+// and permission bits) from a Go fs.FileMode such as erofs.Stat.Mode, so the
+// modes recorded in two images can be compared as plain integers.
+func goModeToRaw(m fs.FileMode) uint16 {
+	bits := uint16(m.Perm())
+	// Perm() carries only the rwx bits; the special bits are mapped one by one.
+	if m&fs.ModeSetuid != 0 {
+		bits |= 0o4000
+	}
+	if m&fs.ModeSetgid != 0 {
+		bits |= 0o2000
+	}
+	if m&fs.ModeSticky != 0 {
+		bits |= 0o1000
+	}
+	switch m.Type() {
+	case fs.ModeDir:
+		return bits | 0o040000
+	case fs.ModeSymlink:
+		return bits | 0o120000
+	case fs.ModeDevice | fs.ModeCharDevice:
+		return bits | 0o020000
+	case fs.ModeDevice:
+		return bits | 0o060000
+	case fs.ModeNamedPipe:
+		return bits | 0o010000
+	case fs.ModeSocket:
+		return bits | 0o140000
+	}
+	// Every other type value, including the zero type of an ordinary file,
+	// is reported as a regular file.
+	return bits | 0o100000
+}
+
+// isDeviceType reports whether the file-type field of rawMode marks a character or block device.
+func isDeviceType(rawMode uint16) bool {
+	const charDev, blockDev = 0o020000, 0o060000
+	return rawMode&0xF000 == charDev || rawMode&0xF000 == blockDev
+}
+
+// compareImages opens both EROFS images, collects every entry from each and
+// asserts they describe the same filesystem: same set of paths and, for each
+// shared path, whatever diffEntries checks. Mismatches are reported with
+// t.Errorf so that all of them are collected before the test fails.
+func compareImages(t testing.TB, goImg, mkfsImg []byte) {
+	t.Helper()
+
+	goFS, err := erofs.Open(bytes.NewReader(goImg))
+	if err != nil {
+		t.Fatalf("open go image: %v", err)
+	}
+	mkFS, err := erofs.Open(bytes.NewReader(mkfsImg))
+	if err != nil {
+		t.Fatalf("open mkfs image: %v", err)
+	}
+
+	goEntries := collectImage(t, goFS, "go")
+	mkEntries := collectImage(t, mkFS, "mkfs")
+
+	// Index both entry lists by path for direct lookup.
+	byPath := func(list []imageEntry) map[string]imageEntry {
+		idx := make(map[string]imageEntry, len(list))
+		for _, e := range list {
+			idx[e.path] = e
+		}
+		return idx
+	}
+	inGo, inMkfs := byPath(goEntries), byPath(mkEntries)
+
+	// Go-image paths: each needs an mkfs counterpart with identical fields.
+	for _, ge := range goEntries {
+		// Shared paths are diffed field by field; unmatched ones are reported.
+		if me, found := inMkfs[ge.path]; found {
+			diffEntries(t, ge.path, ge, me)
+			continue
+		}
+		t.Errorf("path %q: in go image but missing from mkfs image", ge.path)
+	}
+
+	// Mkfs-image paths: only existence is checked, fields were diffed above.
+	for _, me := range mkEntries {
+		if _, found := inGo[me.path]; found {
+			continue
+		}
+		t.Errorf("path %q: in mkfs image but missing from go image", me.path)
+	}
+}
+
+// diffEntries reports, via t.Errorf, every difference between the go-image entry
+// (got) and the mkfs-image entry (want) for path p; exceptions are noted inline.
+func diffEntries(t testing.TB, p string, got, want imageEntry) {
+	t.Helper()
+
+	// Mode: compare full unix bits (type + perms + special bits).
+	if got.rawMode != want.rawMode {
+		t.Errorf("%s: mode: go=0o%o mkfs=0o%o", p, got.rawMode, want.rawMode)
+	}
+	if got.uid != want.uid {
+		t.Errorf("%s: uid: go=%d mkfs=%d", p, got.uid, want.uid)
+	}
+	if got.gid != want.gid {
+		t.Errorf("%s: gid: go=%d mkfs=%d", p, got.gid, want.gid)
+	}
+	if got.mtime != want.mtime {
+		t.Errorf("%s: mtime: go=%d mkfs=%d", p, got.mtime, want.mtime)
+	}
+	// mtimeNs: compare only when both are non-zero; mkfs.erofs may not
+	// preserve sub-second precision in all versions.
+	if got.mtimeNs != 0 && want.mtimeNs != 0 && got.mtimeNs != want.mtimeNs {
+		t.Errorf("%s: mtime_ns: go=%d mkfs=%d", p, got.mtimeNs, want.mtimeNs)
+	}
+	if got.size != want.size {
+		t.Errorf("%s: size: go=%d mkfs=%d", p, got.size, want.size)
+	}
+	if got.symlink != want.symlink {
+		t.Errorf("%s: symlink target: go=%q mkfs=%q", p, got.symlink, want.symlink)
+	}
+	// rdev: compare for device nodes only.
+	if isDeviceType(got.rawMode) && got.rdev != want.rdev {
+		t.Errorf("%s: rdev: go=%d mkfs=%d", p, got.rdev, want.rdev)
+	}
+	// nlink: exact comparison. Both images are built from the same tar so every
+	// hard-link group must have the same nlink count.
+	if got.nlink != want.nlink {
+		t.Errorf("%s: nlink: go=%d mkfs=%d", p, got.nlink, want.nlink)
+	}
+	// xattrs: exact match — same keys, same values, no extras on either side.
+	for k, gv := range got.xattrs {
+		mv, ok := want.xattrs[k]
+		if !ok {
+			t.Errorf("%s: xattr %q in go image, absent in mkfs image", p, k)
+		} else if gv != mv {
+			t.Errorf("%s: xattr %q: go=%q mkfs=%q", p, k, gv, mv)
+		}
+	}
+	for k := range want.xattrs {
+		if _, ok := got.xattrs[k]; !ok {
+			t.Errorf("%s: xattr %q in mkfs image, absent in go image", p, k)
+		}
+	}
+	// Directory child order: EROFS always stores entries lexicographically, so
+	// both images must report the same order.
+	if len(got.dirChildren) != len(want.dirChildren) {
+		t.Errorf("%s: dir child count: go=%d mkfs=%d (%v vs %v)",
+			p, len(got.dirChildren), len(want.dirChildren), got.dirChildren, want.dirChildren)
+	} else {
+		for i := range got.dirChildren {
+			if got.dirChildren[i] != want.dirChildren[i] {
+				t.Errorf("%s: dir child[%d]: go=%q mkfs=%q", p, i, got.dirChildren[i], want.dirChildren[i])
+			}
+		}
+	}
+	// File content: exact byte comparison.
+	if !bytes.Equal(got.content, want.content) {
+		// Preview at most previewLen leading bytes of each side. The two
+		// lengths are capped independently: capping the mkfs preview by
+		// the go preview's length would print no mkfs bytes at all when
+		// the go content is empty, hiding what was actually expected.
+		// Both stay within their own slice, so the slicing cannot panic.
+		const previewLen = 64
+		n := min(previewLen, len(got.content))
+		wn := min(previewLen, len(want.content))
+		t.Errorf("%s: content mismatch (len go=%d mkfs=%d); go[:%d]=%x mkfs[:%d]=%x",
+			p, len(got.content), len(want.content), n, got.content[:n], wn, want.content[:wn])
+	}
+}
+
+// buildComparisonTar creates a comprehensive deterministic tar that exercises
+// every path through tarconv.Apply and every erofs.Writer call it makes:
+//
+//   - Directories with varied uid/gid/mode including sticky bits
+//     (forces Mkdir + Chown + Chtimes + Chmod on dirs)
+//   - Regular files with varied uid/gid/mode including setuid/setgid
+//     (forces Create + Chown + Chtimes + Chmod on files)
+//   - Regular files with PAX xattrs on multiple entry types
+//     (forces Setxattr on files, dirs, symlinks, and device nodes)
+//   - A 3-way hard-link group (canonical + 2 aliases, nlink=3)
+//     (forces Link x2 and exact nlink=3 match)
+//   - A 2-way hard-link group in a different directory (cross-dir links)
+//   - Symlinks with root and non-root uid/gid, all carrying the shared mtime
+//     (forces Chown + Chtimes on symlinks)
+//   - An opaque directory (.wh..wh..opq) which must appear in both images
+//     as trusted.overlay.opaque=y + trusted.overlay.origin=""
+//   - A plain whiteout (.wh.<name>) which must appear as a char device 0/0
+//   - Char device (major/minor), block device (major/minor), FIFO
+//     (forces Mknod for all three types)
+//   - A multi-block file whose content spans more than one EROFS block
+//   - An empty regular file
+//
+// Every directory has an explicit entry in the tar so root metadata is
+// deterministic across both converters.
+func buildComparisonTar(t testing.TB) []byte {
+	t.Helper()
+
+	// Use a single timestamp for all entries. mkfs.erofs 1.9 applies its -T
+	// build time to every entry regardless of per-entry tar mtime, so a
+	// deterministic comparison requires matching timestamps throughout.
+	// Chown/Chmod/Setxattr are verified via uid/gid/mode/xattr fields, not mtime.
+	ts := fixedBuildTime // 1700000000
+
+	return makeTar(t, func(tw *tar.Writer) {
+		must := func(err error) {
+			t.Helper()
+			if err != nil {
+				t.Fatalf("write tar: %v", err)
+			}
+		}
+		hdr := func(h tar.Header) { must(tw.WriteHeader(&h)) }
+		data := func(b []byte) { _, err := tw.Write(b); must(err) }
+
+		// --- Root and top-level directories ---
+		// Root: uid=0 gid=0, ts
+		hdr(tar.Header{Typeflag: tar.TypeDir, Name: "./", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+
+		// bin/: uid=0 gid=0 — standard mode
+		hdr(tar.Header{Typeflag: tar.TypeDir, Name: "bin/", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+
+		// etc/: uid=0 gid=0, ts (the same timestamp as every other entry)
+		hdr(tar.Header{Typeflag: tar.TypeDir, Name: "etc/", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+
+		// usr/ and usr/bin/ owned by uid=0 gid=0
+		hdr(tar.Header{Typeflag: tar.TypeDir, Name: "usr/", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+		hdr(tar.Header{Typeflag: tar.TypeDir, Name: "usr/bin/", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+
+		// lib/ and lib/shared/: uid=0, gid=0, ts
+		hdr(tar.Header{Typeflag: tar.TypeDir, Name: "lib/", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+		hdr(tar.Header{Typeflag: tar.TypeDir, Name: "lib/shared/", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+
+		// home/: uid=0, gid=0, ts
+		hdr(tar.Header{Typeflag: tar.TypeDir, Name: "home/", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+		// home/user/: non-root uid/gid, restricted perms — exercises Chown on dir
+		hdr(tar.Header{Typeflag: tar.TypeDir, Name: "home/user/", Mode: 0o700, Uid: 1000, Gid: 1000, ModTime: ts})
+
+		// tmp/: sticky bit (0o1777) — exercises Chmod for special bits on dir
+		hdr(tar.Header{Typeflag: tar.TypeDir, Name: "tmp/", Mode: 0o1777, Uid: 0, Gid: 0, ModTime: ts})
+
+		// var/ (gid=0) and var/log/ (gid=4, adm): exercises Chown with non-standard gid
+		hdr(tar.Header{Typeflag: tar.TypeDir, Name: "var/", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+		hdr(tar.Header{Typeflag: tar.TypeDir, Name: "var/log/", Mode: 0o755, Uid: 0, Gid: 4, ModTime: ts})
+
+		// dev/: uid=0, gid=0 — must be explicit so metadata matches mkfs.erofs
+		hdr(tar.Header{Typeflag: tar.TypeDir, Name: "dev/", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+
+		// --- Regular files: varied uid/gid/mode, shared mtime ---
+
+		// etc/hostname: uid=0, gid=0, ts, 0o644
+		hdr(tar.Header{Typeflag: tar.TypeReg, Name: "etc/hostname", Size: 8, Mode: 0o644, Uid: 0, Gid: 0, ModTime: ts})
+		data([]byte("myhost\n\n"))
+
+		// etc/shadow: uid=0, gid=42 (shadow), ts, 0o640 — Chown with non-root gid
+		hdr(tar.Header{Typeflag: tar.TypeReg, Name: "etc/shadow", Size: 5, Mode: 0o640, Uid: 0, Gid: 42, ModTime: ts})
+		data([]byte("root:"))
+
+		// etc/motd: empty file, ts
+		hdr(tar.Header{Typeflag: tar.TypeReg, Name: "etc/motd", Size: 0, Mode: 0o644, Uid: 0, Gid: 0, ModTime: ts})
+
+		// bin/sudo: setuid (0o4755) — exercises Chmod for setuid bit
+		hdr(tar.Header{Typeflag: tar.TypeReg, Name: "bin/sudo", Size: 4, Mode: 0o4755, Uid: 0, Gid: 0, ModTime: ts})
+		data([]byte("sudo"))
+
+		// bin/wall: setgid (0o2755), gid=5 (tty) — exercises Chmod for setgid + Chown
+		hdr(tar.Header{Typeflag: tar.TypeReg, Name: "bin/wall", Size: 4, Mode: 0o2755, Uid: 0, Gid: 5, ModTime: ts})
+		data([]byte("wall"))
+
+		// bin/ping: capability xattr + ts + uid=0 gid=0 — exercises Setxattr on regular file
+		hdr(tar.Header{
+			Typeflag: tar.TypeReg, Name: "bin/ping", Size: 4, Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts,
+			PAXRecords: map[string]string{
+				"SCHILY.xattr.security.capability": "\x01\x00\x00\x02\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+				"SCHILY.xattr.user.role":           "network-tool",
+			},
+		})
+		data([]byte("ping"))
+
+		// usr/bin/env: uid=0 gid=0 ts — plain executable
+		hdr(tar.Header{Typeflag: tar.TypeReg, Name: "usr/bin/env", Size: 3, Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+		data([]byte("env"))
+
+		// home/user/notes.txt: uid=1000 gid=1000 ts — non-root owner + Chtimes
+		hdr(tar.Header{Typeflag: tar.TypeReg, Name: "home/user/notes.txt", Size: 5, Mode: 0o600, Uid: 1000, Gid: 1000, ModTime: ts})
+		data([]byte("hello"))
+
+		// home/user/bigfile: multi-block (3*4096+512 bytes), uid=1000 gid=1000
+		bigData := make([]byte, 3*4096+512)
+		for i := range bigData { bigData[i] = byte(i % 251) }
+		hdr(tar.Header{Typeflag: tar.TypeReg, Name: "home/user/bigfile", Size: int64(len(bigData)), Mode: 0o600, Uid: 1000, Gid: 1000, ModTime: ts})
+		data(bigData)
+
+		// var/log/syslog: uid=0 gid=4 ts — Chown with adm gid
+		hdr(tar.Header{Typeflag: tar.TypeReg, Name: "var/log/syslog", Size: 0, Mode: 0o640, Uid: 0, Gid: 4, ModTime: ts})
+
+		// --- Symlinks ---
+
+		// bin/sh → /bin/busybox: uid=0 gid=0 ts
+		hdr(tar.Header{Typeflag: tar.TypeSymlink, Name: "bin/sh", Linkname: "/bin/busybox", Mode: 0o777, Uid: 0, Gid: 0, ModTime: ts})
+
+		// etc/localtime → /usr/share/zoneinfo/UTC: uid=0 gid=0 ts — Chtimes on symlink
+		hdr(tar.Header{Typeflag: tar.TypeSymlink, Name: "etc/localtime", Linkname: "/usr/share/zoneinfo/UTC", Mode: 0o777, Uid: 0, Gid: 0, ModTime: ts})
+
+		// home/user/myenv → ../../usr/bin/env: non-root uid/gid — Chown on symlink
+		hdr(tar.Header{Typeflag: tar.TypeSymlink, Name: "home/user/myenv", Linkname: "../../usr/bin/env", Mode: 0o777, Uid: 1000, Gid: 1000, ModTime: ts})
+
+		// --- Hard links ---
+
+		// 3-way hard-link group: lib/shared/data.bin (canonical) + 2 aliases.
+		// nlink must be exactly 3 in both images.
+		hdr(tar.Header{Typeflag: tar.TypeReg, Name: "lib/shared/data.bin", Size: 8, Mode: 0o644, Uid: 0, Gid: 0, ModTime: ts})
+		data([]byte("sharedXX"))
+		hdr(tar.Header{Typeflag: tar.TypeLink, Name: "lib/shared/data.bin.1", Linkname: "lib/shared/data.bin", Uid: 0, Gid: 0, ModTime: ts})
+		hdr(tar.Header{Typeflag: tar.TypeLink, Name: "lib/shared/data.bin.2", Linkname: "lib/shared/data.bin", Uid: 0, Gid: 0, ModTime: ts})
+
+		// 2-way cross-directory hard link: canonical in etc/, alias in var/log/
+		// exercises Link across directory boundaries.
+		hdr(tar.Header{Typeflag: tar.TypeReg, Name: "etc/group", Size: 6, Mode: 0o644, Uid: 0, Gid: 0, ModTime: ts})
+		data([]byte("root:x"))
+		hdr(tar.Header{Typeflag: tar.TypeLink, Name: "var/log/group.bak", Linkname: "etc/group", Uid: 0, Gid: 0, ModTime: ts})
+
+		// --- Opaque directory ---
+		// app/ is opaque: it contains .wh..wh..opq which signals that any lower-layer
+		// contents of app/ are hidden. In Convert mode this sets
+		// trusted.overlay.opaque=y and trusted.overlay.origin="" on app/.
+		// The directory also has a file so the image is non-trivial.
+		hdr(tar.Header{Typeflag: tar.TypeDir, Name: "app/", Mode: 0o755, Uid: 1000, Gid: 1000, ModTime: ts})
+		hdr(tar.Header{Typeflag: tar.TypeReg, Name: "app/.wh..wh..opq", Size: 0, Uid: 0, Gid: 0, ModTime: ts})
+		hdr(tar.Header{Typeflag: tar.TypeReg, Name: "app/main", Size: 4, Mode: 0o755, Uid: 1000, Gid: 1000, ModTime: ts})
+		data([]byte("main"))
+
+		// --- Plain whiteout ---
+		// etc/.wh.removed-file converts to a char device 0/0 (mode 0) at etc/removed-file.
+		hdr(tar.Header{Typeflag: tar.TypeReg, Name: "etc/.wh.removed-file", Size: 0, Uid: 0, Gid: 0, ModTime: ts})
+
+		// --- Device nodes (Mknod) ---
+
+		// char device: /dev/null (1,3) — an ordinary device node, not a 0/0 whiteout
+		hdr(tar.Header{Typeflag: tar.TypeChar, Name: "dev/null", Mode: 0o666, Uid: 0, Gid: 0, Devmajor: 1, Devminor: 3, ModTime: ts})
+
+		// char device: /dev/zero (1,5)
+		hdr(tar.Header{Typeflag: tar.TypeChar, Name: "dev/zero", Mode: 0o666, Uid: 0, Gid: 0, Devmajor: 1, Devminor: 5, ModTime: ts})
+
+		// char device with non-root gid (uid=0 gid=5) and ts — exercises Chown+Chtimes on mknod
+		hdr(tar.Header{Typeflag: tar.TypeChar, Name: "dev/tty1", Mode: 0o620, Uid: 0, Gid: 5, Devmajor: 4, Devminor: 1, ModTime: ts})
+
+		// block device: /dev/sda (8,0) — exercises Mknod with block type
+		hdr(tar.Header{Typeflag: tar.TypeBlock, Name: "dev/sda", Mode: 0o660, Uid: 0, Gid: 6, Devmajor: 8, Devminor: 0, ModTime: ts})
+
+		// block device: /dev/sda1 (8,1)
+		hdr(tar.Header{Typeflag: tar.TypeBlock, Name: "dev/sda1", Mode: 0o660, Uid: 0, Gid: 6, Devmajor: 8, Devminor: 1, ModTime: ts})
+
+		// FIFO: uid=1000 gid=1000 — exercises Mknod for fifo + Chown
+		hdr(tar.Header{Typeflag: tar.TypeFifo, Name: "tmp/pipe", Mode: 0o600, Uid: 1000, Gid: 1000, ModTime: ts})
+
+		// Another FIFO with different permissions — confirms mode bits for fifo
+		hdr(tar.Header{Typeflag: tar.TypeFifo, Name: "tmp/ctrl", Mode: 0o640, Uid: 0, Gid: 1000, ModTime: ts})
+
+		// --- Directory with xattrs (SELinux label) ---
+		// var/log/ has an xattr, exercising Setxattr on a directory.
+		// We set it here by re-emitting var/log/ — the duplicate Mkdir is handled
+		// by the idempotent addDir path, and applyMetadata sets the xattr.
+		hdr(tar.Header{
+			Typeflag: tar.TypeDir, Name: "var/log/", Mode: 0o755, Uid: 0, Gid: 4, ModTime: ts,
+			PAXRecords: map[string]string{
+				"SCHILY.xattr.security.selinux": "system_u:object_r:var_log_t:s0\x00",
+			},
+		})
+	})
+}
+
+// ----------------------------------------------------------------------------
+// Comparison tests
+// ----------------------------------------------------------------------------
+
+// TestCompareWithMkfs converts the comprehensive comparison tar with both
+// tarconv.Apply and mkfs.erofs and asserts the two images are semantically identical.
+func TestCompareWithMkfs(t *testing.T) {
+	layer := buildComparisonTar(t)
+	ours, reference := buildGoImage(t, layer), buildMkfsImage(t, layer)
+
+	// Check each image on its own before diffing the pair.
+	fsckImageBytes(t, "go", ours)
+	fsckImageBytes(t, "mkfs", reference)
+	compareImages(t, ours, reference)
+}
+
+// TestCompareWithMkfsSymlinkDir builds a tar containing a directory that is
+// also a symlink target, to exercise the Lstat path.
+func TestCompareWithMkfsSymlinkDir(t *testing.T) {
+	ts := fixedBuildTime
+	tarData := makeTar(t, func(tw *tar.Writer) { // NOTE(review): WriteHeader/Write results below are discarded
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "./", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "real/", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "real/a", Size: 1, Mode: 0o644, Uid: 0, Gid: 0, ModTime: ts})
+		tw.Write([]byte("a"))
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeSymlink, Name: "link", Linkname: "real", Mode: 0o777, Uid: 0, Gid: 0, ModTime: ts})
+	})
+	goImg := buildGoImage(t, tarData)
+	mkfsImg := buildMkfsImage(t, tarData) // skips the test when mkfs.erofs is unavailable
+	fsckImageBytes(t, "go", goImg)
+	fsckImageBytes(t, "mkfs", mkfsImg)
+	compareImages(t, goImg, mkfsImg)
+}
+
+// TestCompareWithMkfsHardLinksOutOfOrder verifies that go-erofs produces a
+// valid image for out-of-order hard links and runs fsck on it. mkfs.erofs 1.9
+// does not support hard links whose target appears later in the tar stream
+// (it errors with ENOENT), so only the go image is built, fsck'ed and read back.
+func TestCompareWithMkfsHardLinksOutOfOrder(t *testing.T) {
+	ts := fixedBuildTime
+	tarData := makeTar(t, func(tw *tar.Writer) { // NOTE(review): WriteHeader/Write results below are discarded
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "a/", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+		// Link before target — mkfs.erofs cannot handle this.
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeLink, Name: "a/link", Linkname: "a/target", Uid: 0, Gid: 0, ModTime: ts})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "a/target", Size: 5, Mode: 0o644, Uid: 0, Gid: 0, ModTime: ts})
+		tw.Write([]byte("hello"))
+	})
+	goImg := buildGoImage(t, tarData)
+	fsckImageBytes(t, "go", goImg)
+
+	// Verify content via the go-erofs reader: both names must yield the same bytes.
+	imgFS, err := erofs.Open(bytes.NewReader(goImg))
+	if err != nil {
+		t.Fatalf("Open: %v", err)
+	}
+	got, err := fs.ReadFile(imgFS, "a/target")
+	if err != nil {
+		t.Fatalf("ReadFile a/target: %v", err)
+	}
+	if string(got) != "hello" {
+		t.Errorf("a/target: got %q want hello", got)
+	}
+	got2, err := fs.ReadFile(imgFS, "a/link")
+	if err != nil {
+		t.Fatalf("ReadFile a/link: %v", err)
+	}
+	if string(got2) != "hello" {
+		t.Errorf("a/link: got %q want hello", got2)
+	}
+}
+
+// TestCompareWithMkfsWhiteouts builds a tar with OCI whiteout entries.
+// mkfs.erofs --aufs converts them to char devices too, so the outputs
+// should match.
+func TestCompareWithMkfsWhiteouts(t *testing.T) {
+	ts := fixedBuildTime
+	tarData := makeTar(t, func(tw *tar.Writer) { // NOTE(review): WriteHeader/Write results below are discarded
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "./", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "lib/", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "lib/.wh.removed.so", Size: 0, Uid: 0, Gid: 0, ModTime: ts})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "lib/.wh..wh..opq", Size: 0, Uid: 0, Gid: 0, ModTime: ts})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "lib/present.so", Size: 4, Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+		tw.Write([]byte("lib!"))
+	})
+	goImg := buildGoImage(t, tarData)
+	mkfsImg := buildMkfsImage(t, tarData) // skips the test when mkfs.erofs is unavailable
+	fsckImageBytes(t, "go", goImg)
+	fsckImageBytes(t, "mkfs", mkfsImg)
+	compareImages(t, goImg, mkfsImg)
+}
+
+// TestCompareWithMkfsUbuntuLike builds a small Ubuntu-shaped tree (dirs, binaries, a hard
+// link, a symlink, a device node, an xattr), runs it through both converters and diffs the results.
+func TestCompareWithMkfsUbuntuLike(t *testing.T) {
+	ts := fixedBuildTime
+	tarData := makeTar(t, func(tw *tar.Writer) { // NOTE(review): WriteHeader/Write results below are discarded
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "./", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+		dirs := []string{"bin/", "sbin/", "lib/", "lib/x86_64-linux-gnu/",
+			"etc/", "etc/apt/", "usr/", "usr/bin/", "usr/lib/", "var/", "var/log/"}
+		for _, d := range dirs {
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: d, Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+		}
+		for _, name := range []string{"bin/sh", "bin/ls", "sbin/init"} {
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: name, Size: 4, Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+			tw.Write([]byte("fake"))
+		}
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "lib/x86_64-linux-gnu/libc.so.6", Size: 8, Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+		tw.Write([]byte("libcdata"))
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeLink, Name: "lib/libc.so.6", Linkname: "lib/x86_64-linux-gnu/libc.so.6", Uid: 0, Gid: 0, ModTime: ts})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeSymlink, Name: "lib64", Linkname: "lib/x86_64-linux-gnu", Mode: 0o777, Uid: 0, Gid: 0, ModTime: ts})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "dev/", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeChar, Name: "dev/null", Mode: 0o666, Uid: 0, Gid: 0, Devmajor: 1, Devminor: 3, ModTime: ts})
+		tw.WriteHeader(&tar.Header{
+			Typeflag: tar.TypeReg, Name: "usr/bin/ping", Size: 4, Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts,
+			PAXRecords: map[string]string{"SCHILY.xattr.security.capability": "\x01\x00\x00\x02\x00 \x00\x00"},
+		})
+		tw.Write([]byte("ping"))
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "var/log/syslog", Size: 0, Mode: 0o640, Uid: 0, Gid: 4, ModTime: ts})
+	})
+
+	goImg := buildGoImage(t, tarData)
+	mkfsImg := buildMkfsImage(t, tarData) // skips the test when mkfs.erofs is unavailable
+	fsckImageBytes(t, "go", goImg)
+	fsckImageBytes(t, "mkfs", mkfsImg)
+	compareImages(t, goImg, mkfsImg)
+}
+
+// TestCompareFSWalk is the definitive filesystem equality test.
+//
+// The comprehensive tar from buildComparisonTar is converted by both
+// tarconv.Apply and mkfs.erofs; the entries collected from each image are
+// then merged in sorted path order and compared pairwise, so on top of the
+// per-path checks it also verifies that:
+//   - total entry count is identical
+//   - directory child order is identical (EROFS sorts lexicographically)
+//   - every file's full byte content matches
+//   - the root directory (".") itself matches
+//   - every xattr present on either side is present on the other
+//   - nlink is exactly equal (not just ">= 2")
+//   - rdev is exactly equal for device nodes
+//   - the complete unix mode word (type + special + perm) matches
+func TestCompareFSWalk(t *testing.T) {
+	layer := buildComparisonTar(t)
+	ours := buildGoImage(t, layer)
+	reference := buildMkfsImage(t, layer)
+
+	// Each image must pass fsck on its own before the two are compared.
+	fsckImageBytes(t, "go", ours)
+	fsckImageBytes(t, "mkfs", reference)
+
+	oursFS, err := erofs.Open(bytes.NewReader(ours))
+	if err != nil {
+		t.Fatalf("open go image: %v", err)
+	}
+	referenceFS, err := erofs.Open(bytes.NewReader(reference))
+	if err != nil {
+		t.Fatalf("open mkfs image: %v", err)
+	}
+
+	goEntries := collectImage(t, oursFS, "go")
+	mkEntries := collectImage(t, referenceFS, "mkfs")
+
+	// Differing entry counts: report the count, then name the unmatched paths.
+	if len(goEntries) != len(mkEntries) {
+		t.Errorf("entry count mismatch: go=%d mkfs=%d", len(goEntries), len(mkEntries))
+		// Path sets for each side, used only for the listing below.
+		seenGo := make(map[string]bool, len(goEntries))
+		for _, g := range goEntries {
+			seenGo[g.path] = true
+		}
+		seenMk := make(map[string]bool, len(mkEntries))
+		for _, m := range mkEntries {
+			seenMk[m.path] = true
+		}
+		for _, g := range goEntries {
+			if !seenMk[g.path] {
+				t.Errorf("  go-only path: %q", g.path)
+			}
+		}
+		for _, m := range mkEntries {
+			if !seenGo[m.path] {
+				t.Errorf("  mkfs-only path: %q", m.path)
+			}
+		}
+	}
+
+	// Merge the two path-sorted slices, diffing entries that share a path.
+	gi, mi := 0, 0
+	for gi < len(goEntries) && mi < len(mkEntries) {
+		g := goEntries[gi]
+		m := mkEntries[mi]
+		switch {
+		case g.path < m.path:
+			t.Errorf("path %q: in go image only", g.path)
+			gi++
+		case g.path > m.path:
+			t.Errorf("path %q: in mkfs image only", m.path)
+			mi++
+		default:
+			diffEntries(t, g.path, g, m)
+			gi++
+			mi++
+		}
+	}
+	for _, g := range goEntries[gi:] {
+		t.Errorf("path %q: in go image only (tail)", g.path)
+	}
+	for _, m := range mkEntries[mi:] {
+		t.Errorf("path %q: in mkfs image only (tail)", m.path)
+	}
+}
+
+// TestCompareWithMkfsHardLinks builds a single-layer tar with a variety of
+// hard-link configurations, converts it with both tarconv.Apply (default mode)
+// and mkfs.erofs, and compares the resulting images with compareImages.
+//
+// Covered cases:
+//   - 2-way hard link (canonical + 1 alias), nlink=2
+//   - 3-way hard link (canonical + 2 aliases), nlink=3
+//   - Cross-directory hard link (alias in a different dir from canonical)
+//   - Hard link to a file with non-root uid/gid (Chown applied to canonical
+//     must be reflected on all aliases); this shares the cross-directory entry
+func TestCompareWithMkfsHardLinks(t *testing.T) {
+	ts := fixedBuildTime
+	tarData := makeTar(t, func(tw *tar.Writer) {
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "./", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "a/", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "b/", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+
+		// 2-way: canonical a/one plus alias a/one-link
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "a/one", Size: 3, Mode: 0o644, Uid: 0, Gid: 0, ModTime: ts})
+		tw.Write([]byte("one"))
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeLink, Name: "a/one-link", Linkname: "a/one", Uid: 0, Gid: 0, ModTime: ts})
+
+		// 3-way: a/three, a/three-1, a/three-2 — nlink must be exactly 3
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "a/three", Size: 5, Mode: 0o644, Uid: 0, Gid: 0, ModTime: ts})
+		tw.Write([]byte("three"))
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeLink, Name: "a/three-1", Linkname: "a/three", Uid: 0, Gid: 0, ModTime: ts})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeLink, Name: "a/three-2", Linkname: "a/three", Uid: 0, Gid: 0, ModTime: ts})
+
+		// cross-directory with a non-root owner (uid/gid 1000): canonical in a/, alias in b/
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "a/shared", Size: 6, Mode: 0o755, Uid: 1000, Gid: 1000, ModTime: ts})
+		tw.Write([]byte("shared"))
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeLink, Name: "b/shared", Linkname: "a/shared", Uid: 1000, Gid: 1000, ModTime: ts})
+	})
+
+	goImg := buildGoImage(t, tarData)
+	mkfsImg := buildMkfsImage(t, tarData)
+	fsckImageBytes(t, "go", goImg)
+	fsckImageBytes(t, "mkfs", mkfsImg)
+	compareImages(t, goImg, mkfsImg)
+}
+
+// TestCompareMergeHardLinksWithMkfs verifies that tarconv.Apply(WithMerge)
+// produces the same result as mkfs.erofs operating on the equivalent
+// pre-merged tar. Each sub-case fscks both images before comparing them.
+//
+// Three sub-cases are tested:
+//
+//  1. Both canonical and alias in the same layer.
+//  2. Canonical in layer 1, alias in layer 2 (cross-layer hard link).
+//     The pre-merged tar for mkfs includes both the canonical file and the
+//     hard-link entry in one stream.
+//  3. Canonical in layer 1, alias in layer 2, with the canonical file
+//     updated (overwritten) in layer 2 — alias must reflect the update
+//     (nlink=2, new content).
+func TestCompareMergeHardLinksWithMkfs(t *testing.T) {
+	ts := fixedBuildTime
+
+	t.Run("SameLayer", func(t *testing.T) {
+		// Both canonical and alias land in the same layer — identical to the
+		// non-merge case, but exercised through WithMerge.
+		layer1 := makeTar(t, func(tw *tar.Writer) {
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "./", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "data", Size: 4, Mode: 0o644, Uid: 0, Gid: 0, ModTime: ts})
+			tw.Write([]byte("data"))
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeLink, Name: "link", Linkname: "data", Uid: 0, Gid: 0, ModTime: ts})
+		})
+
+		// merged image via a single Apply(WithMerge) call
+		out := &buf{}
+		w := erofs.Create(out, erofs.WithBuildTime(uint64(ts.Unix()), 0))
+		if err := tarconv.Apply(w, bytes.NewReader(layer1), tarconv.WithMerge()); err != nil {
+			t.Fatalf("Apply: %v", err)
+		}
+		if err := w.Close(); err != nil {
+			t.Fatalf("Close: %v", err)
+		}
+		mergedImg := out.b
+
+		// equivalent single-layer mkfs image, built from the very same tar
+		mkfsImg := buildMkfsImage(t, layer1)
+
+		fsckImageBytes(t, "merged", mergedImg)
+		fsckImageBytes(t, "mkfs", mkfsImg)
+		compareImages(t, mergedImg, mkfsImg)
+	})
+
+	t.Run("CrossLayer", func(t *testing.T) {
+		// Canonical in layer 1, alias in layer 2.
+		layer1 := makeTar(t, func(tw *tar.Writer) {
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "./", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "data", Size: 4, Mode: 0o644, Uid: 0, Gid: 0, ModTime: ts})
+			tw.Write([]byte("data"))
+		})
+		layer2 := makeTar(t, func(tw *tar.Writer) {
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeLink, Name: "link", Linkname: "data", Uid: 0, Gid: 0, ModTime: ts})
+		})
+
+		// merged image via two Apply(WithMerge) calls on the same writer
+		out := &buf{}
+		w := erofs.Create(out, erofs.WithBuildTime(uint64(ts.Unix()), 0))
+		if err := tarconv.Apply(w, bytes.NewReader(layer1), tarconv.WithMerge()); err != nil {
+			t.Fatalf("Apply layer1: %v", err)
+		}
+		if err := tarconv.Apply(w, bytes.NewReader(layer2), tarconv.WithMerge()); err != nil {
+			t.Fatalf("Apply layer2: %v", err)
+		}
+		if err := w.Close(); err != nil {
+			t.Fatalf("Close: %v", err)
+		}
+		mergedImg := out.b
+
+		// equivalent pre-merged tar for mkfs: canonical + hard link in one stream
+		preMerged := makeTar(t, func(tw *tar.Writer) {
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "./", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "data", Size: 4, Mode: 0o644, Uid: 0, Gid: 0, ModTime: ts})
+			tw.Write([]byte("data"))
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeLink, Name: "link", Linkname: "data", Uid: 0, Gid: 0, ModTime: ts})
+		})
+		mkfsImg := buildMkfsImage(t, preMerged)
+
+		fsckImageBytes(t, "merged", mergedImg)
+		fsckImageBytes(t, "mkfs", mkfsImg)
+		compareImages(t, mergedImg, mkfsImg)
+	})
+
+	t.Run("CrossLayerWithUpdate", func(t *testing.T) {
+		// Canonical in layer 1, overwritten in layer 2, alias also in layer 2.
+		// The final image should have nlink=2 and the new content.
+		layer1 := makeTar(t, func(tw *tar.Writer) {
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "./", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "data", Size: 3, Mode: 0o644, Uid: 0, Gid: 0, ModTime: ts})
+			tw.Write([]byte("old"))
+		})
+		layer2 := makeTar(t, func(tw *tar.Writer) {
+			// Overwrite with new content.
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "data", Size: 3, Mode: 0o644, Uid: 0, Gid: 0, ModTime: ts})
+			tw.Write([]byte("new"))
+			// Hard link to the new version.
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeLink, Name: "link", Linkname: "data", Uid: 0, Gid: 0, ModTime: ts})
+		})
+
+		out := &buf{}
+		w := erofs.Create(out, erofs.WithBuildTime(uint64(ts.Unix()), 0))
+		if err := tarconv.Apply(w, bytes.NewReader(layer1), tarconv.WithMerge()); err != nil {
+			t.Fatalf("Apply layer1: %v", err)
+		}
+		if err := tarconv.Apply(w, bytes.NewReader(layer2), tarconv.WithMerge()); err != nil {
+			t.Fatalf("Apply layer2: %v", err)
+		}
+		if err := w.Close(); err != nil {
+			t.Fatalf("Close: %v", err)
+		}
+		mergedImg := out.b
+
+		preMerged := makeTar(t, func(tw *tar.Writer) { // only the final ("new") state of data, plus the link
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "./", Mode: 0o755, Uid: 0, Gid: 0, ModTime: ts})
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "data", Size: 3, Mode: 0o644, Uid: 0, Gid: 0, ModTime: ts})
+			tw.Write([]byte("new"))
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeLink, Name: "link", Linkname: "data", Uid: 0, Gid: 0, ModTime: ts})
+		})
+		mkfsImg := buildMkfsImage(t, preMerged)
+
+		fsckImageBytes(t, "merged", mergedImg)
+		fsckImageBytes(t, "mkfs", mkfsImg)
+		compareImages(t, mergedImg, mkfsImg)
+	})
+}
+
+// TestFsckConvert runs fsck.erofs over images built from a fixed table of
+// representative tars; the whole test is skipped when fsck.erofs is not in PATH.
+func TestFsckConvert(t *testing.T) {
+	if _, err := exec.LookPath("fsck.erofs"); err != nil {
+		t.Skip("fsck.erofs not in PATH")
+	}
+	ts := fixedBuildTime
+	cases := []struct {
+		name string
+		tar  func(tw *tar.Writer)
+	}{
+		{"BasicFiles", func(tw *tar.Writer) {
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "etc/", Mode: 0o755, ModTime: ts})
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "etc/hosts", Size: 9, Mode: 0o644, ModTime: ts})
+			tw.Write([]byte("127.0.0.1"))
+		}},
+		{"HardLinksOutOfOrder", func(tw *tar.Writer) {
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeLink, Name: "early", Linkname: "actual", ModTime: ts})
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "actual", Size: 4, Mode: 0o644, ModTime: ts})
+			tw.Write([]byte("data"))
+		}},
+		{"DeviceNodes", func(tw *tar.Writer) {
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeChar, Name: "dev/null", Mode: 0o666, Devmajor: 1, Devminor: 3, ModTime: ts})
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeFifo, Name: "tmp/pipe", Mode: 0o644, ModTime: ts})
+		}},
+		{"SetuidSticky", func(tw *tar.Writer) {
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "su", Size: 2, Mode: 0o4755, ModTime: ts})
+			tw.Write([]byte("su"))
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "tmp/", Mode: 0o1777, ModTime: ts})
+		}},
+		{"Whiteouts", func(tw *tar.Writer) {
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "lib/", Mode: 0o755, ModTime: ts})
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "lib/.wh.gone", Size: 0, ModTime: ts})
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "lib/.wh..wh..opq", Size: 0, ModTime: ts})
+		}},
+	}
+	for _, tc := range cases {
+		tc := tc // per-iteration copy; redundant since Go 1.22 loop semantics, but harmless
+		t.Run(tc.name, func(t *testing.T) {
+			tarData := makeTar(t, tc.tar)
+			imgData := buildGoImage(t, tarData)
+			fsckImageBytes(t, tc.name, imgData)
+		})
+	}
+}
+
+// ----------------------------------------------------------------------------
+// Roundtrip benchmark: convert a tar, then walk every entry of the image.
+// ----------------------------------------------------------------------------
+
+// BenchmarkImageRoundtrip converts the medium workload tar and then walks
+// every entry of the resulting image, so each iteration covers write and read.
+func BenchmarkImageRoundtrip(b *testing.B) {
+	tarData := buildTarBytes(b, mediumWorkload())
+	b.SetBytes(int64(len(tarData)))
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		out := &buf{}
+		w := erofs.Create(out, erofs.WithBuildTime(uint64(fixedBuildTime.Unix()), 0))
+		if err := tarconv.Apply(w, bytes.NewReader(tarData)); err != nil {
+			b.Fatalf("Apply: %v", err)
+		}
+		if err := w.Close(); err != nil {
+			b.Fatalf("Close: %v", err)
+		}
+		img, err := erofs.Open(bytes.NewReader(out.b))
+		if err != nil {
+			b.Fatalf("Open: %v", err)
+		}
+		rd, rdOK := img.(readDirer)
+		ls, lsOK := img.(lstater)
+		if !rdOK || !lsOK { // a nil rd/ls would panic inside the walk below
+			b.Fatalf("image FS %T does not implement ReadDir and Lstat", img)
+		}
+		var walkDir func(string)
+		walkDir = func(dir string) {
+			des, err := rd.ReadDir(dir)
+			if err != nil { // otherwise a broken image is timed as an empty walk
+				b.Fatalf("ReadDir %s: %v", dir, err)
+			}
+			for _, de := range des {
+				p := de.Name()
+				if dir != "." {
+					p = dir + "/" + p
+				}
+				ls.Lstat(p)
+				if de.IsDir() {
+					walkDir(p)
+				}
+			}
+		}
+		walkDir(".")
+	}
+}
+
+// ----------------------------------------------------------------------------
+// Helpers used only in this file.
+// ----------------------------------------------------------------------------
+
+// mediumSyntheticTar serializes the mediumWorkload entries into tar bytes.
+func mediumSyntheticTar(t testing.TB) []byte {
+	t.Helper()
+	workload := mediumWorkload()
+	return buildTarBytes(t, workload)
+}
+
+// pathBase reports the final element of the slash-separated path p.
+func pathBase(p string) string {
+	// LastIndexByte yields -1 when p has no slash, so the slice below then
+	// starts at 0 and returns p unchanged.
+	slash := strings.LastIndexByte(p, '/')
+	return p[slash+1:]
+}
+
+// pathDir reports everything before the final slash of p, or "." without one.
+func pathDir(p string) string {
+	if slash := strings.LastIndexByte(p, '/'); slash != -1 {
+		return p[:slash]
+	}
+	return "."
+}
+
+// writeTarToFile stores tarData in a new temporary file and returns its path.
+// The caller must remove the file; write or close failures fail the test.
+func writeTarToFile(t testing.TB, tarData []byte) string {
+	t.Helper()
+	f, err := os.CreateTemp("", "cmp-*.tar")
+	if err != nil {
+		t.Fatalf("create tar file: %v", err)
+	}
+	_ = f.Close() // only the unique name is needed; os.WriteFile reopens it
+	if err := os.WriteFile(f.Name(), tarData, 0o600); err != nil {
+		t.Fatalf("write tar file: %v", err)
+	}
+	return f.Name()
+}
+
+// readMkfsImageFromFile runs mkfs.erofs with the tar at tarPath on stdin and returns the bytes of outDir/out.erofs.
+func readMkfsImageFromFile(t testing.TB, tarPath, outDir string) []byte {
+	t.Helper()
+	outPath := filepath.Join(outDir, "out.erofs")
+	f, err := os.Open(tarPath)
+	if err != nil {
+		t.Fatalf("open tar: %v", err)
+	}
+	defer f.Close()
+	args := []string{"--tar=f", "--aufs", "--quiet", "-Enoinline_data",
+		"-T" + fixedBuildTimeStr, "--all-time", outPath}
+	cmd := exec.CommandContext(context.Background(), "mkfs.erofs", args...)
+	cmd.Stdin = f // the tar stream is supplied on mkfs.erofs's standard input
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		t.Fatalf("mkfs.erofs: %v\n%s", err, out) // include the tool's combined stdout/stderr
+	}
+	data, err := os.ReadFile(outPath)
+	if err != nil {
+		t.Fatalf("read image: %v", err)
+	}
+	return data
+}
+
+// Blank references to helpers nothing else in view calls; presumably they silence unused-code linters (the compiler accepts unused functions).
+var _ = pathBase
+var _ = pathDir
+var _ = writeTarToFile
+var _ = readMkfsImageFromFile
+var _ = mediumSyntheticTar
diff --git a/tarconv/convert_test.go b/tarconv/convert_test.go
new file mode 100644
index 0000000..c4301c3
--- /dev/null
+++ b/tarconv/convert_test.go
@@ -0,0 +1,922 @@
+package tarconv_test
+
+import (
+	"archive/tar"
+	"bytes"
+	"errors"
+	"io"
+	"io/fs"
+	"os"
+	"os/exec"
+	"testing"
+	"time"
+
+	erofs "github.com/erofs/go-erofs"
+
+	"github.com/containerd/continuity/tarconv"
+)
+
+// ----------------------------------------------------------------------------
+// Helpers
+// ----------------------------------------------------------------------------
+
+// buf is a simple in-memory io.WriteSeeker that also implements io.ReaderAt.
+type buf struct {
+	b   []byte // bytes written so far
+	off int    // offset in b where the next Write lands
+}
+
+func (b *buf) Write(p []byte) (int, error) {
+	end := b.off + len(p)
+	if end > len(b.b) { // grow with zeros, which also fills any gap left by a Seek past the end
+		b.b = append(b.b, make([]byte, end-len(b.b))...)
+	}
+	copy(b.b[b.off:], p)
+	b.off = end
+	return len(p), nil
+}
+
+func (b *buf) Seek(offset int64, whence int) (int64, error) {
+	switch whence { // fold the base position into offset so one bounds check covers every mode
+	case io.SeekStart: // offset is already absolute
+	case io.SeekCurrent:
+		offset += int64(b.off)
+	case io.SeekEnd:
+		offset += int64(len(b.b))
+	default: // previously an unknown whence silently seeked to 0
+		return 0, errors.New("invalid whence")
+	}
+	if offset < 0 {
+		return 0, errors.New("negative seek")
+	}
+	b.off = int(offset)
+	return offset, nil
+}
+
+func (b *buf) ReadAt(p []byte, off int64) (int, error) {
+	if off >= int64(len(b.b)) { // compare as int64 so a large off is not truncated
+		return 0, io.EOF
+	}
+	n := copy(p, b.b[off:])
+	if n < len(p) { // short read: the buffer ended before p was filled
+		return n, io.EOF
+	}
+	return n, nil
+}
+
+// makeTar runs f against a fresh tar.Writer and returns the closed archive's bytes.
+func makeTar(t testing.TB, f func(tw *tar.Writer)) []byte {
+	t.Helper()
+	archive := new(bytes.Buffer)
+	w := tar.NewWriter(archive)
+	f(w)
+	if closeErr := w.Close(); closeErr != nil {
+		t.Fatalf("tar close: %v", closeErr)
+	}
+	return archive.Bytes()
+}
+
+// buildImage converts one tar layer with tarconv.Apply (no options) and returns the image bytes.
+func buildImage(t *testing.T, tarData []byte) []byte {
+	t.Helper()
+	var img buf
+	writer := erofs.Create(&img)
+	if err := tarconv.Apply(writer, bytes.NewReader(tarData)); err != nil {
+		t.Fatalf("Apply: %v", err)
+	}
+	if err := writer.Close(); err != nil {
+		t.Fatalf("Writer.Close: %v", err)
+	}
+	return img.b
+}
+
+// buildMergedImage feeds each layer, in order, through Apply(WithMerge) on one writer and returns the image.
+func buildMergedImage(t *testing.T, layers ...[]byte) []byte {
+	t.Helper()
+	var img buf
+	writer := erofs.Create(&img)
+	for idx := range layers {
+		if err := tarconv.Apply(writer, bytes.NewReader(layers[idx]), tarconv.WithMerge()); err != nil {
+			t.Fatalf("Apply(WithMerge) layer %d: %v", idx, err)
+		}
+	}
+	if err := writer.Close(); err != nil {
+		t.Fatalf("Writer.Close: %v", err)
+	}
+	return img.b
+}
+
+// openImage opens the EROFS image held in data, failing the test on error.
+func openImage(t *testing.T, data []byte) fs.FS {
+	t.Helper()
+	fsys, openErr := erofs.Open(bytes.NewReader(data))
+	if openErr != nil {
+		t.Fatalf("erofs.Open: %v", openErr)
+	}
+	return fsys
+}
+
+// checkFile reads name from fsys and reports an error if it differs from want.
+func checkFile(t *testing.T, fsys fs.FS, name, want string) {
+	t.Helper()
+	data, readErr := fs.ReadFile(fsys, name)
+	switch {
+	case readErr != nil:
+		t.Fatalf("ReadFile %s: %v", name, readErr)
+	case string(data) != want:
+		t.Errorf("%s: got %q want %q", name, data, want)
+	}
+}
+
+// checkStat returns the fs.FileInfo for name, failing the test if Stat errors.
+func checkStat(t *testing.T, fsys fs.FS, name string) fs.FileInfo {
+	t.Helper()
+	fi, statErr := fs.Stat(fsys, name)
+	if statErr != nil {
+		t.Fatalf("Stat %s: %v", name, statErr)
+	}
+	return fi
+}
+
+// checkNotExist reports an error unless Stat of name fails with fs.ErrNotExist.
+func checkNotExist(t *testing.T, fsys fs.FS, name string) {
+	t.Helper()
+	_, statErr := fs.Stat(fsys, name)
+	if missing := errors.Is(statErr, fs.ErrNotExist); !missing {
+		t.Errorf("%s should not exist but Stat returned: %v", name, statErr)
+	}
+}
+
+// checkDirNames asserts that dir contains exactly the child names in want.
+func checkDirNames(t *testing.T, fsys fs.FS, dir string, want ...string) {
+	t.Helper()
+	entries, err := fs.ReadDir(fsys, dir)
+	if err != nil {
+		t.Fatalf("ReadDir %s: %v", dir, err)
+	}
+	got := make(map[string]bool, len(entries))
+	for _, e := range entries {
+		got[e.Name()] = true
+	}
+	wantMap := make(map[string]bool, len(want))
+	for _, n := range want {
+		wantMap[n] = true
+		if !got[n] {
+			t.Errorf("%s: missing child %q", dir, n)
+		}
+	}
+	// Walk entries (fs.ReadDir returns them sorted by name) instead of the got
+	// map so unexpected children are reported in a deterministic order.
+	for _, e := range entries {
+		if !wantMap[e.Name()] {
+			t.Errorf("%s: unexpected child %q", dir, e.Name())
+		}
+	}
+}
+
+// fsckImage runs fsck.erofs on data when the tool is in PATH; otherwise it silently does nothing.
+func fsckImage(t *testing.T, data []byte) {
+	t.Helper()
+	if _, err := exec.LookPath("fsck.erofs"); err != nil {
+		return
+	}
+	f, err := os.CreateTemp("", "erofs-*.img")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.Remove(f.Name())
+	if _, err := f.Write(data); err != nil {
+		_ = f.Close()
+		t.Fatal(err)
+	}
+	_ = f.Close() // closed before fsck.erofs is pointed at the file by name
+	out, err := exec.Command("fsck.erofs", f.Name()).CombinedOutput()
+	if err != nil {
+		t.Fatalf("fsck.erofs: %v\n%s", err, out)
+	}
+}
+
+var epoch = time.Unix(1700000000, 0) // fixed ModTime for the tar headers built in this file
+
+// ----------------------------------------------------------------------------
+// Convert tests
+// ----------------------------------------------------------------------------
+
+// TestConvertBasicFiles converts a tar with one directory and two regular files and checks them in the image.
+func TestConvertBasicFiles(t *testing.T) {
+	tarData := makeTar(t, func(tw *tar.Writer) {
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "etc/", Mode: 0o755, ModTime: epoch})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "etc/hostname", Size: 10, Mode: 0o644, ModTime: epoch})
+		tw.Write([]byte("myhost\n   ")) // trailing spaces pad the body to the declared Size of 10
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "etc/passwd", Size: 5, Mode: 0o644, ModTime: epoch})
+		tw.Write([]byte("root\n"))
+	})
+	img := buildImage(t, tarData)
+	fsckImage(t, img)
+	fsys := openImage(t, img)
+	checkFile(t, fsys, "etc/hostname", "myhost\n   ")
+	checkFile(t, fsys, "etc/passwd", "root\n")
+	info := checkStat(t, fsys, "etc")
+	if !info.IsDir() {
+		t.Error("etc should be a directory")
+	}
+}
+
+// TestConvertMetadata checks that permission bits, uid, gid and mtime (whole seconds) are preserved.
+func TestConvertMetadata(t *testing.T) {
+	mt := time.Unix(1600000000, 123456789) // the nanosecond part is set but not asserted below
+	tarData := makeTar(t, func(tw *tar.Writer) {
+		tw.WriteHeader(&tar.Header{
+			Typeflag: tar.TypeReg,
+			Name:     "secret",
+			Size:     3,
+			Mode:     0o600,
+			Uid:      1000,
+			Gid:      2000,
+			ModTime:  mt,
+		})
+		tw.Write([]byte("abc"))
+	})
+	img := buildImage(t, tarData)
+	fsckImage(t, img)
+	fsys := openImage(t, img)
+	info := checkStat(t, fsys, "secret")
+	if info.Mode().Perm() != 0o600 {
+		t.Errorf("mode: got %o want %o", info.Mode().Perm(), 0o600)
+	}
+	st, ok := info.Sys().(*erofs.Stat)
+	if !ok {
+		t.Fatalf("Sys() is %T, want *erofs.Stat", info.Sys())
+	}
+	if st.UID != 1000 {
+		t.Errorf("uid: got %d want 1000", st.UID)
+	}
+	if st.GID != 2000 {
+		t.Errorf("gid: got %d want 2000", st.GID)
+	}
+	if st.Mtime != uint64(mt.Unix()) {
+		t.Errorf("mtime: got %d want %d", st.Mtime, mt.Unix())
+	}
+}
+
+// TestConvertSymlink checks that a symlink entry is still a symlink in the image; its target is not inspected.
+func TestConvertSymlink(t *testing.T) {
+	tarData := makeTar(t, func(tw *tar.Writer) {
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "usr/", Mode: 0o755, ModTime: epoch})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "usr/bin/", Mode: 0o755, ModTime: epoch})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "usr/bin/sh", Size: 4, Mode: 0o755, ModTime: epoch})
+		tw.Write([]byte("#!/s"))
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeSymlink, Name: "bin", Linkname: "usr/bin", Mode: 0o777, ModTime: epoch})
+	})
+	img := buildImage(t, tarData)
+	fsckImage(t, img)
+	imgFS := openImage(t, img)
+	// Use Lstat to avoid following the symlink; skip if the image FS lacks it.
+	lstater, ok := imgFS.(interface{ Lstat(string) (fs.FileInfo, error) })
+	if !ok {
+		t.Skip("image FS does not implement Lstat")
+	}
+	info, err := lstater.Lstat("bin")
+	if err != nil {
+		t.Fatalf("Lstat bin: %v", err)
+	}
+	if info.Mode()&fs.ModeSymlink == 0 {
+		t.Errorf("bin: expected symlink, got %v", info.Mode())
+	}
+}
+
+// TestConvertHardLinks exercises a hard link whose target precedes it in the tar stream.
+func TestConvertHardLinks(t *testing.T) {
+	tarData := makeTar(t, func(tw *tar.Writer) {
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "data", Size: 5, Mode: 0o644, ModTime: epoch, Uid: 100})
+		tw.Write([]byte("hello"))
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeLink, Name: "data-link", Linkname: "data", ModTime: epoch, Uid: 100})
+	})
+	img := buildImage(t, tarData)
+	fsckImage(t, img)
+	fsys := openImage(t, img)
+	checkFile(t, fsys, "data", "hello")
+	checkFile(t, fsys, "data-link", "hello")
+	// Verify shared inode (nlink >= 2). checkStat fails the test on a Stat
+	info := checkStat(t, fsys, "data") // error instead of leaving info nil.
+	st := info.Sys().(*erofs.Stat)
+	if st.Nlink < 2 {
+		t.Errorf("data: nlink = %d, want >= 2", st.Nlink)
+	}
+}
+
+// TestConvertHardLinksOutOfOrder exercises hard links that appear before their
+// target in the tar stream; both names must read back the target's content.
+func TestConvertHardLinksOutOfOrder(t *testing.T) {
+	tarData := makeTar(t, func(tw *tar.Writer) {
+		// Hard link appears BEFORE the target.
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeLink, Name: "early-link", Linkname: "actual", ModTime: epoch})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "actual", Size: 4, Mode: 0o644, ModTime: epoch})
+		tw.Write([]byte("data"))
+	})
+	img := buildImage(t, tarData)
+	fsckImage(t, img)
+	fsys := openImage(t, img)
+	checkFile(t, fsys, "actual", "data")
+	checkFile(t, fsys, "early-link", "data")
+	info := checkStat(t, fsys, "actual") // fails the test on a Stat error instead of leaving info nil
+	st := info.Sys().(*erofs.Stat)
+	if st.Nlink < 2 {
+		t.Errorf("actual: nlink = %d, want >= 2", st.Nlink)
+	}
+}
+
+// TestConvertUnresolvedHardLink expects Apply to fail when a hard link names a
+// target that never appears in the stream.
+func TestConvertUnresolvedHardLink(t *testing.T) {
+	dangling := makeTar(t, func(tw *tar.Writer) {
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeLink, Name: "broken", Linkname: "ghost", ModTime: epoch})
+	})
+	var img buf
+	w := erofs.Create(&img)
+	// The writer is never closed here; only Apply's result is checked.
+	if err := tarconv.Apply(w, bytes.NewReader(dangling)); err == nil {
+		t.Fatal("expected error for unresolved hard link, got nil")
+	}
+}
+
+// TestConvertDeviceNodes checks the file types of a char device, a block device and a FIFO.
+func TestConvertDeviceNodes(t *testing.T) {
+	tarData := makeTar(t, func(tw *tar.Writer) {
+		tw.WriteHeader(&tar.Header{
+			Typeflag: tar.TypeChar, Name: "dev/null",
+			Mode: 0o666, Devmajor: 1, Devminor: 3, ModTime: epoch,
+		})
+		tw.WriteHeader(&tar.Header{
+			Typeflag: tar.TypeBlock, Name: "dev/sda",
+			Mode: 0o660, Devmajor: 8, Devminor: 0, ModTime: epoch,
+		})
+		tw.WriteHeader(&tar.Header{
+			Typeflag: tar.TypeFifo, Name: "tmp/pipe",
+			Mode: 0o644, ModTime: epoch,
+		})
+	})
+	img := buildImage(t, tarData)
+	fsckImage(t, img)
+	fsys := openImage(t, img)
+
+	info := checkStat(t, fsys, "dev/null")
+	if info.Mode()&(fs.ModeDevice|fs.ModeCharDevice) != fs.ModeDevice|fs.ModeCharDevice {
+		t.Errorf("dev/null: mode %v should be char device", info.Mode())
+	}
+	st := info.Sys().(*erofs.Stat)
+	// rdev encodes major/minor; only check it is nonzero, not the exact encoding.
+	if st.Rdev == 0 {
+		t.Errorf("dev/null: rdev should be nonzero")
+	}
+
+	info = checkStat(t, fsys, "dev/sda") // block device: ModeDevice set, ModeCharDevice clear
+	if info.Mode()&fs.ModeDevice == 0 || info.Mode()&fs.ModeCharDevice != 0 {
+		t.Errorf("dev/sda: mode %v should be block device", info.Mode())
+	}
+
+	info = checkStat(t, fsys, "tmp/pipe")
+	if info.Mode()&fs.ModeNamedPipe == 0 {
+		t.Errorf("tmp/pipe: mode %v should be named pipe", info.Mode())
+	}
+}
+
+// TestConvertXattrs checks that SCHILY.xattr.* PAX records come back as xattrs with that prefix removed and values unchanged.
+func TestConvertXattrs(t *testing.T) {
+	tarData := makeTar(t, func(tw *tar.Writer) {
+		hdr := &tar.Header{
+			Typeflag: tar.TypeReg,
+			Name:     "bin/ping",
+			Size:     4,
+			Mode:     0o755,
+			ModTime:  epoch,
+			PAXRecords: map[string]string{
+				"SCHILY.xattr.security.capability": "AQIDBA==",
+				"SCHILY.xattr.user.comment":        "hello",
+			},
+		}
+		tw.WriteHeader(hdr)
+		tw.Write([]byte("ping"))
+	})
+	img := buildImage(t, tarData)
+	fsckImage(t, img)
+	fsys := openImage(t, img)
+	info := checkStat(t, fsys, "bin/ping")
+	st, ok := info.Sys().(*erofs.Stat)
+	if !ok {
+		t.Fatalf("Sys() is %T", info.Sys())
+	}
+	if st.Xattrs["security.capability"] != "AQIDBA==" { // compared verbatim, not decoded
+		t.Errorf("security.capability: got %q", st.Xattrs["security.capability"])
+	}
+	if st.Xattrs["user.comment"] != "hello" {
+		t.Errorf("user.comment: got %q", st.Xattrs["user.comment"])
+	}
+}
+
+// TestConvertWhiteouts checks that, in the default mode, a .wh.<name> entry
+// becomes a char device 0/0 and the parent directory gains overlay xattrs.
+func TestConvertWhiteouts(t *testing.T) {
+	tarData := makeTar(t, func(tw *tar.Writer) {
+		// Create the directory so the opaque xattr has somewhere to land.
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "lib/", Mode: 0o755, ModTime: epoch})
+		// Opaque whiteout on lib/.
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "lib/.wh..wh..opq", Size: 0, ModTime: epoch})
+		// Regular whiteout: removes lib/removed.so.
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "lib/.wh.removed.so", Size: 0, ModTime: epoch})
+	})
+	img := buildImage(t, tarData)
+	fsckImage(t, img)
+	fsys := openImage(t, img)
+
+	// lib/removed.so should exist as a char device 0/0.
+	info := checkStat(t, fsys, "lib/removed.so")
+	if info.Mode()&(fs.ModeDevice|fs.ModeCharDevice) != fs.ModeDevice|fs.ModeCharDevice {
+		t.Errorf("lib/removed.so: expected char device whiteout, got mode %v", info.Mode())
+	}
+	st := info.Sys().(*erofs.Stat)
+	if st.Rdev != 0 {
+		t.Errorf("lib/removed.so: rdev should be 0 for whiteout, got %d", st.Rdev)
+	}
+
+	// lib itself should have the opaque xattr set to "y" (from .wh..wh..opq) and
+	// a trusted.overlay.origin xattr (from the regular .wh.removed.so whiteout).
+	info = checkStat(t, fsys, "lib")
+	st = info.Sys().(*erofs.Stat)
+	if st.Xattrs[overlayOpaqueXattr] != "y" {
+		t.Errorf("lib: expected opaque xattr, got xattrs=%v", st.Xattrs)
+	}
+	if _, ok := st.Xattrs["trusted.overlay.origin"]; !ok { // presence only; the value is not checked
+		t.Errorf("lib: expected trusted.overlay.origin from regular whiteout, got xattrs=%v", st.Xattrs)
+	}
+}
+
+// TestConvertOpaqueBeforeDir tests that the opaque xattr is applied even when
+// the .wh..wh..opq entry precedes its directory's own entry in the stream.
+func TestConvertOpaqueBeforeDir(t *testing.T) {
+	tarData := makeTar(t, func(tw *tar.Writer) {
+		// opaque marker BEFORE the directory entry.
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "newdir/.wh..wh..opq", Size: 0, ModTime: epoch})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "newdir/", Mode: 0o755, ModTime: epoch})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "newdir/file.txt", Size: 3, Mode: 0o644, ModTime: epoch})
+		tw.Write([]byte("hi!"))
+	})
+	img := buildImage(t, tarData)
+	fsckImage(t, img)
+	fsys := openImage(t, img)
+
+	info := checkStat(t, fsys, "newdir")
+	st := info.Sys().(*erofs.Stat)
+	if st.Xattrs[overlayOpaqueXattr] != "y" {
+		t.Errorf("newdir: expected opaque xattr, got xattrs=%v", st.Xattrs)
+	}
+	// An opaque directory gets only the opaque xattr here, not origin:
+	// trusted.overlay.origin is expected only on directories containing regular whiteouts.
+	if _, ok := st.Xattrs["trusted.overlay.origin"]; ok {
+		t.Errorf("newdir: unexpected trusted.overlay.origin on opaque dir, xattrs=%v", st.Xattrs)
+	}
+	checkFile(t, fsys, "newdir/file.txt", "hi!")
+}
+
+// TestConvertEmptyFile verifies that a zero-length regular file converts and reads back empty.
+func TestConvertEmptyFile(t *testing.T) {
+	tarData := makeTar(t, func(tw *tar.Writer) {
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "empty", Size: 0, Mode: 0o644, ModTime: epoch})
+	})
+	img := buildImage(t, tarData)
+	fsckImage(t, img)
+	fsys := openImage(t, img)
+	checkFile(t, fsys, "empty", "")
+}
+
+// TestConvertLargeFile exercises a file meant to span multiple EROFS blocks (assuming 4096-byte blocks — TODO confirm).
+func TestConvertLargeFile(t *testing.T) {
+	const size = 4*4096 + 7 // four 4096-byte chunks plus a 7-byte tail
+	data := make([]byte, size)
+	for i := range data {
+		data[i] = byte(i) // repeating 0..255 pattern
+	}
+	tarData := makeTar(t, func(tw *tar.Writer) {
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "big", Size: size, Mode: 0o644, ModTime: epoch})
+		tw.Write(data)
+	})
+	img := buildImage(t, tarData)
+	fsckImage(t, img)
+	fsys := openImage(t, img)
+	got, err := fs.ReadFile(fsys, "big")
+	if err != nil {
+		t.Fatalf("ReadFile: %v", err)
+	}
+	if !bytes.Equal(got, data) {
+		t.Errorf("large file content mismatch: got %d bytes, want %d", len(got), len(data))
+	}
+}
+
+// TestConvertSetuidBit verifies that the setuid bit survives conversion (setgid and sticky are not covered here).
+func TestConvertSetuidBit(t *testing.T) {
+	tarData := makeTar(t, func(tw *tar.Writer) {
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "su", Size: 2, Mode: 0o4755, ModTime: epoch})
+		tw.Write([]byte("su"))
+	})
+	img := buildImage(t, tarData)
+	fsckImage(t, img)
+	fsys := openImage(t, img)
+	info := checkStat(t, fsys, "su")
+	// Read the mode from the erofs.Stat behind Sys() rather than from info.Mode().
+	st, ok := info.Sys().(*erofs.Stat)
+	if !ok {
+		t.Fatalf("Sys() is %T, want *erofs.Stat", info.Sys())
+	}
+	// The check below treats erofs.Stat.Mode as a Go fs.FileMode, in which
+	// ModeSetuid is set when the unix setuid bit is present.
+	if st.Mode&fs.ModeSetuid == 0 {
+		t.Errorf("su: setuid bit missing, mode=%v", st.Mode)
+	}
+}
+
+// ----------------------------------------------------------------------------
+// Merge tests
+// ----------------------------------------------------------------------------
+
+// TestMergeBasic merges two layers: the upper rewrites hosts and whites out passwd.
+func TestMergeBasic(t *testing.T) {
+	lower := makeTar(t, func(tw *tar.Writer) {
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "etc/", Mode: 0o755, ModTime: epoch})
+		for _, f := range [][2]string{{"etc/hosts", "127.0.0.1"}, {"etc/passwd", "root"}} {
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: f[0], Size: int64(len(f[1])), Mode: 0o644, ModTime: epoch})
+			tw.Write([]byte(f[1]))
+		}
+	})
+	upper := makeTar(t, func(tw *tar.Writer) {
+		// Same path as in the lower layer, different content.
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "etc/hosts", Size: 9, Mode: 0o644, ModTime: epoch})
+		tw.Write([]byte("127.0.0.2"))
+		// ".wh."-prefixed entry marking etc/passwd as deleted.
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "etc/.wh.passwd", Size: 0, ModTime: epoch})
+	})
+	merged := buildMergedImage(t, lower, upper)
+	fsckImage(t, merged)
+	fsys := openImage(t, merged)
+	checkFile(t, fsys, "etc/hosts", "127.0.0.2")
+	checkNotExist(t, fsys, "etc/passwd")
+}
+
+// TestMergeOpaqueDir covers the .wh..wh..opq marker in Merge mode: children
+// of lib/ that came from the lower layer must disappear, the upper layer's
+// child must remain, and lib/ itself must end up without overlay xattrs
+// (trusted.overlay.opaque, trusted.overlay.origin) in the flattened image.
+func TestMergeOpaqueDir(t *testing.T) {
+	lower := makeTar(t, func(tw *tar.Writer) {
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "lib/", Mode: 0o755, ModTime: epoch})
+		for _, so := range []string{"libc", "libm"} {
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "lib/" + so + ".so", Size: 4, Mode: 0o755, ModTime: epoch})
+			tw.Write([]byte(so))
+		}
+	})
+	upper := makeTar(t, func(tw *tar.Writer) {
+		// lib/ is re-declared, marked opaque, and given a single new entry.
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "lib/", Mode: 0o755, ModTime: epoch})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "lib/.wh..wh..opq", Size: 0, ModTime: epoch})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "lib/libz.so", Size: 4, Mode: 0o755, ModTime: epoch})
+		tw.Write([]byte("libz"))
+	})
+	merged := buildMergedImage(t, lower, upper)
+	fsckImage(t, merged)
+	fsys := openImage(t, merged)
+
+	// Nothing from the lower layer survives under lib/.
+	checkNotExist(t, fsys, "lib/libc.so")
+	checkNotExist(t, fsys, "lib/libm.so")
+
+	// The upper layer's file is there, content intact.
+	checkFile(t, fsys, "lib/libz.so", "libz")
+
+	// A flat merged image has no use for overlay xattrs on lib/.
+	dir := checkStat(t, fsys, "lib")
+	xattrs := dir.Sys().(*erofs.Stat).Xattrs
+	if val, found := xattrs[overlayOpaqueXattr]; found {
+		t.Errorf("lib: Merge should not leave %q=%q in merged image", overlayOpaqueXattr, val)
+	}
+	if val, found := xattrs["trusted.overlay.origin"]; found {
+		t.Errorf("lib: Merge should not leave trusted.overlay.origin=%q in merged image", val)
+	}
+}
+
+// TestMergeOpaqueDeeplyNested marks app/ opaque in the upper layer and checks
+// that lower-layer descendants are gone at every depth, not only one level down.
+func TestMergeOpaqueDeeplyNested(t *testing.T) {
+	lower := makeTar(t, func(tw *tar.Writer) {
+		// Directory chain app/ -> app/a/ -> app/a/b/.
+		for _, dir := range []string{"app/", "app/a/", "app/a/b/"} {
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: dir, Mode: 0o755, ModTime: epoch})
+		}
+		// One regular file per directory level, deepest first.
+		for _, f := range [][2]string{{"app/a/b/deep.txt", "deep"}, {"app/a/mid.txt", "mid"}, {"app/top.txt", "top"}} {
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: f[0], Size: int64(len(f[1])), Mode: 0o644, ModTime: epoch})
+			tw.Write([]byte(f[1]))
+		}
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeSymlink, Name: "app/link", Linkname: "a/mid.txt", ModTime: epoch})
+	})
+	upper := makeTar(t, func(tw *tar.Writer) {
+		// The opaque marker applies to everything below app/.
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "app/", Mode: 0o755, ModTime: epoch})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "app/.wh..wh..opq", Size: 0, ModTime: epoch})
+		// newfile.txt is the only entry this layer contributes.
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "app/newfile.txt", Size: 3, Mode: 0o644, ModTime: epoch})
+		tw.Write([]byte("new"))
+	})
+	merged := buildMergedImage(t, lower, upper)
+	fsckImage(t, merged)
+	fsys := openImage(t, merged)
+
+	// Files, the symlink and both nested directories from the lower layer
+	// must all be absent.
+	for _, gone := range []string{
+		"app/top.txt", "app/link", "app/a", "app/a/mid.txt", "app/a/b", "app/a/b/deep.txt",
+	} {
+		checkNotExist(t, fsys, gone)
+	}
+
+	// The upper layer's file is readable.
+	checkFile(t, fsys, "app/newfile.txt", "new")
+
+	// app/ itself must not keep the opaque xattr.
+	dir := checkStat(t, fsys, "app")
+	xattrs := dir.Sys().(*erofs.Stat).Xattrs
+	if val, found := xattrs[overlayOpaqueXattr]; found {
+		t.Errorf("app: Merge should not leave %q=%q in merged image", overlayOpaqueXattr, val)
+	}
+}
+
+// TestMergeOpaqueNoXattrs combines a regular whiteout and an opaque marker in
+// one upper layer and asserts that the merged image contains no overlay
+// xattrs at all. Merge mode yields a flat filesystem; overlay xattrs are a
+// per-layer concept that only Convert mode output should contain.
+func TestMergeOpaqueNoXattrs(t *testing.T) {
+	lower := makeTar(t, func(tw *tar.Writer) {
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "etc/", Mode: 0o755, ModTime: epoch})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "etc/old.conf", Size: 4, Mode: 0o644, ModTime: epoch})
+		tw.Write([]byte("old!"))
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "etc/keep.conf", Size: 4, Mode: 0o644, ModTime: epoch})
+		tw.Write([]byte("keep"))
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "lib/", Mode: 0o755, ModTime: epoch})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "lib/old.so", Size: 3, Mode: 0o755, ModTime: epoch})
+		tw.Write([]byte("old"))
+	})
+	upper := makeTar(t, func(tw *tar.Writer) {
+		// Plain whiteout: deletes etc/old.conf only.
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "etc/.wh.old.conf", Size: 0, ModTime: epoch})
+		// Opaque marker: lib/ loses its old contents and gains new.so.
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "lib/", Mode: 0o755, ModTime: epoch})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "lib/.wh..wh..opq", Size: 0, ModTime: epoch})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "lib/new.so", Size: 3, Mode: 0o755, ModTime: epoch})
+		tw.Write([]byte("new"))
+	})
+	merged := buildMergedImage(t, lower, upper)
+	fsckImage(t, merged)
+	fsys := openImage(t, merged)
+
+	// First the tree shape: deletions applied, survivors intact.
+	checkNotExist(t, fsys, "etc/old.conf")
+	checkFile(t, fsys, "etc/keep.conf", "keep")
+	checkNotExist(t, fsys, "lib/old.so")
+	checkFile(t, fsys, "lib/new.so", "new")
+
+	// Every entry reachable from the image root is inspected; none may carry
+	// one of the overlay xattr keys below.
+	forbidden := []string{overlayOpaqueXattr, "trusted.overlay.origin", "trusted.overlay.whiteout"}
+	visit := func(p string, d fs.DirEntry, walkErr error) error {
+		if walkErr != nil {
+			return walkErr
+		}
+		fi, infoErr := d.Info()
+		if infoErr != nil {
+			return infoErr
+		}
+		// Entries whose Sys() is not an *erofs.Stat are passed over.
+		if st, isStat := fi.Sys().(*erofs.Stat); isStat {
+			for _, key := range forbidden {
+				if v, found := st.Xattrs[key]; found {
+					t.Errorf("%s: Merge left overlay xattr %q=%q in merged image", p, key, v)
+				}
+			}
+		}
+		return nil
+	}
+	if err := fs.WalkDir(fsys, ".", visit); err != nil {
+		t.Fatalf("WalkDir: %v", err)
+	}
+}
+
+// TestMergeWhiteoutMissingPath applies a whiteout whose target was never
+// created and expects Merge mode to carry on without touching other files.
+func TestMergeWhiteoutMissingPath(t *testing.T) {
+	lower := makeTar(t, func(tw *tar.Writer) {
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "exists", Size: 2, Mode: 0o644, ModTime: epoch})
+		tw.Write([]byte("ok"))
+	})
+	upper := makeTar(t, func(tw *tar.Writer) {
+		// No layer ever created "ghost".
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: ".wh.ghost", Size: 0, ModTime: epoch})
+	})
+	merged := buildMergedImage(t, lower, upper)
+	fsckImage(t, merged)
+	fsys := openImage(t, merged)
+	// The unrelated file from the lower layer is untouched.
+	checkFile(t, fsys, "exists", "ok")
+}
+
+// TestMergeHardLinks adds, in an upper layer, a hard link to a lower-layer file.
+func TestMergeHardLinks(t *testing.T) {
+	layer1 := makeTar(t, func(tw *tar.Writer) {
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "bin/sh", Size: 5, Mode: 0o755, ModTime: epoch})
+		tw.Write([]byte("shell"))
+	})
+	layer2 := makeTar(t, func(tw *tar.Writer) {
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeLink, Name: "bin/bash", Linkname: "bin/sh", ModTime: epoch})
+	})
+	img := buildMergedImage(t, layer1, layer2)
+	fsckImage(t, img)
+	fsys := openImage(t, img)
+	checkFile(t, fsys, "bin/sh", "shell")
+	checkFile(t, fsys, "bin/bash", "shell") // both names must serve the same content
+	info := checkStat(t, fsys, "bin/sh")    // stat error is no longer discarded
+	st := info.Sys().(*erofs.Stat)
+	if st.Nlink < 2 {
+		t.Errorf("bin/sh: nlink = %d, want >= 2", st.Nlink)
+	}
+}
+
+// TestMergeThreeLayers merges three layers shaped like a typical container
+// image: a base OS, a dependency install, and the application itself.
+func TestMergeThreeLayers(t *testing.T) {
+	// Bottom layer: minimal OS tree.
+	baseLayer := makeTar(t, func(tw *tar.Writer) {
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "bin/", Mode: 0o755, ModTime: epoch})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "bin/sh", Size: 2, Mode: 0o755, ModTime: epoch})
+		tw.Write([]byte("sh"))
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "etc/", Mode: 0o755, ModTime: epoch})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "etc/os-release", Size: 6, Mode: 0o644, ModTime: epoch})
+		tw.Write([]byte("alpine"))
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "usr/", Mode: 0o755, ModTime: epoch})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "usr/lib/", Mode: 0o755, ModTime: epoch})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "usr/lib/libc.so", Size: 4, Mode: 0o755, ModTime: epoch})
+		tw.Write([]byte("libc"))
+	})
+
+	// Middle layer: a package install that adds usr/bin/python3 and deletes bin/sh.
+	depsLayer := makeTar(t, func(tw *tar.Writer) {
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "usr/bin/", Mode: 0o755, ModTime: epoch})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "usr/bin/python3", Size: 6, Mode: 0o755, ModTime: epoch})
+		tw.Write([]byte("python"))
+		// Whiteout for bin/sh; the top layer brings it back as a symlink.
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "bin/.wh.sh", Size: 0, ModTime: epoch})
+	})
+
+	// Top layer: the application.
+	appLayer := makeTar(t, func(tw *tar.Writer) {
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "app/", Mode: 0o755, ModTime: epoch})
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "app/main.py", Size: 4, Mode: 0o644, ModTime: epoch})
+		tw.Write([]byte("main"))
+		// bin/sh returns, this time as a symlink.
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeSymlink, Name: "bin/sh", Linkname: "/bin/busybox", Mode: 0o777, ModTime: epoch})
+	})
+
+	merged := buildMergedImage(t, baseLayer, depsLayer, appLayer)
+	fsckImage(t, merged)
+	fsys := openImage(t, merged)
+
+	// One surviving file from each layer.
+	checkFile(t, fsys, "etc/os-release", "alpine")
+	checkFile(t, fsys, "usr/bin/python3", "python")
+	checkFile(t, fsys, "app/main.py", "main")
+
+	// bin/sh must now be the top layer's symlink; Lstat is needed so the
+	// link itself is examined.
+	lfs, hasLstat := fsys.(interface{ Lstat(string) (fs.FileInfo, error) })
+	if !hasLstat {
+		t.Skip("image FS does not implement Lstat")
+	}
+	linkInfo, err := lfs.Lstat("bin/sh")
+	if err != nil {
+		t.Fatalf("Lstat bin/sh: %v", err)
+	}
+	if mode := linkInfo.Mode(); mode&fs.ModeSymlink == 0 {
+		t.Errorf("bin/sh: expected symlink, got %v", mode)
+	}
+}
+
+// TestConvertNoTempFile verifies that Convert itself does not create a temp
+// file for payload data: TMPDIR points at a read-only directory while the
+// conversion runs, and it must still succeed. erofs.Writer may create a
+// spool file via WithTempDir, which is outside what is verified here, so the
+// writer is explicitly given a separate, writable directory.
+// The test is skipped when running as root.
+func TestConvertNoTempFile(t *testing.T) {
+	if os.Getuid() == 0 {
+		t.Skip("running as root, cannot test read-only tmpdir")
+	}
+	roDir, err := os.MkdirTemp("", "ro-tmpdir-*")
+	if err != nil {
+		t.Skip("cannot create temp dir:", err)
+	}
+	defer os.RemoveAll(roDir)
+	if err := os.Chmod(roDir, 0o500); err != nil {
+		t.Skip("cannot chmod temp dir:", err)
+	}
+	// Spool directory handed to the writer; created before TMPDIR is changed.
+	spoolDir, err := os.MkdirTemp("", "rw-tmpdir-*")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(spoolDir)
+
+	t.Setenv("TMPDIR", roDir)
+
+	layer := makeTar(t, func(tw *tar.Writer) {
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "f", Size: 3, Mode: 0o644, ModTime: epoch})
+		tw.Write([]byte("abc"))
+	})
+	// The image bytes themselves are not inspected.
+	w := erofs.Create(&buf{}, erofs.WithTempDir(spoolDir))
+	if err := tarconv.Apply(w, bytes.NewReader(layer)); err != nil {
+		t.Fatalf("Convert failed: %v", err)
+	}
+	_ = w.Close()
+}
+
+// ----------------------------------------------------------------------------
+// Real-image-shape test: simulates Ubuntu base layer structure
+// ----------------------------------------------------------------------------
+
+// TestConvertUbuntuLikeLayer exercises a tar that resembles a real Ubuntu
+// base layer: deep directory tree, many files, symlinks, a few device nodes.
+func TestConvertUbuntuLikeLayer(t *testing.T) {
+	tarData := makeTar(t, func(tw *tar.Writer) {
+		dirs := []string{"bin/", "sbin/", "lib/", "lib/x86_64-linux-gnu/",
+			"etc/", "etc/apt/", "usr/", "usr/bin/", "usr/lib/", "var/", "var/log/",
+			"tmp/", "root/", "home/"}
+		for _, d := range dirs {
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: d, Mode: 0o755, Uid: 0, Gid: 0, ModTime: epoch})
+		}
+		// Typical binaries.
+		for _, f := range []string{"bin/sh", "bin/ls", "bin/cat", "bin/echo", "sbin/init"} {
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: f, Size: 8, Mode: 0o755, ModTime: epoch})
+			tw.Write([]byte("fakebinx"))
+		}
+		// Typical libs — hard linked to each other (versioned .so).
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "lib/x86_64-linux-gnu/libc.so.6", Size: 8, Mode: 0o755, ModTime: epoch})
+		tw.Write([]byte("libcdata"))
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeLink, Name: "lib/libc.so.6", Linkname: "lib/x86_64-linux-gnu/libc.so.6", ModTime: epoch})
+		// Config files.
+		for _, f := range []string{"etc/hostname", "etc/hosts", "etc/resolv.conf"} {
+			tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: f, Size: 1, Mode: 0o644, Uid: 0, Gid: 0, ModTime: epoch})
+			tw.Write([]byte("\n"))
+		}
+		// Symlinks (common in Ubuntu).
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeSymlink, Name: "lib64", Linkname: "lib/x86_64-linux-gnu", Mode: 0o777, ModTime: epoch})
+		// Device node.
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeChar, Name: "dev/null", Mode: 0o666, Devmajor: 1, Devminor: 3, ModTime: epoch})
+		// File with capability xattr (common for ping, etc.).
+		tw.WriteHeader(&tar.Header{
+			Typeflag: tar.TypeReg, Name: "usr/bin/ping", Size: 4, Mode: 0o755, ModTime: epoch,
+			PAXRecords: map[string]string{"SCHILY.xattr.security.capability": "\x01\x00\x00\x02\x00 \x00\x00"},
+		})
+		tw.Write([]byte("ping"))
+		// Empty log file.
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeReg, Name: "var/log/dpkg.log", Size: 0, Mode: 0o644, ModTime: epoch})
+		// Sticky tmp: a second tmp/ entry that replaces the 0o755 one above.
+		tw.WriteHeader(&tar.Header{Typeflag: tar.TypeDir, Name: "tmp/", Mode: 0o1777, ModTime: epoch})
+	})
+
+	img := buildImage(t, tarData)
+	fsckImage(t, img)
+	fsys := openImage(t, img)
+
+	checkFile(t, fsys, "bin/sh", "fakebinx")
+	checkFile(t, fsys, "lib/x86_64-linux-gnu/libc.so.6", "libcdata")
+	checkFile(t, fsys, "lib/libc.so.6", "libcdata") // hard link
+
+	info := checkStat(t, fsys, "lib/x86_64-linux-gnu/libc.so.6") // stat error is no longer discarded
+	st := info.Sys().(*erofs.Stat)
+	if st.Nlink < 2 {
+		t.Errorf("libc.so.6: nlink=%d want >=2", st.Nlink)
+	}
+
+	info = checkStat(t, fsys, "tmp")
+	// Sticky bit: check via erofs.Stat.Mode which uses the properly-decoded
+	// raw inode mode (fs.FileInfo.Mode() carries it unreliably due to the
+	// reader's inode decode path).
+	st2 := info.Sys().(*erofs.Stat)
+	if st2.Mode.Perm() != 0o777 || st2.Mode&fs.ModeSticky == 0 {
+		t.Errorf("tmp: erofs.Stat.Mode=%v want drwxrwxrwt", st2.Mode)
+	}
+
+	info = checkStat(t, fsys, "usr/bin/ping")
+	st = info.Sys().(*erofs.Stat)
+	if st.Xattrs["security.capability"] == "" {
+		t.Error("ping: missing security.capability xattr")
+	}
+}
+
+const overlayOpaqueXattr = "trusted.overlay.opaque" // xattr key the tests look up on opaque directories
diff --git a/tarconv/helpers_test.go b/tarconv/helpers_test.go
new file mode 100644
index 0000000..fcd0597
--- /dev/null
+++ b/tarconv/helpers_test.go
@@ -0,0 +1,263 @@
+package tarconv_test
+
+import (
+	"archive/tar"
+	"context"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"testing"
+	"time"
+)
+
+// writerToTar is satisfied by any type that can emit entries to a tar.Writer.
+type writerToTar interface {
+	writeTo(tw *tar.Writer) error // emits the entries to tw; callers in this file stop at the first non-nil error
+}
+
+// tarAll is a writerToTar made of other writerToTar values, written in order.
+type tarAll []writerToTar
+
+func (a tarAll) writeTo(tw *tar.Writer) error {
+	for i := range a {
+		if failure := a[i].writeTo(tw); failure != nil {
+			return failure
+		}
+	}
+	return nil
+}
+
+// tarFromWriterTo returns an io.ReadCloser streaming a tar built from wt.
+// A failure of wt.writeTo or of finalizing the archive surfaces as the read error.
+func tarFromWriterTo(wt writerToTar) io.ReadCloser {
+	r, w := io.Pipe()
+	go func() {
+		tw := tar.NewWriter(w)
+		err := wt.writeTo(tw)
+		if err == nil {
+			err = tw.Close() // reports e.g. an entry whose body was left short
+		}
+		_ = w.CloseWithError(err) // a nil err makes the reader see io.EOF
+	}()
+	return r
+}
+
+// tarContext holds the uid, gid, modification time and xattrs that its
+// constructor methods copy into the entries they create.
+type tarContext struct {
+	uid, gid int
+	modTime  time.Time
+	xattrs   map[string]string
+}
+
+// withModTime returns a copy of tc whose modTime is t.
+func (tc tarContext) withModTime(t time.Time) tarContext {
+	return tarContext{uid: tc.uid, gid: tc.gid, modTime: t, xattrs: tc.xattrs}
+}
+
+// withXattrs returns a copy of tc whose xattrs map is xattrs.
+func (tc tarContext) withXattrs(xattrs map[string]string) tarContext {
+	return tarContext{uid: tc.uid, gid: tc.gid, modTime: tc.modTime, xattrs: xattrs}
+}
+
+// --- tarFile ---
+
+// tarFile is a regular-file fixture; writeTo emits its header and data.
+type tarFile struct {
+	name     string
+	data     []byte
+	mode     int64
+	uid, gid int
+	modTime  time.Time
+	xattrs   map[string]string
+}
+
+// writeTo writes one TypeReg entry; xattrs become "SCHILY.xattr."-prefixed PAX records.
+func (f *tarFile) writeTo(tw *tar.Writer) error {
+	var pax map[string]string
+	if len(f.xattrs) > 0 {
+		pax = make(map[string]string)
+		for k, v := range f.xattrs {
+			pax["SCHILY.xattr."+k] = v
+		}
+	}
+	err := tw.WriteHeader(&tar.Header{
+		Typeflag: tar.TypeReg, Name: f.name,
+		Size: int64(len(f.data)), Mode: f.mode,
+		Uid: f.uid, Gid: f.gid, ModTime: f.modTime,
+		PAXRecords: pax,
+	})
+	if err != nil || len(f.data) == 0 {
+		return err
+	}
+	_, err = tw.Write(f.data)
+	return err
+}
+
+func (tc tarContext) file(name string, data []byte, mode int64) writerToTar {
+	return &tarFile{uid: tc.uid, gid: tc.gid, modTime: tc.modTime, xattrs: tc.xattrs, name: name, data: data, mode: mode}
+}
+
+// --- tarDir ---
+
+// tarDir is a directory fixture; writeTo emits a single TypeDir header.
+type tarDir struct {
+	name     string
+	mode     int64
+	uid, gid int
+	modTime  time.Time
+	xattrs   map[string]string
+}
+
+func (d *tarDir) writeTo(tw *tar.Writer) error {
+	var pax map[string]string
+	if len(d.xattrs) > 0 {
+		pax = make(map[string]string)
+		for k, v := range d.xattrs {
+			pax["SCHILY.xattr."+k] = v
+		}
+	}
+	return tw.WriteHeader(&tar.Header{
+		Typeflag: tar.TypeDir, Name: d.name, Mode: d.mode,
+		Uid: d.uid, Gid: d.gid, ModTime: d.modTime, PAXRecords: pax,
+	})
+}
+
+func (tc tarContext) dir(name string, mode int64) writerToTar {
+	return &tarDir{uid: tc.uid, gid: tc.gid, modTime: tc.modTime, xattrs: tc.xattrs, name: name, mode: mode}
+}
+
+// --- tarSymlink ---
+
+// tarSymlink is a symlink fixture; its header is always written with mode 0o777.
+type tarSymlink struct {
+	name, target string
+	uid, gid     int
+	modTime      time.Time
+}
+
+func (s *tarSymlink) writeTo(tw *tar.Writer) error {
+	hdr := tar.Header{
+		Typeflag: tar.TypeSymlink, Name: s.name, Linkname: s.target,
+		Mode: 0o777, Uid: s.uid, Gid: s.gid, ModTime: s.modTime,
+	}
+	return tw.WriteHeader(&hdr)
+}
+
+func (tc tarContext) symlink(name, target string) writerToTar {
+	return &tarSymlink{uid: tc.uid, gid: tc.gid, modTime: tc.modTime, name: name, target: target}
+}
+
+// --- tarDevice ---
+
+// tarDevice is a header-only fixture used for char devices, block devices and FIFOs.
+type tarDevice struct {
+	name               string
+	mode               int64
+	typeflag           byte
+	devmajor, devminor int64
+	uid, gid           int
+	modTime            time.Time
+}
+
+func (d *tarDevice) writeTo(tw *tar.Writer) error {
+	hdr := tar.Header{
+		Typeflag: d.typeflag, Name: d.name, Mode: d.mode,
+		Devmajor: d.devmajor, Devminor: d.devminor,
+		Uid: d.uid, Gid: d.gid, ModTime: d.modTime,
+	}
+	return tw.WriteHeader(&hdr)
+}
+
+func (tc tarContext) charDevice(name string, mode int64, major, minor int64) writerToTar {
+	return &tarDevice{typeflag: tar.TypeChar, name: name, mode: mode, devmajor: major, devminor: minor, uid: tc.uid, gid: tc.gid, modTime: tc.modTime}
+}
+
+func (tc tarContext) blockDevice(name string, mode int64, major, minor int64) writerToTar {
+	return &tarDevice{typeflag: tar.TypeBlock, name: name, mode: mode, devmajor: major, devminor: minor, uid: tc.uid, gid: tc.gid, modTime: tc.modTime}
+}
+
+func (tc tarContext) fifo(name string, mode int64) writerToTar {
+	return &tarDevice{typeflag: tar.TypeFifo, name: name, mode: mode, uid: tc.uid, gid: tc.gid, modTime: tc.modTime}
+}
+
+// --- tarHardLink ---
+
+// tarHardLink is a hard-link fixture; target is written as the header's Linkname.
+type tarHardLink struct {
+	name, target string
+	uid, gid     int
+	modTime      time.Time
+}
+
+func (h *tarHardLink) writeTo(tw *tar.Writer) error {
+	hdr := tar.Header{
+		Typeflag: tar.TypeLink, Name: h.name, Linkname: h.target,
+		Uid: h.uid, Gid: h.gid, ModTime: h.modTime,
+	}
+	return tw.WriteHeader(&hdr)
+}
+
+func (tc tarContext) hardLink(name, target string) writerToTar {
+	return &tarHardLink{uid: tc.uid, gid: tc.gid, modTime: tc.modTime, name: name, target: target}
+}
+
+// --- mkfs.erofs helper ---
+
+// convertTarMkfs runs mkfs.erofs to convert a tar to an EROFS image.
+// The flags --tar=f --aufs --quiet -Enoinline_data are always applied,
+// followed by extraArgs and outPath; the tar is fed to the command on stdin.
+// The test is skipped if mkfs.erofs is not found in PATH; any other failure
+// is returned as an error that wraps the underlying cause.
+func convertTarMkfs(ctx context.Context, t testing.TB, tarData []byte, outPath string, extraArgs []string) error {
+	t.Helper()
+	if _, err := exec.LookPath("mkfs.erofs"); err != nil {
+		t.Skip("mkfs.erofs not found in PATH")
+	}
+	f, err := os.CreateTemp("", "mkfs-bench-*.tar")
+	if err != nil {
+		return fmt.Errorf("create temp tar: %w", err)
+	}
+	defer os.Remove(f.Name())
+	defer f.Close() // deferred last so it runs before the Remove above
+	if _, err := f.Write(tarData); err != nil {
+		return fmt.Errorf("write temp tar: %w", err)
+	}
+	if _, err := f.Seek(0, io.SeekStart); err != nil {
+		return fmt.Errorf("rewind temp tar: %w", err)
+	}
+
+	args := []string{"--tar=f", "--aufs", "--quiet", "-Enoinline_data"}
+	args = append(args, extraArgs...)
+	args = append(args, outPath)
+	cmd := exec.CommandContext(ctx, "mkfs.erofs", args...)
+	cmd.Stdin = f
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return fmt.Errorf("mkfs.erofs %v: %w\n%s", args, err, out)
+	}
+	return nil
+}
+
+// fsckErofsBytes fails the test if fsck.erofs rejects data; a no-op without fsck.erofs in PATH.
+func fsckErofsBytes(t testing.TB, data []byte) {
+	t.Helper()
+	if _, lookErr := exec.LookPath("fsck.erofs"); lookErr != nil {
+		return
+	}
+	img, err := os.CreateTemp("", "erofs-*.img")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.Remove(img.Name())
+	_, err = img.Write(data)
+	_ = img.Close()
+	if err != nil {
+		t.Fatal(err)
+	}
+	report, err := exec.Command("fsck.erofs", img.Name()).CombinedOutput()
+	if err != nil {
+		t.Fatalf("fsck.erofs: %v\n%s", err, report)
+	}
+}
diff --git a/vendor/github.com/erofs/go-erofs/.golangci.yml b/vendor/github.com/erofs/go-erofs/.golangci.yml
new file mode 100644
index 0000000..d4f8fd0
--- /dev/null
+++ b/vendor/github.com/erofs/go-erofs/.golangci.yml
@@ -0,0 +1,25 @@
+version: "2"
+
+run:
+  timeout: 5m
+
+linters:
+  enable:
+    - misspell
+    - gocritic
+    - revive
+    - unconvert
+    - unparam
+  settings:
+    revive:
+      rules:
+        - name: exported
+          disabled: true
+
+formatters:
+  enable:
+    - goimports
+  settings:
+    goimports:
+      local-prefixes:
+        - github.com/erofs/go-erofs
diff --git a/vendor/github.com/erofs/go-erofs/LICENSE b/vendor/github.com/erofs/go-erofs/LICENSE
new file mode 100644
index 0000000..be4c94b
--- /dev/null
+++ b/vendor/github.com/erofs/go-erofs/LICENSE
@@ -0,0 +1,191 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        https://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   Copyright The go-erofs Authors
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       https://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/vendor/github.com/erofs/go-erofs/README.md b/vendor/github.com/erofs/go-erofs/README.md
new file mode 100644
index 0000000..dde7676
--- /dev/null
+++ b/vendor/github.com/erofs/go-erofs/README.md
@@ -0,0 +1,82 @@
+# go-erofs
+
+A Go library for reading and creating [EROFS](https://erofs.docs.kernel.org/) filesystem images using the standard [fs.FS](https://pkg.go.dev/io/fs#FS) interface.
+
+## Features
+
+- **Read** EROFS images through Go's `fs.FS` interface
+- **Create** EROFS images from directories or any `fs.FS`
+- **Merge** multiple filesystem sources with overlay whiteout support
+- **Metadata-only** mode for container layer indexing (chunk-based references to original data)
+- Pure Go, no CGO — uses only the standard library
+
+### Status
+
+- [x] Read erofs files created with default `mkfs.erofs` options
+- [x] Read chunk-based erofs files with indexes
+- [x] Xattr support including long xattr prefixes
+- [x] Extra devices for chunked data
+- [x] Create erofs files from any `fs.FS`
+- [x] Directory to erofs packing
+- [x] AUFS whiteout to overlayfs conversion
+- [x] Merge multiple filesystem layers with whiteout processing
+- [ ] Read erofs files with compression
+
+## Reading an EROFS image
+
+```go
+f, err := os.Open("image.erofs")
+if err != nil {
+    log.Fatal(err)
+}
+defer f.Close()
+
+img, err := erofs.Open(f)
+if err != nil {
+    log.Fatal(err)
+}
+
+fs.WalkDir(img, ".", func(path string, d fs.DirEntry, err error) error {
+    fmt.Println(path)
+    return nil
+})
+```
+
+## Merging multiple layers
+
+Combine multiple filesystem sources into one image. The `Merge` option enables overlay semantics — AUFS-style whiteout files (`.wh.<name>`) delete entries from prior layers:
+
+```go
+outFile, _ := os.Create("merged.erofs")
+w := erofs.Create(outFile)
+
+w.CopyFrom(baseLayer)
+w.CopyFrom(overlayLayer, erofs.Merge())
+w.Close()
+```
+
+Merge can also be combined with `MetadataOnly` to build a merged index without copying data:
+
+```go
+w := erofs.Create(outFile)
+w.CopyFrom(layer1, erofs.MetadataOnly())
+w.CopyFrom(layer2, erofs.MetadataOnly(), erofs.Merge())
+w.Close()
+```
+
+## Building an image programmatically
+
+```go
+outFile, _ := os.Create("image.erofs")
+w := erofs.Create(outFile)
+
+f, _ := w.Create("/hello.txt")
+f.Write([]byte("hello world\n"))
+f.Close()
+
+w.Mkdir("/dir", 0o755)
+w.Symlink("hello.txt", "/link")
+
+w.Close()
+outFile.Close()
+```
diff --git a/vendor/github.com/erofs/go-erofs/block.go b/vendor/github.com/erofs/go-erofs/block.go
new file mode 100644
index 0000000..cea7da8
--- /dev/null
+++ b/vendor/github.com/erofs/go-erofs/block.go
@@ -0,0 +1,22 @@
+package erofs
+
+type block struct { // block is a byte buffer plus the [offset, end) window that bytes() exposes
+	buf    []byte // backing storage; when nil, bytes() returns nil
+	offset int32  // start of the window within buf; -1 makes bytes() return nil
+	end    int32  // end of the window within buf (exclusive)
+}
+
+func (b *block) bytes() []byte {
+	if b.buf != nil && b.offset != -1 {
+		return b.buf[b.offset:b.end] // the live window of the buffer
+	}
+	return nil // no buffer, or offset holds the -1 sentinel
+}
+
+func calculateBlocks(blockBits uint8, size int64) int { // blocks of 1<<blockBits bytes needed for size bytes
+	whole := size >> blockBits
+	if covered := whole << blockBits; covered < size {
+		whole++ // a partial tail block still occupies one block
+	}
+	return int(whole)
+}
diff --git a/vendor/github.com/erofs/go-erofs/erofs.go b/vendor/github.com/erofs/go-erofs/erofs.go
new file mode 100644
index 0000000..bf95311
--- /dev/null
+++ b/vendor/github.com/erofs/go-erofs/erofs.go
@@ -0,0 +1,1688 @@
+// Package erofs reads and creates EROFS filesystem images.
+//
+// # Reading
+//
+// Use [Open] to read an existing EROFS image through Go's standard [fs.FS]
+// interface:
+//
+//	img, err := erofs.Open(f)
+//	data, err := fs.ReadFile(img, "etc/hostname")
+//
+// # Writing
+//
+// Use [Create] to build a new EROFS image. Entries can be added one at a
+// time, or bulk-copied from any [fs.FS] via [Writer.CopyFrom]:
+//
+//	w := erofs.Create(outFile)
+//	w.CopyFrom(srcFS)
+//	w.Close()
+//
+// For metadata-only images that reference data in an external source
+// (e.g. for container layer indexing), pass [MetadataOnly] to CopyFrom:
+//
+//	w := erofs.Create(outFile)
+//	w.CopyFrom(srcFS, erofs.MetadataOnly())
+//	w.Close()
+package erofs
+
+import (
+	"bufio"
+	"bytes"
+	"cmp"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"io"
+	"io/fs"
+	"path"
+	"slices"
+	"sync"
+	"time"
+
+	"github.com/erofs/go-erofs/internal/disk"
+)
+
+// Sentinel errors. They may be returned wrapped, so match them with errors.Is.
+var (
+	// ErrInvalid occurs when an invalid value is detected in the erofs data.
+	// Whether this invalid data is the result of corruption or bad input
+	// is up to the caller to decide. It is the same value as fs.ErrInvalid.
+	// This error may be wrapped with more details.
+	ErrInvalid = fs.ErrInvalid
+
+	// ErrInvalidSuperblock occurs when the super block could not be validated
+	// when initially loading the erofs input. Unlike other corruption cases,
+	// an invalid super block should be reported immediately. It wraps ErrInvalid.
+	ErrInvalidSuperblock = fmt.Errorf("invalid super block: %w", ErrInvalid)
+
+	// ErrNotImplemented is returned when a feature is known but not yet
+	// implemented by this library.
+	ErrNotImplemented = errors.New("not implemented")
+
+	// ErrNotDirectory is returned when a path component is not a directory.
+	ErrNotDirectory = errors.New("not a directory")
+
+	// ErrIsDirectory is returned when an operation expected a file but found
+	// a directory.
+	ErrIsDirectory = errors.New("is a directory")
+
+	// ErrLoop is returned when too many symlinks are encountered during
+	// path resolution. It wraps ErrInvalid.
+	ErrLoop = fmt.Errorf("too many symlinks: %w", ErrInvalid)
+)
+
+// Stat is the raw erofs stat data returned by Sys() on [fs.FileInfo] values.
+// It is a plain data struct analogous to [syscall.Stat_t].
+//
+// For cross-platform fs.FS compatibility, callers should prefer
+// type-asserting the [fs.FileInfo] to accessor interfaces rather
+// than inspecting Stat fields directly. The returned [fs.FileInfo]
+// provides the following accessor methods, grouped by purpose:
+//
+//	Ownership:  UID() uint32, GID() uint32
+//	InodeInfo:  Ino() uint64, Nlink() uint64
+//	DeviceInfo: Rdev() uint64
+//	Xattrs:     GetAllXattr() map[string]string, GetXattr(string) (string, bool)
+type Stat struct {
+	Mode        fs.FileMode
+	Size        int64
+	InodeLayout uint8 // NOTE(review): presumably one of the disk.Layout* constants — confirm
+	Rdev        uint32
+	Ino         int64
+	UID         uint32
+	GID         uint32
+	Mtime       uint64 // NOTE(review): presumably whole seconds since the Unix epoch — confirm
+	MtimeNs     uint32 // NOTE(review): presumably the nanosecond part of Mtime — confirm
+	Nlink       int
+	Xattrs      map[string]string
+}
+
+// holeOffset is the sentinel value for DataRange.Offset that marks a hole
+// (a sparse region of zeros) rather than device-backed data.
+const holeOffset int64 = -1
+
+// DataRange describes one entry in the complete logical layout of a file's
+// content. A slice of DataRange values returned by [fileInfo.DataRange]
+// covers the file from logical byte 0 to logical byte [fs.FileInfo.Size]-1
+// in order, with no gaps or overlaps.
+//
+// The sum of all Size values in the slice must equal the file size exactly.
+// A slice whose sizes do not sum to the file size is invalid.
+//
+// Each entry is either a data entry or a hole entry:
+//
+//   - A data entry has Offset >= 0. The bytes at [Offset, Offset+Size) in
+//     Device are the file's content verbatim: uncompressed and not
+//     transformed in any way.
+//   - A hole entry has Offset == -1. It represents Size bytes of zeros at the
+//     current logical position. Device is ignored for hole entries.
+//
+// Compressed data should not be represented as a DataRange. When a source
+// FS contains compressed files, it should not provide DataRange() []DataRange
+// for those files (or should return nil). In full-image mode CopyFrom will
+// fall back to reading through Open(), which decompresses transparently, and
+// write the decompressed data into the output image. In MetadataOnly mode
+// there is no such fallback: files without DataRange() (or pre-built chunks)
+// are stored as chunk-based inodes with no physical mappings (all holes).
+type DataRange struct {
+	Device uint16 // device index (0 for the device assigned by CopyFrom); ignored for holes
+	Offset int64  // byte offset in the device, or -1 for a hole entry
+	Size   int64  // byte length of this entry
+}
+
+type options struct {
+	extraDevices []io.ReaderAt // readers collected by WithExtraDevices, in the order given
+}
+
+// OpenOpt is a functional option that configures [Open].
+type OpenOpt func(*options)
+
+// Deprecated: Use [OpenOpt] instead; Opt will be removed in 0.3.
+type Opt = OpenOpt
+
+// WithExtraDevices appends devices to the readers Open uses for chunk data on extra devices.
+func WithExtraDevices(devices ...io.ReaderAt) OpenOpt {
+	appendDevices := func(o *options) {
+		o.extraDevices = append(o.extraDevices, devices...)
+	}
+	return appendDevices
+}
+
+// Open parses the superblock (and device table, if any) from r and returns an
+// fs.FS over the image. Every extra device the image lists must be supplied via
+// WithExtraDevices; compressed images are rejected with ErrNotImplemented.
+// r is read directly, so any caching or memory mapping is up to the caller.
+func Open(r io.ReaderAt, opts ...OpenOpt) (fs.FS, error) {
+	o := options{}
+	for _, opt := range opts {
+		opt(&o)
+	}
+	var superBlock [disk.SizeSuperBlock]byte
+	n, err := r.ReadAt(superBlock[:], disk.SuperBlockOffset)
+	if err != nil {
+		return nil, err
+	}
+
+	if n != disk.SizeSuperBlock {
+		return nil, fmt.Errorf("read %d bytes: %w", n, ErrInvalidSuperblock)
+	}
+
+	i := image{
+		meta: r,
+	}
+	if err = decodeSuperBlock(superBlock, &i.sb); err != nil {
+		return nil, err
+	}
+	// The maximum reasonable filesystem block size is 64k, which is
+	// the largest supported page size of aarch64 platforms.
+	if i.sb.BlkSizeBits < 9 || i.sb.BlkSizeBits > 16 {
+		return nil, fmt.Errorf("unsupported block size bits %d: %w", i.sb.BlkSizeBits, ErrInvalidSuperblock)
+	}
+	unknownFeat := i.sb.FeatureIncompat &^ disk.FeatureIncompatAll
+	if unknownFeat != 0 {
+		return nil, fmt.Errorf("unsupported incompatible feature 0x%x: %w", unknownFeat, ErrNotImplemented)
+	}
+	ondiskExtraDevices := uint32(0)
+	if i.sb.FeatureIncompat&disk.FeatureIncompatDeviceTable != 0 {
+		ondiskExtraDevices = uint32(i.sb.ExtraDevices)
+		// Calculate device_id_mask
+		// sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1;
+		i.deviceIDMask = uint16(roundupPowerOfTwo(uint32(i.sb.ExtraDevices)+1) - 1)
+	}
+
+	if int(ondiskExtraDevices) != len(o.extraDevices) {
+		// TODO: Provide options for skipping extra devices and error out later?
+		return nil, fmt.Errorf("extra devices count %d does not match provided %d: %w", ondiskExtraDevices, len(o.extraDevices), ErrInvalidSuperblock)
+	}
+
+	// Parse the device table if extra devices exist
+	if ondiskExtraDevices > 0 {
+		devTableOffset := int64(i.sb.DevtSlotOff) * disk.SizeDeviceSlot
+		i.devices = make([]deviceInfo, int(ondiskExtraDevices))
+		for idx := range i.devices {
+			var slotBuf [disk.SizeDeviceSlot]byte
+			offset := devTableOffset + int64(idx)*disk.SizeDeviceSlot
+			if _, err := r.ReadAt(slotBuf[:], offset); err != nil {
+				return nil, fmt.Errorf("failed to read device slot %d at offset %d: %w", idx, offset, err)
+			}
+			var slot disk.DeviceSlot
+			if _, err := binary.Decode(slotBuf[:], binary.LittleEndian, &slot); err != nil {
+				return nil, fmt.Errorf("failed to decode device slot %d: %w", idx, err)
+			}
+			i.devices[idx] = deviceInfo{
+				device:        o.extraDevices[idx],
+				mappedBlkAddr: slot.MappedBlkAddr,
+				blocks:        slot.Blocks,
+			}
+		}
+	}
+
+	// Error out filesystems with unsupported compressed inodes
+	if i.sb.FeatureIncompat&disk.FeatureIncompatLZ4_0Padding != 0 ||
+		i.sb.ComprAlgs != 0 {
+		return nil, fmt.Errorf("unsupported compressed filesystem (FeatureIncompat=0x%x, ComprAlgs=0x%x): %w",
+			i.sb.FeatureIncompat, i.sb.ComprAlgs, ErrNotImplemented)
+	}
+
+	i.blkPool.New = func() any {
+		return &block{
+			buf: make([]byte, 1<<i.sb.BlkSizeBits),
+		}
+	}
+
+	return &i, nil
+}
+
+// Deprecated: Use [Open] instead; EroFS only forwards to it and will be removed in 0.3.
+func EroFS(r io.ReaderAt, opts ...Opt) (fs.FS, error) {
+	return Open(r, opts...)
+}
+
+// roundupPowerOfTwo returns the smallest power of two that is >= v.
+// An input of 0, or one above 1<<31, wraps around and yields 0.
+func roundupPowerOfTwo(v uint32) uint32 {
+	// Smear the highest set bit of v-1 into every lower position, then add one.
+	v--
+	for shift := 1; shift < 32; shift <<= 1 {
+		v |= v >> shift
+	}
+	v++
+	return v
+}
+
+// deviceInfo pairs a caller-supplied reader with the mapped range parsed from its device table slot.
+type deviceInfo struct {
+	device        io.ReaderAt // reader passed to Open for this device
+	mappedBlkAddr uint32      // starting mapped block address
+	blocks        uint32      // total block count for this device
+}
+
+type image struct { // image is the value Open returns as an fs.FS
+	sb disk.SuperBlock // decoded superblock
+
+	meta         io.ReaderAt  // primary reader handed to Open
+	devices      []deviceInfo // parsed device table entries
+	deviceIDMask uint16       // mask applied to device ids read from chunk indexes
+	blkPool      sync.Pool    // pool of *block values, each with a one-block buffer
+	longPrefixes []string     // cached long xattr prefixes
+	prefixesOnce sync.Once    // guards the one-time load of longPrefixes
+	prefixesErr  error        // result of that load, returned by loadLongPrefixes
+}
+
+// metaStartPos returns the byte offset of the metadata zone (MetaBlkAddr in blocks, scaled to bytes).
+func (img *image) metaStartPos() int64 {
+	return int64(img.sb.MetaBlkAddr) << img.sb.BlkSizeBits
+}
+
+// maxReadFileSize is the maximum file size that ReadFile will allocate.
+// ReadFile is intended for small files; for larger files, callers should
+// use Open and io.Copy. 128 MiB is generous for typical use (configs,
+// manifests, symlink targets, etc.) while guarding against
+// unexpectedly large files.
+const maxReadFileSize = 128 << 20 // 128 MiB
+
+// mapDev picks the reader and reader-relative offset for physical address pa,
+// much like erofs_map_dev in the Linux kernel. A non-zero deviceID selects
+// devices[deviceID-1] and keeps pa unchanged. With deviceID 0, pa is rebased onto
+// the first device whose mapped range contains it; otherwise img.meta is used.
+func (img *image) mapDev(deviceID uint16, pa int64) (io.ReaderAt, int64, error) {
+	switch {
+	case int(deviceID) > len(img.devices):
+		return nil, 0, fmt.Errorf("invalid device id %d", deviceID)
+	case deviceID > 0:
+		return img.devices[deviceID-1].device, pa, nil
+	}
+
+	bits := img.sb.BlkSizeBits
+	for i := range img.devices {
+		d := &img.devices[i]
+		if d.mappedBlkAddr == 0 {
+			continue // devices with a zero mapped start address are skipped
+		}
+
+		first := int64(d.mappedBlkAddr) << bits
+		limit := first + int64(d.blocks)<<bits
+		if first <= pa && pa < limit {
+			return d.device, pa - first, nil
+		}
+	}
+
+	return img.meta, pa, nil
+}
+
+// blockSize returns the filesystem block size in bytes, i.e. 1 << BlkSizeBits.
+func (img *image) blockSize() uint32 { return 1 << img.sb.BlkSizeBits }
+
+// buildTime returns the superblock's BuildTime field unchanged.
+func (img *image) buildTime() uint64 { return img.sb.BuildTime }
+
+// deviceBlocks returns the block count of each extra device, in device-table
+// order, or nil when the image has no extra devices. The counts are the values
+// recorded in the device table slots.
+func (img *image) deviceBlocks() []uint64 {
+	if len(img.devices) == 0 {
+		return nil
+	}
+	counts := make([]uint64, 0, len(img.devices))
+	for i := range img.devices {
+		counts = append(counts, uint64(img.devices[i].blocks))
+	}
+	return counts
+}
+
+// openDirect returns an io.Reader over a file's data that reads straight from
+// the underlying reader, bypassing the block-at-a-time Read path. It returns
+// nil when that is not possible: empty files, multi-block inline files, other
+// layouts, and chunk-based files with holes, no indexes, or scattered chunks.
+func (img *image) openDirect(ino *inode) io.Reader {
+	if ino.size <= 0 {
+		return nil
+	}
+	blockSize := int64(1 << img.sb.BlkSizeBits)
+	switch ino.inodeLayout {
+	case disk.LayoutFlatPlain:
+		// Data is contiguous starting at dataBlkAddr.
+		dataOffset := int64(ino.inodeData) << img.sb.BlkSizeBits
+		return io.NewSectionReader(img.meta, dataOffset, ino.size)
+	case disk.LayoutFlatInline:
+		// Last block is inline after the inode; earlier blocks at dataBlkAddr.
+		// Only use direct read for single-block files (all data inline).
+		if ino.size > blockSize {
+			return nil
+		}
+		inodeAddr := img.metaStartPos() + int64(ino.nid)*disk.SizeInodeCompact
+		trailingAddr := inodeAddr + ino.flatDataOffset()
+		return io.NewSectionReader(img.meta, trailingAddr, ino.size)
+	case disk.LayoutChunkBased:
+		// Chunk-based files store data at the physical block addresses
+		// listed in the chunk index. For contiguous single-device files,
+		// the data is laid out consecutively and can be read directly.
+		chunkFmt := uint16(ino.inodeData)
+		if chunkFmt&disk.LayoutChunkFormatIndexes == 0 {
+			return nil
+		}
+		chunkBits := img.sb.BlkSizeBits + uint8(chunkFmt&disk.LayoutChunkFormatBits)
+		nchunks := int((ino.size-1)>>chunkBits) + 1
+
+		// Read chunk index entries to check contiguity.
+		inodeStart := img.metaStartPos() + int64(ino.nid)*disk.SizeInodeCompact
+		baseOffset := inodeStart + ino.flatDataOffset()
+		if baseOffset%8 != 0 {
+			baseOffset = (baseOffset + 7) & ^int64(7)
+		}
+		needed := int64(nchunks) * int64(disk.SizeChunkIndex)
+		if needed > maxChunkIndexBytes {
+			return nil
+		}
+		idxBuf := make([]byte, needed)
+		if _, err := img.meta.ReadAt(idxBuf, baseOffset); err != nil {
+			return nil
+		}
+
+		// Check that all chunks are contiguous on the same device.
+		blocksPerChunk := uint64(1 << (chunkBits - img.sb.BlkSizeBits))
+		var startBlock uint64
+		var deviceID uint16
+		for i := range nchunks {
+			off := i * disk.SizeChunkIndex
+			blkLo := binary.LittleEndian.Uint32(idxBuf[off+4 : off+8])
+			if ^blkLo == 0 {
+				return nil // hole
+			}
+			blkHi := binary.LittleEndian.Uint16(idxBuf[off : off+2])
+			did := binary.LittleEndian.Uint16(idxBuf[off+2:off+4]) & img.deviceIDMask
+			phys := (uint64(blkHi) << 32) | uint64(blkLo)
+
+			if i == 0 {
+				startBlock = phys
+				deviceID = did
+			} else {
+				expected := startBlock + uint64(i)*blocksPerChunk
+				if phys != expected || did != deviceID {
+					return nil // not contiguous or different device
+				}
+			}
+		}
+
+		// All chunks contiguous: resolve reader and offset via mapDev, as loadBlock does.
+		dev, dataOffset, err := img.mapDev(deviceID, int64(startBlock)<<img.sb.BlkSizeBits)
+		if err != nil {
+			return nil // unknown device id: leave it to the block path to report
+		}
+		return io.NewSectionReader(dev, dataOffset, ino.size)
+	default:
+		return nil
+	}
+}
+
+func (img *image) readMetadata(r io.Reader) ([]byte, error) {
+	// Each record is a 2-byte little-endian length followed by that many
+	// payload bytes, padded so the record ends on a 4-byte boundary.
+	var lenBuf [2]byte
+	if _, err := io.ReadFull(r, lenBuf[:]); err != nil {
+		return nil, fmt.Errorf("failed to read metadata length %v: %w", lenBuf, err)
+	}
+
+	dataLen := int(binary.LittleEndian.Uint16(lenBuf[:]))
+	if dataLen == 0 {
+		dataLen = 65536 // a stored length of zero is read as 65536 bytes
+	}
+
+	data := make([]byte, dataLen)
+	if _, err := io.ReadFull(r, data); err != nil {
+		return nil, fmt.Errorf("failed to read metadata payload: %w", err)
+	}
+
+	// Skip the alignment padding; running out of input here is tolerated.
+	padding := int64((4 - (2+dataLen)%4) % 4)
+	if padding == 0 {
+		return data, nil
+	}
+
+	_, err := io.CopyN(io.Discard, r, padding)
+	if err == nil || errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
+		return data, nil
+	}
+	return nil, fmt.Errorf("failed to discard padding of %d bytes: %w", padding, err)
+}
+
+// loadLongPrefixes loads the long xattr prefixes once and caches them along with any load error.
+// They are read through the packed inode when the fragments feature and PackedNid are set, else from img.meta.
+//
+// Long xattr name prefixes are used to optimize storage of xattrs with common
+// prefixes. They are stored sequentially in a special "packed inode" or
+// "meta inode".
+// See: https://docs.kernel.org/filesystems/erofs.html#extended-attributes
+func (img *image) loadLongPrefixes() error {
+	img.prefixesOnce.Do(func() {
+		if img.sb.XattrPrefixCount == 0 {
+			return
+		}
+
+		var r io.Reader
+
+		// Calculate the starting offset. XattrPrefixStart is defined in the
+		// superblock as being in units of 4 bytes from the start of the corresponding inode
+		startOffset := int64(img.sb.XattrPrefixStart) * 4
+
+		if (img.sb.FeatureIncompat&disk.FeatureIncompatFragments != 0) && img.sb.PackedNid > 0 {
+			// The packed inode (identified by PackedNid in the superblock) is a special
+			// inode used for shared data and metadata.
+			// We use ".packed" as a descriptive name for this internal inode.
+			f := &file{
+				img:   img,
+				name:  ".packed",
+				nid:   img.sb.PackedNid,
+				ftype: 0, // regular file
+			}
+
+			// Read inode info to determine size and layout
+			fi, err := f.readInfo()
+			if err != nil {
+				img.prefixesErr = fmt.Errorf("failed to read packed inode: %w", err)
+				return
+			}
+
+			if startOffset > fi.size {
+				img.prefixesErr = fmt.Errorf("xattr prefix start offset %d exceeds packed inode size %d", startOffset, fi.size)
+				return
+			}
+
+			// Start reading at the prefix table; bufio batches the small reads readMetadata issues
+			f.offset = startOffset
+			r = bufio.NewReader(f)
+		} else {
+			// FIXME(hsiangkao): should avoid hacky 1<<32 here since we don't care about the end
+			r = io.NewSectionReader(img.meta, startOffset, 1<<32)
+		}
+
+		img.longPrefixes = make([]string, img.sb.XattrPrefixCount)
+		for i := 0; i < int(img.sb.XattrPrefixCount); i++ {
+			data, err := img.readMetadata(r)
+			if err != nil {
+				img.prefixesErr =
+					fmt.Errorf("failed to read long xattr prefix %d: %w", i, err)
+				return
+			}
+
+			// First byte is the base_index of a standard xattr prefix (readMetadata never returns an empty payload)
+			baseIndex := xattrIndex(data[0])
+
+			// Remaining bytes are the infix to be appended to the base prefix
+			infix := string(data[1:])
+
+			// Construct full prefix: base prefix + infix
+			img.longPrefixes[i] = baseIndex.String() + infix
+		}
+	})
+	return img.prefixesErr
+}
+
+// getLongPrefix returns the cached long xattr prefix stored at index,
+// loading the prefix table on first use.
+func (img *image) getLongPrefix(index uint8) (string, error) {
+	err := img.loadLongPrefixes()
+	switch {
+	case err != nil:
+		return "", err
+	case int(index) < len(img.longPrefixes):
+		return img.longPrefixes[index], nil
+	}
+	return "", fmt.Errorf("long xattr prefix index %d out of range (max %d)", index, len(img.longPrefixes)-1)
+}
+
+// loadAt reads up to size bytes (at most one block) at addr from img.meta into a pooled block.
+func (img *image) loadAt(addr, size int64) (*block, error) {
+	size = min(size, int64(1)<<img.sb.BlkSizeBits)
+	if size < 0 { // would otherwise panic when slicing the buffer below
+		return nil, fmt.Errorf("negative read size %d at %d: %w", size, addr, ErrInvalid)
+	}
+
+	b := img.getBlock()
+	n, err := img.meta.ReadAt(b.buf[:size], addr)
+	if err != nil {
+		img.putBlock(b) // hand the buffer back before bailing out
+		return nil, fmt.Errorf("failed to read %d bytes at %d: %w", size, addr, err)
+	}
+	b.offset = 0
+	b.end = int32(n)
+	return b, nil
+}
+
+// loadBlock returns the part of fi's data block containing pos, windowed from pos to the end of valid data.
+func (img *image) loadBlock(fi *inode, pos int64) (*block, error) {
+	nblocks := calculateBlocks(img.sb.BlkSizeBits, fi.size)
+	bn := int(pos >> int(img.sb.BlkSizeBits))
+	if bn >= nblocks {
+		return nil, fmt.Errorf("block position larger than number of blocks for inode: %w", io.EOF)
+	}
+	var addr int64
+	blockSize := int(1 << img.sb.BlkSizeBits)
+	blockOffset := 0
+	blockEnd := blockSize
+	switch fi.inodeLayout {
+	case disk.LayoutFlatPlain:
+		// flat plain has no holes
+		addr = int64(int(fi.inodeData)+bn) << img.sb.BlkSizeBits
+		blockOffset = int(pos % int64(blockSize))
+		if bn == nblocks-1 {
+			blockEnd = int(fi.size - int64(bn)*int64(1<<img.sb.BlkSizeBits))
+		}
+	case disk.LayoutFlatInline:
+		// If on the last block, validate
+		if bn == nblocks-1 {
+			addr = img.metaStartPos() + int64(fi.nid*disk.SizeInodeCompact)
+			// Move to the data offset from the start of the inode
+			addr += fi.flatDataOffset()
+
+			// Get the offset from the start of the block
+			blockOffset = int(addr & int64(blockSize-1))
+
+			// Move addr to start of block
+			addr = (addr & ^int64(blockSize-1))
+
+			// Compute end of inline data within the block (before adjusting
+			// blockOffset for the read position).
+			blockEnd = int(fi.size-int64(bn*blockSize)) + blockOffset
+
+			// Move the offset within the block based on position within file
+			blockOffset += int(pos - int64(bn<<int(img.sb.BlkSizeBits)))
+
+			// Ensure the last block is not exceeded
+			if blockEnd > blockSize {
+				return nil, fmt.Errorf("inline data cross block boundary for nid %d: %w", fi.nid, ErrInvalid)
+			}
+		} else {
+			addr = int64(int(fi.inodeData)+bn) << img.sb.BlkSizeBits
+			blockOffset = int(pos % int64(blockSize))
+		}
+	case disk.LayoutChunkBased:
+		// first 2 le bytes for format, second 2 bytes are reserved
+		format := uint16(fi.inodeData)
+		if format&disk.LayoutChunkFormat48Bit != 0 {
+			return nil, fmt.Errorf("48-bit chunk format for nid %d: %w", fi.nid, ErrNotImplemented)
+		}
+		if format&^(disk.LayoutChunkFormatBits|disk.LayoutChunkFormatIndexes) != 0 {
+			return nil, fmt.Errorf("unsupported chunk format %x for nid %d: %w", format, fi.nid, ErrInvalid)
+		}
+
+		chunkbits := img.sb.BlkSizeBits + uint8(format&disk.LayoutChunkFormatBits)
+		chunkn := int((fi.size-1)>>chunkbits) + 1
+		cn := int(pos >> chunkbits)
+
+		if cn >= chunkn {
+			return nil, fmt.Errorf("chunk format does not fit into allocated bytes for nid %d: %w", fi.nid, ErrInvalid)
+		}
+
+		inodeStart := img.metaStartPos() + int64(fi.nid*disk.SizeInodeCompact)
+		baseOffset := inodeStart + fi.flatDataOffset()
+
+		unit := 4
+		if format&disk.LayoutChunkFormatIndexes == disk.LayoutChunkFormatIndexes {
+			unit = 8
+			// Align to 8 bytes
+			if baseOffset%8 != 0 {
+				baseOffset = (baseOffset + 7) & ^int64(7)
+			}
+		}
+
+		entryPos := baseOffset + int64(cn*unit)
+		var entryBuf [8]byte
+		if n, err := img.meta.ReadAt(entryBuf[:unit], entryPos); err != nil {
+			return nil, fmt.Errorf("failed to read chunk entry at %d: %w", entryPos, err)
+		} else if n != unit {
+			return nil, fmt.Errorf("short read of chunk entry at %d: read %d bytes, expected %d", entryPos, n, unit)
+		}
+
+		var addr int64
+		var deviceID uint16
+
+		if unit == 8 {
+			startBlkLo := binary.LittleEndian.Uint32(entryBuf[4:8])
+			if ^startBlkLo == 0 {
+				addr = -1
+			} else {
+				addr = int64(startBlkLo) << img.sb.BlkSizeBits
+				deviceID = binary.LittleEndian.Uint16(entryBuf[2:4]) & img.deviceIDMask
+			}
+		} else {
+			rawAddr := binary.LittleEndian.Uint32(entryBuf[:4])
+			if ^rawAddr == 0 {
+				addr = -1
+			} else {
+				addr = int64(rawAddr) << img.sb.BlkSizeBits
+			}
+		}
+
+		if bn == nblocks-1 {
+			blockEnd = int(fi.size - int64(bn)*int64(1<<img.sb.BlkSizeBits))
+		}
+		blockOffset = int(pos % int64(blockSize))
+		if blockOffset < 0 || blockEnd > blockSize || blockOffset >= blockEnd {
+			return nil, fmt.Errorf("invalid chunk block bounds [%d:%d] for nid %d: %w", blockOffset, blockEnd, fi.nid, ErrInvalid)
+		}
+
+		if addr == -1 {
+			// Null address (hole): bounds were validated above, so the zero-filled window is safe to slice
+			return &block{
+				buf:    make([]byte, 1<<img.sb.BlkSizeBits),
+				offset: int32(blockOffset),
+				end:    int32(blockEnd),
+			}, nil
+		}
+
+		// Add block offset within chunk
+		blockPos := int64(bn) << img.sb.BlkSizeBits
+		if blockPos > 0 {
+			addr += (blockPos - int64(cn<<chunkbits))
+		}
+
+		reader, mappedAddr, err := img.mapDev(deviceID, addr)
+		if err != nil {
+			return nil, fmt.Errorf("failed to map device for nid %d: %w", fi.nid, err)
+		}
+		addr = mappedAddr
+
+		b := img.getBlock()
+		if n, err := reader.ReadAt(b.buf[blockOffset:blockEnd], addr+int64(blockOffset)); err != nil {
+			img.putBlock(b)
+			return nil, fmt.Errorf("failed to read block for nid %d: %w", fi.nid, err)
+		} else if n != (blockEnd - blockOffset) {
+			img.putBlock(b)
+			return nil, fmt.Errorf("failed to read full block for nid %d: %w", fi.nid, ErrInvalid)
+		}
+		b.offset = int32(blockOffset)
+		b.end = int32(blockEnd)
+		return b, nil
+	case disk.LayoutCompressedFull, disk.LayoutCompressedCompact:
+		return nil, fmt.Errorf("inode layout (%d) for %d: %w", fi.inodeLayout, fi.nid, ErrNotImplemented)
+	default:
+		return nil, fmt.Errorf("inode layout (%d) for %d: %w", fi.inodeLayout, fi.nid, ErrInvalid)
+	}
+	if blockOffset < 0 || blockEnd > blockSize || blockOffset >= blockEnd {
+		return nil, fmt.Errorf("invalid block bounds [%d:%d] for nid %d: %w", blockOffset, blockEnd, fi.nid, ErrInvalid)
+	}
+
+	b := img.getBlock()
+	b.offset = int32(blockOffset)
+	b.end = int32(blockEnd)
+	if n, err := img.meta.ReadAt(b.bytes(), addr+int64(blockOffset)); err != nil {
+		img.putBlock(b)
+		return nil, fmt.Errorf("failed to read block for nid %d: %w", fi.nid, err)
+	} else if n != blockEnd-blockOffset {
+		img.putBlock(b)
+		return nil, fmt.Errorf("failed to read full block for nid %d: %w, expected %d, actual %d", fi.nid, ErrInvalid, blockEnd-blockOffset, n)
+	}
+	return b, nil
+}
+
+func (img *image) getBlock() *block {
+	return img.blkPool.Get().(*block) // take a block from blkPool; the assertion panics on any other type
+}
+
+// putBlock hands b back to img.blkPool once the caller is finished with it,
+// so that a later getBlock call may pick the same block up again.
+func (img *image) putBlock(b *block) {
+	img.blkPool.Put(b)
+}
+
+const maxSymlinks = 255 // resolve fails with ErrLoop after following more than this many symlinks
+
+// maxSymlinkSize is the largest symlink target size that readLink accepts.
+// Linux PATH_MAX is 4096; we use the same limit.
+const maxSymlinkSize = 4096
+
+// readLink returns the target of symlink inode nid; name only labels the handle.
+func (i *image) readLink(nid uint64, name string) (string, error) {
+	link := &file{img: i, name: name, nid: nid, ftype: fs.ModeSymlink}
+	ino, err := link.readInfo()
+	switch {
+	case err != nil:
+		return "", err
+	case ino.size < 0 || ino.size > maxSymlinkSize:
+		return "", fmt.Errorf("symlink target size %d out of range: %w", ino.size, ErrInvalid)
+	case ino.size == 0:
+		return "", nil
+	}
+	target := make([]byte, ino.size)
+	if _, err = link.Read(target); err != nil && err != io.EOF {
+		return "", err
+	}
+	return string(target), nil
+}
+
+// resolve cleans name and walks directory entries from the root to its inode.
+// Symlinks in intermediate components are always followed; the final component
+// is followed only when follow is true (false is used by Lstat/ReadLink).
+// All failures are returned as *fs.PathError carrying op and the original name.
+func (i *image) resolve(op, name string, follow bool) (nid uint64, ftype fs.FileMode, basename string, err error) {
+	original := name
+	if path.IsAbs(name) {
+		name = name[1:]
+	}
+	name = path.Clean(name)
+	if name == "." {
+		name = ""
+	}
+
+	nid = uint64(i.sb.RootNid)
+	ftype = fs.ModeDir
+
+	linksFollowed := 0
+	// curPath tracks the full resolved path of the current directory
+	// so that relative symlink targets can be resolved correctly.
+	curPath := ""
+	basename = name
+	for name != "" {
+		var sep int
+		for sep < len(name) && name[sep] != '/' {
+			sep++
+		}
+		var rest string
+		if sep < len(name) {
+			basename = name[:sep]
+			rest = name[sep+1:]
+		} else {
+			basename = name
+			rest = ""
+		}
+
+		if ftype != fs.ModeDir {
+			return 0, 0, "", &fs.PathError{Op: op, Path: original, Err: ErrNotDirectory}
+		}
+		d := &dir{
+			file: file{
+				img:   i,
+				name:  basename,
+				nid:   nid,
+				ftype: ftype,
+			},
+		}
+		entNid, entFtype, err := d.lookup(basename)
+		if err != nil {
+			return 0, 0, "", &fs.PathError{Op: op, Path: original, Err: err}
+		}
+		nid = entNid
+		ftype = entFtype & fs.ModeType
+
+		// Follow symlinks for intermediate components always,
+		// and for the final component only when follow is true.
+		isFinal := rest == ""
+		if ftype&fs.ModeSymlink != 0 && (follow || !isFinal) {
+			linksFollowed++
+			if linksFollowed > maxSymlinks {
+				return 0, 0, "", &fs.PathError{Op: op, Path: original, Err: ErrLoop}
+			}
+			target, err := i.readLink(nid, basename)
+			if err != nil {
+				return 0, 0, "", &fs.PathError{Op: op, Path: original, Err: err}
+			}
+			// Append the unresolved remainder of the path to the symlink target
+			if rest != "" {
+				target = target + "/" + rest
+			}
+			// Resolve relative to the parent directory's full path
+			if !path.IsAbs(target) {
+				target = curPath + "/" + target
+			}
+			// Clean and re-resolve from root
+			target = path.Clean(target)
+			if len(target) > 0 && target[0] == '/' {
+				target = target[1:]
+			}
+			nid = uint64(i.sb.RootNid)
+			ftype = fs.ModeDir
+			curPath = ""
+			name = target
+			if name == "." {
+				name = ""
+			}
+			basename = name
+			continue
+		}
+
+		if curPath == "" {
+			curPath = basename
+		} else {
+			curPath = curPath + "/" + basename
+		}
+		name = rest
+	}
+
+	if basename == "" {
+		basename = original
+	}
+	return nid, ftype, basename, nil
+}
+
+// Open resolves name, following symlinks, and returns a *dir or a *file handle.
+func (i *image) Open(name string) (fs.File, error) {
+	nid, ftype, basename, err := i.resolve("open", name, true)
+	if err != nil {
+		return nil, err
+	}
+	if !ftype.IsDir() {
+		return &file{img: i, name: basename, nid: nid, ftype: ftype}, nil
+	}
+	return &dir{file: file{img: i, name: basename, nid: nid, ftype: ftype}}, nil
+}
+
+// Stat resolves name, following symlinks, and returns its file info.
+func (i *image) Stat(name string) (fs.FileInfo, error) {
+	nid, ftype, basename, err := i.resolve("stat", name, true)
+	if err != nil {
+		return nil, err
+	}
+	return (&file{img: i, name: basename, nid: nid, ftype: ftype}).statInfo()
+}
+
+// ReadFile returns the contents of the named file, following symlinks. Sizes above
+// maxReadFileSize (128 MiB) are rejected; use Open and io.Copy for larger files.
+func (i *image) ReadFile(name string) ([]byte, error) {
+	nid, ftype, basename, err := i.resolve("readfile", name, true)
+	switch {
+	case err != nil:
+		return nil, err
+	case ftype.IsDir():
+		return nil, &fs.PathError{Op: "read", Path: name, Err: ErrIsDirectory}
+	}
+	src := &file{img: i, name: basename, nid: nid, ftype: ftype}
+	ino, err := src.readInfo()
+	if err != nil {
+		return nil, err
+	}
+	if ino.size < 0 || ino.size > maxReadFileSize {
+		return nil, fmt.Errorf("file size %d exceeds ReadFile limit %d; use Open and io.Copy for large files: %w", ino.size, int64(maxReadFileSize), ErrInvalid)
+	}
+	data := make([]byte, ino.size)
+	if len(data) == 0 {
+		return data, nil
+	}
+	if _, err = src.Read(data); err != nil && err != io.EOF {
+		return nil, err
+	}
+	return data, nil
+}
+
+// ReadDir resolves name (following symlinks) and lists it sorted by entry name.
+func (i *image) ReadDir(name string) ([]fs.DirEntry, error) {
+	nid, ftype, basename, err := i.resolve("readdir", name, true)
+	switch {
+	case err != nil:
+		return nil, err
+	case !ftype.IsDir():
+		return nil, &fs.PathError{Op: "readdir", Path: name, Err: ErrNotDirectory}
+	}
+	listing := dir{file: file{img: i, name: basename, nid: nid, ftype: ftype}}
+	entries, err := listing.ReadDir(-1)
+	if err != nil {
+		return nil, err
+	}
+	byName := func(a, b fs.DirEntry) int { return cmp.Compare(a.Name(), b.Name()) }
+	slices.SortFunc(entries, byName)
+	return entries, nil
+}
+
+func (i *image) ReadLink(name string) (string, error) {
+	nid, ftype, basename, err := i.resolve("readlink", name, false) // keep the final link unresolved
+	switch {
+	case err != nil:
+		return "", err
+	case ftype&fs.ModeSymlink == 0:
+		return "", &fs.PathError{Op: "readlink", Path: name, Err: fs.ErrInvalid}
+	}
+	return i.readLink(nid, basename)
+}
+
+// Lstat returns file info for name without following a symlink in the final component.
+func (i *image) Lstat(name string) (fs.FileInfo, error) {
+	nid, ftype, basename, err := i.resolve("lstat", name, false)
+	if err != nil {
+		return nil, err
+	}
+	return (&file{img: i, name: basename, nid: nid, ftype: ftype}).statInfo()
+}
+
+type file struct {
+	img   *image      // image this file is read from
+	name  string      // name copied into the cached inode and the file info
+	nid   uint64      // inode number; readInfo derives the on-disk inode address from it
+	ftype fs.FileMode // file type bits, merged into the inode mode by readInfo
+
+	// Mutable fields, open file should not be accessed concurrently
+	offset int64  // current offset for read operations
+	info   *inode // cached inode, filled in by the first readInfo call
+}
+
+func (b *file) readInfo() (ino *inode, err error) {
+	if b.info != nil { // parsed by an earlier call; reuse it
+		return b.info, nil
+	}
+
+	addr := b.img.metaStartPos() + int64(b.nid*disk.SizeInodeCompact) // nid is scaled by SizeInodeCompact from metaStartPos
+	blkSize := int32(1 << b.img.sb.BlkSizeBits)
+	blk := b.img.getBlock()
+	blk.offset = int32(addr & int64(blkSize-1)) // byte offset of the inode inside its block
+	blk.end = blkSize
+	if blk.end-blk.offset < disk.SizeInodeExtended {
+		// Use buffer starting from beginning of inode, do not use the position
+		// in the block since an extended inode may span multiple blocks
+		blk.offset = 0
+		blk.end = disk.SizeInodeExtended
+	}
+
+	defer func() {
+		v := recover() // convert a panic raised while parsing into an error
+		if v != nil {
+			err = fmt.Errorf("file format error: %v", v)
+		}
+		if err != nil { // every failure path gives the block back to the pool
+			b.img.putBlock(blk)
+		}
+
+	}()
+
+	buf := blk.bytes()
+	_, err = b.img.meta.ReadAt(buf, addr)
+	if err != nil {
+		return nil, err
+	}
+
+	var format, xcnt uint16
+	if _, err = binary.Decode(buf[:2], binary.LittleEndian, &format); err != nil {
+		return nil, err
+	}
+
+	layout := uint8((format & 0x0E) >> 1) // bits 1-3 of the format word select the data layout
+	if format&0x01 == 0 { // bit 0 clear: compact inode; set: extended inode
+		var di disk.InodeCompact
+		if _, err := binary.Decode(buf[:disk.SizeInodeCompact], binary.LittleEndian, &di); err != nil {
+			return nil, err
+		}
+		b.info = &inode{
+			name:        b.name,
+			nid:         b.nid,
+			icsize:      disk.SizeInodeCompact,
+			inodeLayout: layout,
+			inodeData:   di.InodeData,
+			size:        int64(di.Size),
+			mode:        (fs.FileMode(di.Mode) & ^fs.ModeType) | b.ftype,
+			rawMode:     di.Mode,
+			uid:         uint32(di.UID),
+			gid:         uint32(di.GID),
+			nlink:       int(di.Nlink),
+			mtime:       b.img.sb.BuildTime, // compact inodes report the superblock build time
+			mtimeNs:     b.img.sb.BuildTimeNs,
+		}
+		xcnt = di.XattrCount
+	} else {
+		var di disk.InodeExtended
+		if _, err = binary.Decode(buf[:disk.SizeInodeExtended], binary.LittleEndian, &di); err != nil {
+			return nil, err
+		}
+		b.info = &inode{
+			name:        b.name,
+			nid:         b.nid,
+			icsize:      disk.SizeInodeExtended,
+			inodeLayout: layout,
+			inodeData:   di.InodeData,
+			size:        int64(di.Size),
+			mode:        (fs.FileMode(di.Mode) & ^fs.ModeType) | b.ftype,
+			rawMode:     di.Mode,
+			uid:         di.UID,
+			gid:         di.GID,
+			nlink:       int(di.Nlink),
+			mtime:       di.Mtime,
+			mtimeNs:     di.MtimeNs,
+		}
+		xcnt = di.XattrCount
+	}
+
+	if xcnt > 0 {
+		b.info.xsize = int(xcnt-1)*disk.SizeXattrEntry + disk.SizeXattrBodyHeader // body header plus (count-1) entry-sized units
+	}
+
+	switch {
+	case b.info.inodeLayout == disk.LayoutFlatPlain || b.info.size == 0 || blk.end != blkSize:
+		b.img.putBlock(blk) // the block is not retained in these cases: release it now
+	default:
+		// If the inode has trailing data used later, cache it
+		b.info.cached = blk
+	}
+	return b.info, nil
+}
+
+// statInfo reads the inode and builds a fileInfo with full stat data
+// including extended attributes. Any block cached on the inode is released
+// before returning, since stat callers do not need inline data.
+func (b *file) statInfo() (*fileInfo, error) {
+	ino, err := b.readInfo()
+	if err != nil {
+		return nil, err
+	}
+	fi := &fileInfo{
+		name:    ino.name,
+		size:    ino.size,
+		mode:    ino.mode,
+		mtime:   ino.mtime,
+		mtimeNs: ino.mtimeNs,
+		stat: &Stat{
+			Mode:        disk.EroFSModeToGoFileMode(ino.rawMode),
+			Size:        ino.size,
+			InodeLayout: ino.inodeLayout,
+			Ino:         int64(ino.nid), // the nid is reported as the inode number
+			Rdev:        disk.RdevFromMode(ino.rawMode, ino.inodeData),
+			UID:         ino.uid,
+			GID:         ino.gid,
+			Nlink:       ino.nlink,
+			Mtime:       ino.mtime,
+			MtimeNs:     ino.mtimeNs,
+		},
+	}
+	if ino.xsize > 0 { // xsize is non-zero only when the inode declares xattrs
+		if err := loadXattrs(b, fi.stat); err != nil {
+			return nil, err
+		}
+	}
+	// Build data ranges for regular files.
+	// Flat layouts are cheap (no I/O) — compute eagerly.
+	// Chunk-based layout requires a ReadAt on the image; defer until needed.
+	if ino.mode.IsRegular() && ino.size > 0 {
+		if ino.inodeLayout == disk.LayoutChunkBased {
+			// Capture a snapshot of the fields buildChunkDataRanges needs.
+			// We must not capture ino by pointer: the caller may reuse it,
+			// and cached block is released below.
+			inoCopy := *ino
+			inoCopy.cached = nil
+			img := b.img
+			fi.rangesLoader = func() []DataRange {
+				f := &file{img: img} // throwaway handle: only img is needed by the loader
+				return f.buildChunkDataRanges(&inoCopy)
+			}
+		} else {
+			fi.dataRanges = b.buildDataRanges(ino)
+		}
+	}
+	// Release cached block - stat callers don't need inline data
+	if ino.cached != nil {
+		b.img.putBlock(ino.cached)
+		ino.cached = nil
+	}
+	return fi, nil
+}
+
+// buildDataRanges computes the physical data ranges for a regular file.
+func (b *file) buildDataRanges(ino *inode) []DataRange {
+	blockSize := int64(1 << b.img.sb.BlkSizeBits)
+	switch ino.inodeLayout {
+	case disk.LayoutFlatPlain:
+		dataOffset := int64(ino.inodeData) << b.img.sb.BlkSizeBits
+		return []DataRange{{Device: 0, Offset: dataOffset, Size: ino.size}}
+	case disk.LayoutFlatInline:
+		inodeAddr := b.img.metaStartPos() + int64(ino.nid)*disk.SizeInodeCompact
+		trailingAddr := inodeAddr + ino.flatDataOffset()
+		if ino.size <= blockSize {
+			return []DataRange{{Device: 0, Offset: trailingAddr, Size: ino.size}}
+		}
+		// Multi-block inline: earlier full blocks at dataBlkAddr, last block inline.
+		// headSize is the number of complete blocks before the inline tail, in bytes.
+		// ino.inodeData is the starting block address, not a block count.
+		headSize := ((ino.size - 1) / blockSize) * blockSize
+		tailSize := ino.size - headSize
+		var ranges []DataRange
+		if headSize > 0 {
+			dataOffset := int64(ino.inodeData) << b.img.sb.BlkSizeBits
+			ranges = append(ranges, DataRange{Device: 0, Offset: dataOffset, Size: headSize})
+		}
+		ranges = append(ranges, DataRange{Device: 0, Offset: trailingAddr, Size: tailSize})
+		return ranges
+	case disk.LayoutChunkBased:
+		return b.buildChunkDataRanges(ino)
+	}
+	return nil
+}
+
+// maxChunkIndexBytes is an upper bound on the chunk-index table we will
+// allocate for a single file; buildChunkDataRanges returns nil above it.
+// 64 MiB covers ~8 M chunks and prevents allocation bombs from corrupt images.
+const maxChunkIndexBytes = 64 << 20 // 64 MiB
+
+// buildChunkDataRanges parses chunk indexes into DataRange entries covering
+// the complete logical layout of the file. The returned slice satisfies the
+// DataRange contract: entries are in logical-file order and their sizes sum
+// to ino.size exactly.
+//
+// Null/hole chunks are emitted as DataRange{Offset: -1, Size: ...} entries.
+// Consecutive null chunks coalesce into a single hole entry.
+// Adjacent data chunks that are physically contiguous on the same device
+// merge into one entry. Data chunks never merge across a hole boundary.
+//
+// The final entry (data or hole) has its Size trimmed to the file-tail length
+// so the invariant sum(Size) == ino.size holds precisely.
+func (b *file) buildChunkDataRanges(ino *inode) []DataRange {
+	chunkFmt := uint16(ino.inodeData)
+	if chunkFmt&disk.LayoutChunkFormatIndexes == 0 {
+		return nil // the format does not use chunk indexes: nothing to parse here
+	}
+	// 48-bit chunk addressing is not yet implemented; the null-chunk sentinel
+	// (blkLo == 0xFFFFFFFF) is only unambiguous in 32-bit address mode.
+	if chunkFmt&disk.LayoutChunkFormat48Bit != 0 {
+		return nil
+	}
+	chunkBits := b.img.sb.BlkSizeBits + uint8(chunkFmt&disk.LayoutChunkFormatBits)
+	nchunks := int((ino.size-1)>>chunkBits) + 1 // number of chunks needed to cover ino.size bytes
+	chunkSize := int64(1) << chunkBits
+
+	inodeStart := b.img.metaStartPos() + int64(ino.nid)*disk.SizeInodeCompact
+	baseOffset := inodeStart + ino.flatDataOffset()
+	if baseOffset%8 != 0 { // the index table is read from the next 8-byte boundary
+		baseOffset = (baseOffset + 7) & ^int64(7)
+	}
+	needed := int64(nchunks) * int64(disk.SizeChunkIndex)
+	if needed > maxChunkIndexBytes {
+		return nil // refuse to allocate an oversized index table
+	}
+	idxBuf := make([]byte, needed)
+	if _, err := b.img.meta.ReadAt(idxBuf, baseOffset); err != nil {
+		return nil // the read error is dropped; callers only see a nil result
+	}
+
+	var ranges []DataRange
+	for i := range nchunks {
+		// Size of this logical chunk: full chunkSize for all but the last.
+		size := chunkSize
+		if i == nchunks-1 {
+			size = ino.size - int64(i)*chunkSize
+		}
+
+		off := i * disk.SizeChunkIndex
+		blkLo := binary.LittleEndian.Uint32(idxBuf[off+4 : off+8]) // bytes 4-7: low 32 bits of the block address
+		if ^blkLo == 0 {
+			// Null/hole chunk: coalesce with a preceding hole if possible.
+			if len(ranges) > 0 && ranges[len(ranges)-1].Offset == holeOffset {
+				ranges[len(ranges)-1].Size += size
+			} else {
+				ranges = append(ranges, DataRange{Offset: holeOffset, Size: size})
+			}
+			continue
+		}
+
+		blkHi := binary.LittleEndian.Uint16(idxBuf[off : off+2]) // bytes 0-1: high 16 bits of the block address
+		deviceID := binary.LittleEndian.Uint16(idxBuf[off+2:off+4]) & b.img.deviceIDMask
+		phys := (uint64(blkHi) << 32) | uint64(blkLo)
+		byteOffset := int64(phys) << b.img.sb.BlkSizeBits
+
+		// Merge with the previous entry if it is a data range that is
+		// physically contiguous on the same device.
+		if len(ranges) > 0 {
+			prev := &ranges[len(ranges)-1]
+			if prev.Offset != holeOffset && prev.Device == deviceID && prev.Offset+prev.Size == byteOffset {
+				prev.Size += size
+				continue
+			}
+		}
+		ranges = append(ranges, DataRange{Device: deviceID, Offset: byteOffset, Size: size})
+	}
+	return ranges
+}
+
+func (b *file) Stat() (fs.FileInfo, error) {
+	return b.statInfo() // same data as image.Stat; also releases any block cached on the inode
+}
+
+// Read copies file data into p block by block, advancing b.offset by every
+// byte copied. It returns io.EOF once the offset reaches the inode size.
+func (b *file) Read(p []byte) (int, error) {
+	fi, err := b.readInfo()
+	if err != nil {
+		return 0, err
+	}
+	var n int
+	for len(p) > 0 {
+		if b.offset >= fi.size {
+			return n, io.EOF
+		}
+		blk, err := b.img.loadBlock(fi, b.offset)
+		if err != nil {
+			// b.offset already reflects every byte copied so far, so it
+			// must not be advanced again here; only normalize wrapped EOFs.
+			if errors.Is(err, io.EOF) {
+				err = io.EOF
+			}
+			return n, err
+		}
+		copied := copy(p, blk.bytes())
+		b.img.putBlock(blk)
+		n += copied
+		p = p[copied:]
+		b.offset += int64(copied)
+	}
+	return n, nil
+}
+
+func (b *file) Close() error {
+	if ino := b.info; ino != nil && ino.cached != nil {
+		b.img.putBlock(ino.cached) // hand the retained inode block back to the pool
+		ino.cached = nil
+	}
+	return nil
+}
+
+type direntry struct {
+	file // embedded file supplies name, ftype and statInfo
+}
+
+func (d *direntry) Name() string {
+	return d.name // name as decoded from the directory block
+}
+
+func (d *direntry) IsDir() bool {
+	return d.ftype.IsDir()
+}
+
+func (d *direntry) Type() fs.FileMode {
+	return d.ftype // type taken from the dirent, without reading the inode
+}
+
+func (d *direntry) Info() (fs.FileInfo, error) {
+	return d.statInfo() // reads the inode on demand
+}
+
+type dir struct {
+	file
+
+	// bn is the current block to read from (relative to file start)
+	bn int
+
+	// consumed is the index of the first entry in block bn that ReadDir has not yet returned
+	consumed uint16
+}
+
+func (d *dir) ReadDir(n int) ([]fs.DirEntry, error) {
+	fi, err := d.readInfo()
+	if err != nil {
+		return nil, fmt.Errorf("readInfo failed: %w", err)
+	}
+
+	var ents []fs.DirEntry
+	pos := int64(d.bn << d.img.sb.BlkSizeBits) // resume at the block recorded by an earlier call
+	for pos < fi.size {
+		b, err := d.img.loadBlock(fi, pos)
+		if err != nil {
+			if errors.Is(err, io.EOF) {
+				break
+			}
+			return nil, err
+		}
+		buf := b.bytes()
+		if len(buf) < 12 { // too short to hold even one dirent
+			d.img.putBlock(b)
+			break
+		}
+
+		var dirents [2]disk.Dirent
+
+		readN, err := binary.Decode(buf[:12], binary.LittleEndian, &dirents[0])
+		if err != nil {
+			d.img.putBlock(b)
+			return nil, fmt.Errorf("decode failed: %w", err)
+		}
+		if readN != 12 {
+			d.img.putBlock(b)
+			return nil, errors.New("invalid dirent: not fully decoded")
+		}
+
+		entryN := dirents[0].NameOff / disk.SizeDirent // the first name offset divided by the dirent size gives the entry count
+		bufLen := len(buf)
+
+		// Validate that NameOff is within bounds and dirent entries fit.
+		if int(dirents[0].NameOff) > bufLen || entryN == 0 {
+			d.img.putBlock(b)
+			return ents, fmt.Errorf("invalid dirent name offset %d (buf size %d): %w", dirents[0].NameOff, bufLen, ErrInvalid)
+		}
+
+		for i := uint16(0); i < entryN; i++ {
+			var name string
+			if i < entryN-1 {
+				start := int(disk.SizeDirent) * (int(i) + 1)
+				if start+int(disk.SizeDirent) > bufLen {
+					d.img.putBlock(b)
+					return ents, fmt.Errorf("dirent entry %d exceeds block: %w", i+1, ErrInvalid)
+				}
+				readN, err := binary.Decode(buf[start:start+int(disk.SizeDirent)], binary.LittleEndian, &dirents[1])
+				if err != nil {
+					d.img.putBlock(b)
+					return nil, fmt.Errorf("decode failed: %w", err)
+				}
+				if readN != 12 {
+					d.img.putBlock(b)
+					return nil, errors.New("invalid dirent: not fully decoded")
+				}
+				if int(dirents[0].NameOff) > bufLen || int(dirents[1].NameOff) > bufLen || dirents[1].NameOff < dirents[0].NameOff {
+					d.img.putBlock(b)
+					return ents, fmt.Errorf("invalid dirent name offset range [%d:%d] (buf size %d): %w",
+						dirents[0].NameOff, dirents[1].NameOff, bufLen, ErrInvalid)
+				}
+				name = string(buf[dirents[0].NameOff:dirents[1].NameOff])
+			} else {
+				if int(dirents[0].NameOff) > bufLen {
+					d.img.putBlock(b)
+					return ents, fmt.Errorf("invalid dirent name offset %d (buf size %d): %w", dirents[0].NameOff, bufLen, ErrInvalid)
+				}
+				// The last entry name extends to end of block;
+				// trim any NUL padding.
+				raw := buf[dirents[0].NameOff:]
+				if j := bytes.IndexByte(raw, 0); j >= 0 {
+					raw = raw[:j]
+				}
+				name = string(raw)
+			}
+
+			if i >= d.consumed && name != "." && name != ".." { // skip entries already returned and the dot entries
+				f := file{
+					img:   d.img,
+					name:  name,
+					nid:   dirents[0].Nid,
+					ftype: disk.EroFSFtypeToFileMode(dirents[0].FileType),
+				}
+				ents = append(ents, &direntry{f})
+				d.consumed = i + 1
+
+				if n > 0 && len(ents) == n {
+					if i == entryN-1 { // block exhausted: the next call starts on the following block
+						d.consumed = 0
+						d.bn++
+					}
+					d.img.putBlock(b)
+					return ents, nil
+				}
+			}
+
+			// Rotate next to current
+			dirents[0] = dirents[1]
+		}
+
+		d.img.putBlock(b)
+		d.consumed = 0
+		d.bn++
+		pos = int64(d.bn << d.img.sb.BlkSizeBits)
+	}
+
+	// Per fs.ReadDirFile contract: when n > 0 and we've reached the end
+	// of the directory, return io.EOF. When n <= 0, return all entries
+	// without io.EOF.
+	if n > 0 {
+		return ents, io.EOF
+	}
+	return ents, nil
+}
+
+// lookup searches for a directory entry by name using binary search.
+// EROFS directories are sorted by name both within and across blocks.
+// A cross-block binary search locates the correct block, then an
+// intra-block binary search finds the entry.
+// Returns the nid and file type if found, or fs.ErrNotExist if not.
+func (d *dir) lookup(target string) (uint64, fs.FileMode, error) {
+	fi, err := d.readInfo()
+	if err != nil {
+		return 0, 0, fmt.Errorf("readInfo failed: %w", err)
+	}
+
+	targetBytes := []byte(target)
+	blkSize := int64(1 << d.img.sb.BlkSizeBits)
+	nblocks := int((fi.size + blkSize - 1) / blkSize) // directory size rounded up to whole blocks
+
+	// Binary search across blocks: compare target against the first
+	// entry of each block to find which block may contain the target.
+	// The last loaded block is retained to avoid reloading it for the
+	// intra-block search.
+	var lastBlk *block
+	lastIdx := -1
+	lo, hi := 0, nblocks
+	for lo < hi {
+		mid := lo + (hi-lo)/2
+		pos := int64(mid) * blkSize
+		b, err := d.img.loadBlock(fi, pos)
+		if err != nil {
+			if errors.Is(err, io.EOF) {
+				hi = mid // an EOF narrows the search to the blocks before mid
+				continue
+			}
+			if lastBlk != nil {
+				d.img.putBlock(lastBlk)
+			}
+			return 0, 0, err
+		}
+		buf := b.bytes()
+		firstName, err := blockFirstName(buf)
+		if err != nil {
+			d.img.putBlock(b)
+			if lastBlk != nil {
+				d.img.putBlock(lastBlk)
+			}
+			return 0, 0, err
+		}
+
+		if bytes.Compare(firstName, targetBytes) <= 0 {
+			// This block's first entry <= target; keep it as candidate.
+			if lastBlk != nil {
+				d.img.putBlock(lastBlk)
+			}
+			lastBlk = b
+			lastIdx = mid
+			lo = mid + 1
+		} else {
+			d.img.putBlock(b)
+			hi = mid
+		}
+	}
+
+	// lastIdx is the last block whose first entry <= target.
+	// The target must be in that block if it exists.
+	if lastIdx < 0 {
+		return 0, 0, fs.ErrNotExist // no candidate block was kept, so nothing needs releasing
+	}
+
+	buf := lastBlk.bytes()
+	nid, ftype, err := lookupBlock(buf, targetBytes)
+	d.img.putBlock(lastBlk)
+	return nid, ftype, err
+}
+
+// blockFirstName returns the name of the first entry in a directory block, cut at
+// the first NUL byte. Name ends are ints so a 65536-byte block cannot wrap to 0.
+func blockFirstName(buf []byte) ([]byte, error) {
+	if len(buf) < disk.SizeDirent {
+		return nil, fmt.Errorf("directory block too small: %w", ErrInvalid)
+	}
+	var first disk.Dirent
+	if _, err := binary.Decode(buf[:disk.SizeDirent], binary.LittleEndian, &first); err != nil {
+		return nil, fmt.Errorf("decode failed: %w", err)
+	}
+	entryN := first.NameOff / disk.SizeDirent
+	if entryN == 0 || int(first.NameOff) > len(buf) {
+		return nil, fmt.Errorf("invalid name offset %d: %w", first.NameOff, ErrInvalid)
+	}
+	// A sole entry's name runs to the end of the block; otherwise it ends where
+	// the second entry's name begins (that offset is read 8 bytes into its dirent).
+	nameEnd := len(buf)
+	if entryN > 1 {
+		nextOff := int(disk.SizeDirent) + 8
+		if nextOff+2 > len(buf) {
+			return nil, fmt.Errorf("next dirent name offset out of range: %w", ErrInvalid)
+		}
+		nameEnd = int(binary.LittleEndian.Uint16(buf[nextOff:]))
+	}
+	if int(first.NameOff) > nameEnd || nameEnd > len(buf) {
+		return nil, fmt.Errorf("name range [%d:%d] out of bounds: %w", first.NameOff, nameEnd, ErrInvalid)
+	}
+	name := buf[first.NameOff:nameEnd]
+	if i := bytes.IndexByte(name, 0); i >= 0 {
+		name = name[:i]
+	}
+	return name, nil
+}
+
+// blockDirent decodes the dirent at index i from buf and returns it together
+// with the bytes of its name; entryN is the total number of entries. Offsets and
+// name ends are computed as ints so a 65536-byte block cannot wrap a uint16.
+func blockDirent(buf []byte, i, entryN uint16) (disk.Dirent, []byte, error) {
+	var de disk.Dirent
+	off := int(disk.SizeDirent) * int(i)
+	if off+disk.SizeDirent > len(buf) {
+		return de, nil, fmt.Errorf("dirent %d offset %d out of range: %w", i, off, ErrInvalid)
+	}
+	if _, err := binary.Decode(buf[off:off+disk.SizeDirent], binary.LittleEndian, &de); err != nil {
+		return de, nil, fmt.Errorf("decode dirent %d failed: %w", i, err)
+	}
+	// The name ends where the next entry's name starts, or at the block end.
+	nameEnd := len(buf)
+	if i < entryN-1 {
+		nextOff := off + disk.SizeDirent + 8
+		if nextOff+2 > len(buf) {
+			return de, nil, fmt.Errorf("dirent %d next name offset out of range: %w", i, ErrInvalid)
+		}
+		nameEnd = int(binary.LittleEndian.Uint16(buf[nextOff:]))
+	}
+	if int(de.NameOff) > nameEnd || nameEnd > len(buf) {
+		return de, nil, fmt.Errorf("dirent %d name range [%d:%d] out of bounds: %w", i, de.NameOff, nameEnd, ErrInvalid)
+	}
+	name := buf[de.NameOff:nameEnd]
+	// The last entry name may be NUL-terminated before the end of the block.
+	if i == entryN-1 {
+		if j := bytes.IndexByte(name, 0); j >= 0 {
+			name = name[:j]
+		}
+	}
+	return de, name, nil
+}
+
+// lookupBlock binary-searches one directory block for target and returns the
+// matching entry's nid and file type, or fs.ErrNotExist when it is absent.
+func lookupBlock(buf, target []byte) (uint64, fs.FileMode, error) {
+	if len(buf) < disk.SizeDirent {
+		return 0, 0, fmt.Errorf("directory block too small: %w", ErrInvalid)
+	}
+	var head disk.Dirent
+	if _, err := binary.Decode(buf[:disk.SizeDirent], binary.LittleEndian, &head); err != nil {
+		return 0, 0, fmt.Errorf("decode failed: %w", err)
+	}
+	if head.NameOff%disk.SizeDirent != 0 {
+		return 0, 0, fmt.Errorf("invalid name offset %d not aligned to dirent size: %w", head.NameOff, ErrInvalid)
+	}
+	count := head.NameOff / disk.SizeDirent
+	if int(head.NameOff) > len(buf) {
+		return 0, 0, fmt.Errorf("name offset %d exceeds block size %d: %w", head.NameOff, len(buf), ErrInvalid)
+	}
+
+	for low, high := uint16(0), count; low < high; {
+		mid := low + (high-low)/2
+		de, name, err := blockDirent(buf, mid, count)
+		if err != nil {
+			return 0, 0, err
+		}
+		order := bytes.Compare(name, target)
+		if order == 0 {
+			return de.Nid, disk.EroFSFtypeToFileMode(de.FileType), nil
+		}
+		if order < 0 {
+			low = mid + 1
+		} else {
+			high = mid
+		}
+	}
+	return 0, 0, fs.ErrNotExist
+}
+
+// inode holds the parsed on-disk inode data needed for I/O operations.
+// It is an internal type and is not returned to callers directly.
+type inode struct {
+	name        string      // name copied from the file that loaded this inode
+	nid         uint64      // inode number used to locate the inode on disk
+	icsize      int8        // size of the inode core: SizeInodeCompact or SizeInodeExtended
+	xsize       int         // size of the xattr area after the core; 0 when the inode declares no xattrs
+	inodeLayout uint8       // data layout taken from bits 1-3 of the on-disk format word
+	inodeData   uint32      // layout-dependent: start block address, chunk format, or input to RdevFromMode
+	size        int64       // file size in bytes
+	mode        fs.FileMode // on-disk mode with its type bits replaced by the file's ftype
+	rawMode     uint16      // mode exactly as stored on disk
+	uid         uint32
+	gid         uint32
+	nlink       int
+	mtime       uint64 // seconds part of the modification time; superblock build time for compact inodes
+	mtimeNs     uint32 // nanoseconds part of the modification time
+	cached      *block // block kept by readInfo for later use; nil once released
+}
+
+func (ino *inode) flatDataOffset() int64 {
+	// offset from the start of the inode to what follows it: inode core size + xattr size
+	return int64(ino.icsize) + int64(ino.xsize)
+}
+
+// fileInfo implements [fs.FileInfo] and provides extended metadata
+// via type-assertable accessor methods. Callers can extract
+// Unix-style metadata without importing this package:
+//
+//	if u, ok := fi.(interface{ UID() uint32 }); ok { uid = u.UID() }
+type fileInfo struct {
+	name       string
+	size       int64
+	mode       fs.FileMode
+	mtime      uint64      // seconds, passed to time.Unix by ModTime
+	mtimeNs    uint32      // nanoseconds, passed to time.Unix by ModTime
+	stat       *Stat       // value returned by Sys; also backs the UID/GID/Ino/Nlink/Rdev/xattr accessors
+	dataRanges []DataRange // set eagerly for flat layouts, or by rangesLoader on first DataRange call
+
+	// rangesOnce and rangesLoader support lazy computation of data ranges
+	// for chunk-based files (LayoutChunkBased). The loader performs a ReadAt
+	// to parse the chunk index, so it is deferred until the caller actually
+	// calls DataRange(). For flat layouts (FlatPlain, FlatInline), ranges
+	// are computed eagerly at stat time since they require no I/O.
+	rangesOnce   sync.Once
+	rangesLoader func() []DataRange
+}
+
+func (fi *fileInfo) Name() string       { return fi.name }
+func (fi *fileInfo) Size() int64        { return fi.size }
+func (fi *fileInfo) Mode() fs.FileMode  { return fi.mode }
+func (fi *fileInfo) IsDir() bool        { return fi.mode.IsDir() }
+func (fi *fileInfo) Sys() any           { return fi.stat }
+func (fi *fileInfo) ModTime() time.Time { return time.Unix(int64(fi.mtime), int64(fi.mtimeNs)) } // mtime is seconds, mtimeNs nanoseconds
+func (fi *fileInfo) UID() uint32        { return fi.stat.UID }
+func (fi *fileInfo) GID() uint32        { return fi.stat.GID }
+func (fi *fileInfo) Ino() uint64        { return uint64(fi.stat.Ino) } // stat.Ino is filled from the inode's nid by statInfo
+func (fi *fileInfo) Nlink() uint64      { return uint64(fi.stat.Nlink) }
+func (fi *fileInfo) Rdev() uint64       { return uint64(fi.stat.Rdev) }
+
+// DataRange reports where this file's uncompressed content lives on disk.
+// Chunk-based files compute the list on the first call; the result is nil
+// for compressed files, directories, symlinks and other non-regular entries.
+func (fi *fileInfo) DataRange() []DataRange {
+	if fi.rangesLoader == nil {
+		return fi.dataRanges
+	}
+	load := func() { fi.dataRanges = fi.rangesLoader() }
+	fi.rangesOnce.Do(load)
+	return fi.dataRanges
+}
+
+// GetAllXattr returns the complete extended-attribute map held in fi.stat.
+func (fi *fileInfo) GetAllXattr() map[string]string { return fi.stat.Xattrs }
+
+// GetXattr looks up one extended attribute; the bool reports whether it exists.
+func (fi *fileInfo) GetXattr(name string) (string, bool) {
+	value, found := fi.stat.Xattrs[name]
+	return value, found
+}
+// decodeSuperBlock decodes b into sb and checks the decoded length and magic number.
+func decodeSuperBlock(b [disk.SizeSuperBlock]byte, sb *disk.SuperBlock) error {
+	n, err := binary.Decode(b[:], binary.LittleEndian, sb)
+	switch {
+	case err != nil:
+		return err
+	case n != disk.SizeSuperBlock:
+		return fmt.Errorf("invalid super block: decoded %d bytes", n)
+	case sb.MagicNumber != disk.MagicNumber:
+		return fmt.Errorf("invalid super block: invalid magic number %x", sb.MagicNumber)
+	}
+	return nil
+}
diff --git a/vendor/github.com/erofs/go-erofs/format.go b/vendor/github.com/erofs/go-erofs/format.go
new file mode 100644
index 0000000..cb1d9d9
--- /dev/null
+++ b/vendor/github.com/erofs/go-erofs/format.go
@@ -0,0 +1,137 @@
+package erofs
+
+import (
+	"io/fs"
+	"sort"
+	"strings"
+
+	"github.com/erofs/go-erofs/internal/disk"
+)
+
+// xattrPrefixes maps each standard xattr name prefix to its on-disk NameIndex.
+var xattrPrefixes = [...]struct {
+	index  uint8
+	prefix string
+}{
+	{1, "user."},
+	{2, "system.posix_acl_access."},
+	{3, "system.posix_acl_default."},
+	{4, "trusted."},
+	{5, "lustre."},
+	{6, "security."},
+}
+
+// xattrSplit splits a full xattr name into (NameIndex, suffix); unknown prefixes give (0, name).
+func xattrSplit(name string) (uint8, string) {
+	for k := range xattrPrefixes {
+		if entry := xattrPrefixes[k]; strings.HasPrefix(name, entry.prefix) {
+			return entry.index, strings.TrimPrefix(name, entry.prefix)
+		}
+	}
+	return 0, name
+}
+
+// xattrEntrySize returns the on-disk size of a single xattr entry, padded to 4 bytes.
+func xattrEntrySize(name, value string) int {
+	_, suffix := xattrSplit(name)
+	sz := disk.SizeXattrEntry + len(suffix) + len(value)
+	if sz%4 != 0 {
+		sz = (sz + 3) & ^3
+	}
+	return sz
+}
+
+// calcXattrSize returns the size of e's xattr area (body header plus entries), or 0 without xattrs.
+func calcXattrSize(e *erofsEntry) int {
+	if len(e.xattrs) == 0 {
+		return 0
+	}
+	total := disk.SizeXattrBodyHeader
+	for key, val := range e.xattrs {
+		total += xattrEntrySize(key, val)
+	}
+	return total
+}
+
+// xattrCount encodes an xattr area size as the inode XattrCount value; size 0 encodes as 0.
+func xattrCount(xattrSize int) uint16 {
+	if xattrSize != 0 {
+		return uint16((xattrSize-disk.SizeXattrBodyHeader)/disk.SizeXattrEntry) + 1
+	}
+	return 0
+}
+
+// sortedXattrKeys returns the keys of m in ascending order, for deterministic iteration.
+func sortedXattrKeys(m map[string]string) []string {
+	names, next := make([]string, len(m)), 0
+	for name := range m {
+		names[next], next = name, next+1
+	}
+	sort.Strings(names)
+	return names
+}
+
+// inodeFormat packs the inode Format word: the layout is stored above bit 0,
+// and bit 0 is set for extended (non-compact) inodes.
+func inodeFormat(layout uint8, compact bool) uint16 {
+	if compact {
+		return uint16(layout) << 1
+	}
+	return uint16(layout)<<1 | 1
+}
+
+// goModeToUnixMode converts a Go fs.FileMode into Unix mode bits: permission
+// bits, setuid/setgid/sticky, and the file type. Types with no case below
+// contribute no type bits.
+func goModeToUnixMode(m fs.FileMode) uint16 {
+	mode := uint16(m.Perm())
+	special := [...]struct{ flag fs.FileMode; bit uint16 }{
+		{fs.ModeSetuid, disk.StatTypeIsUID},
+		{fs.ModeSetgid, disk.StatTypeIsGID},
+		{fs.ModeSticky, disk.StatTypeIsVTX},
+	}
+	for _, s := range special {
+		if m&s.flag != 0 {
+			mode |= s.bit
+		}
+	}
+	switch m.Type() {
+	case 0: // regular file
+		return mode | disk.StatTypeReg
+	case fs.ModeDir:
+		return mode | disk.StatTypeDir
+	case fs.ModeSymlink:
+		return mode | disk.StatTypeSymlink
+	case fs.ModeDevice | fs.ModeCharDevice:
+		return mode | disk.StatTypeChrdev
+	case fs.ModeDevice:
+		return mode | disk.StatTypeBlkdev
+	case fs.ModeNamedPipe:
+		return mode | disk.StatTypeFifo
+	case fs.ModeSocket:
+		return mode | disk.StatTypeSock
+	}
+	return mode
+}
+
+// modeToFileType converts Unix mode bits to an EROFS file type by comparing
+// the bits selected by StatTypeMask against each known type. Modes whose
+// type bits match none of them map to 0.
+func modeToFileType(mode uint16) uint8 {
+	kinds := [...]struct{ stat uint16; ftype uint8 }{
+		{disk.StatTypeReg, disk.FileTypeReg},
+		{disk.StatTypeDir, disk.FileTypeDir},
+		{disk.StatTypeChrdev, disk.FileTypeChrdev},
+		{disk.StatTypeBlkdev, disk.FileTypeBlkdev},
+		{disk.StatTypeFifo, disk.FileTypeFifo},
+		{disk.StatTypeSock, disk.FileTypeSock},
+		{disk.StatTypeSymlink, disk.FileTypeSymlink},
+	}
+	typ := mode & disk.StatTypeMask
+	for _, k := range kinds {
+		if k.stat == typ {
+			return k.ftype
+		}
+	}
+	return 0
+}
diff --git a/vendor/github.com/erofs/go-erofs/internal/builder/entry.go b/vendor/github.com/erofs/go-erofs/internal/builder/entry.go
new file mode 100644
index 0000000..abeaf97
--- /dev/null
+++ b/vendor/github.com/erofs/go-erofs/internal/builder/entry.go
@@ -0,0 +1,34 @@
+// Package builder provides shared types for the mkfs sub-packages.
+package builder
+
+import "io"
+
+// Entry carries the metadata of a filesystem entry that fs.FileInfo does not provide.
+// Mode and Size are taken from fs.FileInfo; every other attribute lives here.
+type Entry struct {
+	UID, GID     uint32
+	Mtime        uint64
+	MtimeNs      uint32
+	Nlink        uint32
+	Rdev         uint32
+	Xattrs       map[string]string
+	LinkTarget   string
+	Data         io.Reader // file content (full-image mode)
+	Chunks       []Chunk   // physical block refs (metadata-only mode)
+	Contiguous   bool      // data blocks are contiguous; flat-plain is sufficient
+	MetadataOnly bool      // chunk-based layout even without chunks
+}
+
+// NullPhysicalBlock is the sentinel value (all 64 bits set) for
+// Chunk.PhysicalBlock that marks a hole (a sparse region of zero bytes). It
+// corresponds to the on-disk EROFS null chunk encoding (StartBlkHi=0xFFFF, StartBlkLo=0xFFFFFFFF).
+const NullPhysicalBlock uint64 = ^uint64(0)
+
+// Chunk maps a range of logical blocks to physical blocks on a device.
+// If PhysicalBlock == NullPhysicalBlock the chunk is a hole: Count logical
+// blocks of zeros with no physical backing. DeviceID is ignored for holes.
+type Chunk struct {
+	PhysicalBlock uint64 // physical block address, or NullPhysicalBlock for a hole
+	Count         uint16 // number of contiguous blocks (a uint16, so at most 65535 per Chunk)
+	DeviceID      uint16 // 0 = primary, 1+ = extra device; ignored for holes
+}
diff --git a/vendor/github.com/erofs/go-erofs/internal/disk/ftypes.go b/vendor/github.com/erofs/go-erofs/internal/disk/ftypes.go
new file mode 100644
index 0000000..e49621f
--- /dev/null
+++ b/vendor/github.com/erofs/go-erofs/internal/disk/ftypes.go
@@ -0,0 +1,88 @@
+package disk
+
+import "io/fs"
+
+const (
+	FileTypeReg     = 1 // regular file
+	FileTypeDir     = 2 // directory
+	FileTypeChrdev  = 3 // character device
+	FileTypeBlkdev  = 4 // block device
+	FileTypeFifo    = 5 // FIFO
+	FileTypeSock    = 6 // socket
+	FileTypeSymlink = 7 // symbolic link
+
+	StatTypeMask    = 0170000 // Mask for the type bits
+	StatTypeReg     = 0100000 // Regular file
+	StatTypeDir     = 0040000 // Directory
+	StatTypeChrdev  = 0020000 // Character device
+	StatTypeBlkdev  = 0060000 // Block device
+	StatTypeFifo    = 0010000 // FIFO
+	StatTypeSock    = 0140000 // Socket
+	StatTypeSymlink = 0120000 // Symlink
+	StatTypeIsUID   = 0004000 // Setuid on execution
+	StatTypeIsGID   = 0002000 // Setgid on execution
+	StatTypeIsVTX   = 0001000 // Sticky bit
+)
+
+// EroFSFtypeToFileMode converts an EROFS file type (FileType*) to the Go fs.FileMode type bits.
+func EroFSFtypeToFileMode(ftype uint8) fs.FileMode {
+	switch ftype {
+	case FileTypeDir:
+		return fs.ModeDir
+	case FileTypeChrdev:
+		return fs.ModeDevice | fs.ModeCharDevice
+	case FileTypeBlkdev:
+		return fs.ModeDevice
+	case FileTypeFifo:
+		return fs.ModeNamedPipe
+	case FileTypeSock:
+		return fs.ModeSocket
+	case FileTypeSymlink:
+		return fs.ModeSymlink
+	default: // FileTypeReg and unrecognized values carry no type bits
+		return 0
+	}
+}
+
+func EroFSModeToGoFileMode(mode uint16) fs.FileMode { // converts Unix mode bits to the equivalent Go fs.FileMode
+	var m fs.FileMode
+	m |= fs.FileMode(mode & 0777) // rwx permission bits map one-to-one
+	switch mode & StatTypeMask {
+	case StatTypeReg: // regular file: Go uses no type bit
+	case StatTypeDir:
+		m |= fs.ModeDir
+	case StatTypeChrdev:
+		m |= fs.ModeDevice | fs.ModeCharDevice
+	case StatTypeBlkdev:
+		m |= fs.ModeDevice
+	case StatTypeFifo:
+		m |= fs.ModeNamedPipe
+	case StatTypeSock:
+		m |= fs.ModeSocket
+	case StatTypeSymlink:
+		m |= fs.ModeSymlink
+	default:
+		m |= fs.ModeIrregular // Unknown type, treat as irregular file
+	}
+	if mode&StatTypeIsUID != 0 { // setuid, setgid and sticky each map to a dedicated fs.FileMode flag
+		m |= fs.ModeSetuid
+	}
+	if mode&StatTypeIsGID != 0 {
+		m |= fs.ModeSetgid
+	}
+	if mode&StatTypeIsVTX != 0 {
+		m |= fs.ModeSticky
+	}
+
+	return m
+}
+
+func RdevFromMode(mode uint16, inodeData uint32) uint32 { // returns inodeData as a device number when mode's type allows it, else 0
+	switch mode & StatTypeMask {
+	case StatTypeChrdev, StatTypeBlkdev, StatTypeFifo, StatTypeSock:
+		// inodeData is returned as the device number for devices, FIFOs and sockets
+		return inodeData
+	default:
+		return 0 // any other type: inodeData is not a device number
+	}
+}
diff --git a/vendor/github.com/erofs/go-erofs/internal/disk/types.go b/vendor/github.com/erofs/go-erofs/internal/disk/types.go
new file mode 100644
index 0000000..5f654c6
--- /dev/null
+++ b/vendor/github.com/erofs/go-erofs/internal/disk/types.go
@@ -0,0 +1,155 @@
+package disk
+
+const (
+	MagicNumber      = 0xe0f5e1e2
+	SuperBlockOffset = 1024
+
+	FeatureIncompatLZ4_0Padding         = 0x1
+	FeatureIncompatChunkedFile          = 0x4
+	FeatureIncompatDeviceTable          = 0x8
+	FeatureIncompatFragments            = 0x20
+	FeatureIncompatXattrPrefixes        = 0x40
+	FeatureIncompatAll           uint32 = FeatureIncompatLZ4_0Padding |
+		FeatureIncompatChunkedFile | FeatureIncompatDeviceTable |
+		FeatureIncompatFragments | FeatureIncompatXattrPrefixes
+
+	SizeSuperBlock      = 128 // bytes; equals the summed field sizes of SuperBlock
+	SizeInodeCompact    = 32  // bytes; equals the summed field sizes of InodeCompact
+	SizeInodeExtended   = 64  // bytes; equals the summed field sizes of InodeExtended
+	SizeDirent          = 12  // bytes; equals the summed field sizes of Dirent
+	SizeXattrBodyHeader = 12  // bytes; equals the summed field sizes of XattrHeader
+	SizeXattrEntry      = 4   // bytes; equals the summed field sizes of XattrEntry
+	SizeDeviceSlot      = 128 // bytes; equals the summed field sizes of DeviceSlot
+	SizeChunkIndex      = 8   // bytes; equals the summed field sizes of InodeChunkIndex
+
+	LayoutFlatPlain         = 0
+	LayoutCompressedFull    = 1
+	LayoutFlatInline        = 2
+	LayoutCompressedCompact = 3
+	LayoutChunkBased        = 4
+
+	LayoutChunkFormatBits    = 0x001F
+	LayoutChunkFormatIndexes = 0x0020
+	LayoutChunkFormat48Bit   = 0x0040
+)
+
+// SuperBlock represents the EROFS on-disk superblock; its fields add up to 128 bytes (SizeSuperBlock).
+// See: https://docs.kernel.org/filesystems/erofs.html#on-disk-layout
+type SuperBlock struct {
+	MagicNumber      uint32
+	Checksum         uint32
+	FeatureCompat    uint32
+	BlkSizeBits      uint8
+	ExtSlots         uint8
+	RootNid          uint16
+	Inos             uint64
+	BuildTime        uint64
+	BuildTimeNs      uint32
+	Blocks           uint32
+	MetaBlkAddr      uint32
+	XattrBlkAddr     uint32
+	UUID             [16]uint8
+	VolumeName       [16]uint8
+	FeatureIncompat  uint32
+	ComprAlgs        uint16
+	ExtraDevices     uint16
+	DevtSlotOff      uint16
+	DirBlkBits       uint8
+	XattrPrefixCount uint8
+	XattrPrefixStart uint32
+	PackedNid        uint64 // Nid of the special "packed" inode for shared data/prefixes
+	XattrFilterRes   uint8
+	Reserved         [23]uint8
+}
+
+// InodeCompact represents the 32-byte on-disk compact inode: 16-bit UID/GID/Nlink, 32-bit Size, and no mtime field.
+type InodeCompact struct {
+	Format     uint16 // i_format
+	XattrCount uint16 // i_xattr_icount
+	Mode       uint16 // i_mode
+	Nlink      uint16 // i_nlink
+	Size       uint32 // i_size
+	Reserved   uint32 // i_reserved
+	InodeData  uint32 // i_u (i_raw_blkaddr, i_rdev, etc.)
+	Inode      uint32 // i_ino
+	UID        uint16 // i_uid
+	GID        uint16 // i_gid
+	Reserved2  uint32 // i_reserved2
+}
+
+// InodeExtended represents the 64-byte on-disk extended inode: 32-bit UID/GID/Nlink, 64-bit Size, and its own Mtime/MtimeNs.
+type InodeExtended struct {
+	Format     uint16 // i_format
+	XattrCount uint16 // i_xattr_icount
+	Mode       uint16 // i_mode
+	Reserved   uint16 // i_reserved
+	Size       uint64 // i_size
+	InodeData  uint32 // i_u (i_raw_blkaddr, i_rdev, etc.)
+	Inode      uint32 // i_ino
+	UID        uint32 // i_uid
+	GID        uint32 // i_gid
+	Mtime      uint64 // i_mtime
+	MtimeNs    uint32 // i_mtime_nsec
+	Nlink      uint32 // i_nlink
+	Reserved2  [16]uint8
+}
+
+type Dirent struct { // directory entry header; fields add up to 12 bytes (SizeDirent)
+	Nid      uint64 // NID of the inode this entry names
+	NameOff  uint16
+	FileType uint8  // one of the FileType* constants
+	Reserved uint8
+}
+
+// XattrHeader is the header that follows an inode and describes its xattr information.
+//
+// Original definition:
+// inline xattrs (n == i_xattr_icount):
+// erofs_xattr_ibody_header(1) + (n - 1) * 4 bytes
+//
+//	12 bytes           /                   \
+//	                  /                     \
+//	                 /-----------------------\
+//	                 |  erofs_xattr_entries+ |
+//	                 +-----------------------+
+//
+// Inline xattrs must start with erofs_xattr_ibody_header;
+// for a read-only fs there is no need to introduce h_refcount.
+// The actual name is prefix | long prefix (prefix + infix), followed by name.
+type XattrHeader struct {
+	NameFilter  uint32 // a bit value of 1 indicates not-present
+	SharedCount uint8
+	Reserved    [7]uint8
+}
+
+type XattrEntry struct { // fixed part of one xattr entry; fields add up to 4 bytes (SizeXattrEntry)
+	NameLen   uint8  // length of name
+	NameIndex uint8  // index of name in XattrHeader, 0x80 set indicates long prefix at index&0x7F + XattrPrefixStart
+	ValueLen  uint16 // length of value
+	// Name bytes, then value bytes (variable length, so not struct fields)
+}
+
+type XattrLongPrefixitem struct {
+	PrefixAddr uint32 // address of the long prefix (units are not shown here — TODO confirm)
+	PrefixLen  uint8  // length of the long prefix
+}
+
+type XattrLongPrefix struct {
+	BaseIndex uint8 // short xattr name prefix index
+	// The infix part after the short prefix follows (variable length, so not a struct field)
+}
+
+type InodeChunkIndex struct {
+	StartBlkHi uint16 // part of 48-bit support (not yet implemented)
+	DeviceID   uint16
+	StartBlkLo uint32 // low 32 bits of the start block; fields add up to 8 bytes (SizeChunkIndex)
+}
+
+// DeviceSlot represents the on-disk device table entry (erofs_deviceslot); its fields add up to 128 bytes (SizeDeviceSlot).
+// See: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/erofs/erofs_fs.h
+type DeviceSlot struct {
+	Tag           [64]uint8 // digest(sha256), etc.
+	Blocks        uint32    // total fs blocks of this device
+	MappedBlkAddr uint32    // map starting at mapped_blkaddr
+	Reserved      [56]uint8
+}
diff --git a/vendor/github.com/erofs/go-erofs/layout.go b/vendor/github.com/erofs/go-erofs/layout.go
new file mode 100644
index 0000000..677366d
--- /dev/null
+++ b/vendor/github.com/erofs/go-erofs/layout.go
@@ -0,0 +1,225 @@
+package erofs
+
+import (
+	"sort"
+
+	"github.com/erofs/go-erofs/internal/disk"
+)
+
+// planLayout assigns each entry its NID, inode form (compact or extended), data layout and trailing size, then records the root NID.
+func (w *erofsWriter) planLayout(root *erofsEntry) {
+	// Collect all entries in a deterministic order (DFS, pre-order).
+	// DFS keeps directory contents close to their parent inode,
+	// improving locality for operations like find and ls -lR.
+	// Hardlink alias entries (linkTo != nil) are skipped — they share the
+	// NID of the canonical entry and do not get their own inode slot.
+	w.entries = nil
+	var walk func(e *erofsEntry)
+	walk = func(e *erofsEntry) {
+		if e.linkTo != nil {
+			return // alias: no inode, NID comes from linkTo
+		}
+		w.entries = append(w.entries, e)
+		if e.mode&disk.StatTypeMask == disk.StatTypeDir {
+			sort.Slice(e.children, func(i, j int) bool { // sorts e.children in place, by name
+				return e.children[i].name < e.children[j].name
+			})
+			for _, c := range e.children {
+				walk(c)
+			}
+		}
+	}
+	walk(root)
+
+	w.totalInodes = uint64(len(w.entries))
+
+	// Block 0 holds: 1024-byte pad + 128-byte superblock + device slot(s) + padding
+	// MetaBlkAddr is set later by write() depending on the on-disk layout.
+
+	// Assign NIDs sequentially.
+	// NID = byte offset from metaStartPos / 32.
+	// An extended inode is 64 bytes = 2 NID slots; a compact inode is 32 bytes = 1 slot.
+	// Trailing data follows and is padded to 32-byte boundary.
+	currentOff := 0 // byte offset from metaStartPos
+	for _, e := range w.entries {
+		e.nid = uint64(currentOff / 32)
+		e.xattrSize = calcXattrSize(e)
+
+		// Decide compact (32B) vs extended (64B) inode: compact needs 16-bit uid/gid/nlink, a 32-bit size and mtime equal to the build time.
+		e.compact = e.uid <= 0xFFFF && e.gid <= 0xFFFF &&
+			e.nlink <= 0xFFFF && e.size <= 0xFFFFFFFF &&
+			e.mtime == w.buildTime && e.mtimeNs == 0 // NOTE(review): nanoseconds are compared with 0, not with a build-time nanosecond value — confirm intended
+
+		inodeSize := disk.SizeInodeExtended
+		if e.compact {
+			inodeSize = disk.SizeInodeCompact
+		}
+
+		// The inode header region is inode core + xattr area.
+		// Trailing data (dirents, chunk indexes, inline data) follows.
+		headerSize := inodeSize + e.xattrSize
+
+		// Determine layout (a first guess; inline choices are re-checked against the final offset below)
+		switch e.mode & disk.StatTypeMask {
+		case disk.StatTypeReg:
+			switch {
+			case e.size == 0 && len(e.chunks) == 0 && e.data == nil && !e.metadataOnly:
+				e.layout = disk.LayoutFlatPlain
+			case len(e.chunks) > 0 || e.metadataOnly:
+				e.layout = disk.LayoutChunkBased
+				if e.contiguous {
+					e.chunkBits = w.minChunkBits(e.size)
+				}
+			default:
+				// Full-image mode: decide inline vs plain
+				if int(e.size) <= w.blockSize-headerSize {
+					inBlockOff := (currentOff + headerSize) % w.blockSize
+					if inBlockOff+int(e.size) <= w.blockSize {
+						e.layout = disk.LayoutFlatInline
+					} else {
+						e.layout = disk.LayoutFlatPlain
+					}
+				} else {
+					e.layout = disk.LayoutFlatPlain
+				}
+			}
+		case disk.StatTypeDir:
+			direntDataSize := w.direntDataSize(e)
+			inBlockOff := (currentOff + headerSize) % w.blockSize
+			if direntDataSize > 0 && inBlockOff+direntDataSize <= w.blockSize {
+				e.layout = disk.LayoutFlatInline
+			} else {
+				e.layout = disk.LayoutFlatPlain
+			}
+		case disk.StatTypeSymlink:
+			inBlockOff := (currentOff + headerSize) % w.blockSize
+			if len(e.symTarget) > 0 && inBlockOff+len(e.symTarget) <= w.blockSize {
+				e.layout = disk.LayoutFlatInline
+			} else {
+				e.layout = disk.LayoutFlatPlain
+			}
+		default:
+			// Device files, fifos, sockets
+			e.layout = disk.LayoutFlatPlain
+		}
+
+		// Recalculate trailing size now that layout is decided
+		e.trailingSize = w.calcTrailingSize(e)
+
+		totalInodeSize := headerSize + e.trailingSize
+		// Pad to 32-byte boundary
+		if totalInodeSize%32 != 0 {
+			totalInodeSize = (totalInodeSize + 31) & ^31
+		}
+
+		// Check block boundary: inode core must not cross a block boundary
+		blockOff := currentOff % w.blockSize
+		if blockOff+inodeSize > w.blockSize {
+			// Align to next block and re-derive the NID from the moved offset
+			currentOff = (currentOff + w.blockSize - 1) & ^(w.blockSize - 1)
+			e.nid = uint64(currentOff / 32)
+		}
+
+		// Re-check inline layouts against the final offset: header plus trailing data must not cross a block boundary
+		if e.layout == disk.LayoutFlatInline {
+			blockOff = currentOff % w.blockSize
+			if blockOff+headerSize+e.trailingSize > w.blockSize {
+				// Fall back to flat-plain (data would cross block boundary)
+				e.layout = disk.LayoutFlatPlain
+				e.trailingSize = w.calcTrailingSize(e)
+				totalInodeSize = headerSize + e.trailingSize
+				if totalInodeSize%32 != 0 {
+					totalInodeSize = (totalInodeSize + 31) & ^31
+				}
+			}
+		}
+
+		currentOff += totalInodeSize
+	}
+
+	w.rootNid = root.nid
+}
+
+// calcTrailingSize returns the number of bytes that follow e's inode header (inode core + xattr area) for its current layout.
+func (w *erofsWriter) calcTrailingSize(e *erofsEntry) int {
+	switch e.mode & disk.StatTypeMask {
+	case disk.StatTypeReg:
+		if e.layout == disk.LayoutChunkBased {
+			if e.size == 0 && len(e.chunks) == 0 {
+				return 0
+			}
+			cs := w.entryChunkSize(e)
+			nchunks := (int(e.size) + cs - 1) / cs // ceiling division: a partial last chunk still needs an index
+			return nchunks * disk.SizeChunkIndex
+		}
+		if e.layout == disk.LayoutFlatInline {
+			return int(e.size) // file content is stored inline after the header
+		}
+		return 0
+	case disk.StatTypeDir:
+		if e.layout == disk.LayoutFlatInline {
+			return w.direntDataSize(e)
+		}
+		return 0
+	case disk.StatTypeSymlink:
+		if e.layout == disk.LayoutFlatInline {
+			return len(e.symTarget)
+		}
+		return 0
+	default:
+		return 0 // every other file type has no trailing data
+	}
+}
+
+// direntNames returns the dirent names for directory e, including "." and "..",
+// sorted with sort.Strings (byte-wise order). EROFS requires dirents within each
+// block to be sorted alphabetically.
+func direntNames(e *erofsEntry) []string {
+	names := make([]string, 0, len(e.children)+2) // +2 for "." and ".."
+	names = append(names, ".", "..")
+	for _, c := range e.children {
+		names = append(names, c.name)
+	}
+	sort.Strings(names)
+	return names
+}
+
+// direntDataSize calculates the serialized EROFS dirent data size for directory e: one 12-byte header plus the name bytes per entry.
+// For multi-block directories, this includes inter-block padding.
+func (w *erofsWriter) direntDataSize(e *erofsEntry) int {
+	names := direntNames(e) // already includes "." and ".."
+	nEntries := len(names)
+	if len(e.children) == 0 {
+		// Empty dir still needs "." and ".." entries
+		return 2*disk.SizeDirent + 1 + 2 // two dirent headers + len(".") + len("..")
+	}
+
+	totalSize := 0
+	i := 0
+	for i < nEntries {
+		blockUsed := 0
+		start := i
+		nameSize := 0
+		for j := i; j < nEntries; j++ { // greedily pack as many entries as fit into one block
+			headerSize := (j - start + 1) * disk.SizeDirent
+			nameSize += len(names[j])
+			needed := headerSize + nameSize
+			if needed > w.blockSize {
+				break
+			}
+			blockUsed = needed
+			i = j + 1
+		}
+		if i == start { // a single entry larger than a block: count it alone so the loop always advances
+			blockUsed = disk.SizeDirent + len(names[i])
+			i++
+		}
+		// Pad non-final blocks to block boundary
+		if i < nEntries && blockUsed%w.blockSize != 0 {
+			blockUsed = (blockUsed + w.blockSize - 1) & ^(w.blockSize - 1)
+		}
+		totalSize += blockUsed
+	}
+
+	return totalSize
+}
diff --git a/vendor/github.com/erofs/go-erofs/mkfs.go b/vendor/github.com/erofs/go-erofs/mkfs.go
new file mode 100644
index 0000000..208950a
--- /dev/null
+++ b/vendor/github.com/erofs/go-erofs/mkfs.go
@@ -0,0 +1,1825 @@
+package erofs
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"io/fs"
+	"math/bits"
+	"os"
+	"path"
+	"sort"
+	"strings"
+	"time"
+
+	"github.com/erofs/go-erofs/internal/builder"
+	"github.com/erofs/go-erofs/internal/disk"
+)
+
+// errDirNotEmpty is returned by Remove, wrapped in *fs.PathError, when the named path is a directory
+// that still has children not marked removed. Mirrors the behavior of os.Remove (which returns ENOTEMPTY).
+var errDirNotEmpty = errors.New("directory not empty")
+
+// --- Exported types ---
+
+// Writer is a writable filesystem that produces an EROFS image on Close.
+// Entries are added via Create, Mkdir, Symlink, Mknod, Link, and CopyFrom, then finalized
+// by calling Close which serializes the complete EROFS image.
+type Writer struct {
+	out          io.WriteSeeker
+	closed       bool
+	blockSize    int    // 0 = unset, resolved to defaultBlockSize in Close
+	buildTime    uint64 // from WithBuildTime, a source image (CopyFrom), or buildTimer
+	buildTimeNs  uint32
+	hasBuildTime bool
+	wErr         error               // sticky error: once set, all subsequent ops return it
+	root         *fsEntry            // root directory
+	byPath       map[string]*fsEntry // path → entry (all types)
+
+	devices []uint64 // per-device block counts (data-file slot first, if any, then MetadataOnly sources)
+
+	// Per-CopyFrom state, reset at the start of each CopyFrom call.
+	copyMetadataOnly bool   // metadata-only for current CopyFrom
+	copyMerge        bool   // merge mode: apply whiteouts
+	copyDeviceID     uint16 // device ID assigned to current MetadataOnly CopyFrom
+
+	dataFile *os.File // external data file (nil = spool mode)
+	dataOff  int64    // current byte offset in data file
+	spool    *os.File // temp spool (created lazily)
+	spoolOff int64    // current byte offset in spool
+	tempDir  string   // from WithTempDir
+	cpBuf    []byte   // shared buffer for io.Copy into File
+	padBuf   []byte   // shared zero buffer for padding (block-sized, lazy)
+}
+
+// File is a writable regular file returned by Writer.Create.
+// Data is written via Write or ReadFrom, then committed with Close.
+type File struct {
+	fs           *Writer
+	entry        *fsEntry
+	dataStartOff int64 // byte offset (in the data file or the spool) where this file's data begins
+	written      int64
+	closed       bool
+}
+
+// CreateOpt configures EROFS image creation; options are applied by Create before the Writer is built.
+type CreateOpt func(*createOptions)
+
+// CopyOpt configures a single CopyFrom call by setting per-call state on the Writer, which CopyFrom resets first.
+type CopyOpt func(*Writer)
+
+// --- Constructor ---
+
+// Create returns a Writer that produces an EROFS image on Close; an invalid block size becomes the Writer's sticky error.
+// Options configure block size, build time, data file, and temp directory.
+func Create(out io.WriteSeeker, opts ...CreateOpt) *Writer {
+	var o createOptions
+	for _, opt := range opts {
+		opt(&o)
+	}
+
+	root := &fsEntry{
+		path: "/",
+		mode: disk.StatTypeDir | 0o755,
+	}
+	fsys := &Writer{
+		out:          out,
+		buildTime:    o.buildTime,
+		buildTimeNs:  o.buildTimeNs,
+		hasBuildTime: o.hasBuildTime,
+		root:         root,
+		byPath:       map[string]*fsEntry{"/": root},
+		dataFile:     o.dataFile,
+		tempDir:      o.tempDir,
+	}
+
+	if o.blockSize != 0 {
+		if err := fsys.setBlockSize(o.blockSize); err != nil {
+			fsys.wErr = err // Create has no error return, so the failure surfaces on the first Writer operation
+		}
+	}
+
+	if o.dataFile != nil {
+		// Reserve device slot 0 (DeviceID=1) for the data file.
+		// MetadataOnly CopyFrom device IDs will start at slot 1+.
+		// The reserved slot is filled in with the actual block count at Close.
+		fsys.devices = append(fsys.devices, 0)
+		off, err := o.dataFile.Seek(0, io.SeekEnd)
+		if err == nil { // NOTE(review): a Seek failure is silently ignored here and dataOff stays 0
+			fsys.dataOff = off
+		}
+	}
+
+	return fsys
+}
+
+// --- CopyOpt functions ---
+
+// MetadataOnly configures the current CopyFrom to emit only metadata.
+// Regular files with pre-existing chunk mappings use chunk-based layout
+// referencing an external device; file data is not copied.
+func MetadataOnly() CopyOpt {
+	return func(w *Writer) {
+		w.copyMetadataOnly = true // CopyFrom resets this to false at the start of every call
+	}
+}
+
+// Merge enables overlay merge semantics for the current CopyFrom.
+// AUFS-style whiteout files (.wh.<name>) delete the named entry from
+// prior layers, and opaque markers (.wh..wh..opq) delete all children
+// of their parent directory. The whiteout entries themselves are not
+// added to the image.
+//
+// When using Merge with a source containing AUFS whiteout files, do not
+// pre-convert them; the Writer processes raw whiteout entries directly.
+func Merge() CopyOpt {
+	return func(w *Writer) {
+		w.copyMerge = true // CopyFrom resets this to false at the start of every call
+	}
+}
+
+// --- CreateOpt functions ---
+
+// WithBlockSize sets the filesystem block size. The value must be a power
+// of two between 512 and 64 KiB. When unset the default is 4096.
+// An invalid size causes subsequent Writer operations to return an error.
+// If CopyFrom is called with a source that declares a different block size,
+// CopyFrom returns an error.
+func WithBlockSize(n int) CreateOpt {
+	return func(o *createOptions) {
+		o.blockSize = n // not validated here; Create passes it to setBlockSize
+	}
+}
+
+// WithBuildTime sets the filesystem build timestamp (seconds and nanoseconds) and marks it as explicitly chosen.
+func WithBuildTime(sec uint64, nsec uint32) CreateOpt {
+	return func(o *createOptions) {
+		o.buildTime = sec
+		o.buildTimeNs = nsec
+		o.hasBuildTime = true // stops CopyFrom from adopting a source image's build time
+	}
+}
+
+// WithDataFile sets an external data file for metadata-only mode.
+// File.Write appends to this file at block-aligned offsets; chunk
+// indexes reference those blocks with DeviceID=1.
+func WithDataFile(f *os.File) CreateOpt {
+	return func(o *createOptions) {
+		o.dataFile = f // Create seeks f to its end and reserves device slot 0 for it
+	}
+}
+
+// WithTempDir overrides the temp directory for the spool file.
+// Only used when no data file is provided.
+func WithTempDir(dir string) CreateOpt {
+	return func(o *createOptions) {
+		o.tempDir = dir // copied to Writer.tempDir by Create
+	}
+}
+
+// --- Writer entry methods ---
+
+// Create creates a regular file with default mode 0644 and records where its data will start
+// (in the data file, or else in the spool). The caller must Close the returned File.
+func (fsys *Writer) Create(name string) (*File, error) {
+	if fsys.wErr != nil {
+		return nil, fsys.wErr
+	}
+	name = cleanPath(name)
+	if name == "/" {
+		return nil, fmt.Errorf("mkfs: cannot create file at root")
+	}
+	if err := fsys.checkPath(name); err != nil {
+		return nil, err
+	}
+
+	fsys.ensureParent(name)
+
+	e := &fsEntry{
+		path: name,
+		mode: disk.StatTypeReg | 0o644,
+	}
+	fsys.addChild(e)
+
+	f := &File{
+		fs:    fsys,
+		entry: e,
+	}
+
+	if fsys.dataFile != nil {
+		f.dataStartOff = fsys.dataOff
+		e.dataStartOff = fsys.dataOff
+	} else {
+		if err := fsys.ensureSpool(); err != nil { // NOTE(review): e has already been added via addChild when this fails
+			return nil, err
+		}
+		f.dataStartOff = fsys.spoolOff
+		e.spoolOff = fsys.spoolOff
+		e.dataStartOff = fsys.spoolOff
+	}
+
+	return f, nil
+}
+
+// Mkdir creates a directory. Only the rwx permission bits of perm are used (perm.Perm());
+// type bits are forced to directory. Mkdir("/", perm) sets root permissions.
+func (fsys *Writer) Mkdir(name string, perm fs.FileMode) error {
+	if fsys.wErr != nil {
+		return fsys.wErr
+	}
+	name = cleanPath(name)
+	if name == "/" {
+		fsys.root.mode = disk.StatTypeDir | uint16(perm.Perm()) // the root always exists, so only its mode is replaced
+		return nil
+	}
+	if err := fsys.checkPath(name); err != nil {
+		return err
+	}
+
+	fsys.ensureParent(name)
+
+	e := &fsEntry{
+		path: name,
+		mode: disk.StatTypeDir | uint16(perm.Perm()),
+	}
+	fsys.addChild(e)
+
+	return nil
+}
+
+// Symlink creates newname as a symbolic link to oldname (mode 0777); oldname is stored verbatim as the link target.
+func (fsys *Writer) Symlink(oldname, newname string) error {
+	if fsys.wErr != nil {
+		return fsys.wErr
+	}
+	newname = cleanPath(newname) // only newname is cleaned
+	if newname == "/" {
+		return fmt.Errorf("mkfs: cannot create symlink at root")
+	}
+	if err := fsys.checkPath(newname); err != nil {
+		return err
+	}
+
+	fsys.ensureParent(newname)
+
+	e := &fsEntry{
+		path:       newname,
+		mode:       disk.StatTypeSymlink | 0o777,
+		linkTarget: oldname,
+	}
+	fsys.addChild(e)
+
+	return nil
+}
+
+// Mknod creates a device, FIFO, or socket. mode must include type bits
+// (e.g. disk.StatTypeChrdev | 0o666); this method stores mode and rdev without validating them.
+func (fsys *Writer) Mknod(name string, mode uint16, rdev uint32) error {
+	if fsys.wErr != nil {
+		return fsys.wErr
+	}
+	name = cleanPath(name)
+	if name == "/" {
+		return fmt.Errorf("mkfs: cannot mknod at root")
+	}
+	if err := fsys.checkPath(name); err != nil {
+		return err
+	}
+
+	fsys.ensureParent(name)
+
+	e := &fsEntry{
+		path: name,
+		mode: mode, // stored as given, type bits included
+		rdev: rdev,
+	}
+	fsys.addChild(e)
+
+	return nil
+}
+
+// Link creates newname as a hard link to oldname. oldname must refer to an
+// existing regular file, character device, block device, FIFO, or socket —
+// directories and symlinks cannot be used as hard-link targets.
+//
+// Both paths may be in different directories; newname's parent directory must
+// already exist. Link returns an error if the cleaned paths are identical, if newname is the
+// root, if the Writer is closed, if oldname is not found, if newname already exists, or if the target is a directory or symlink.
+//
+// After Link, both paths share the same inode in the produced EROFS image.
+// The computed nlink on oldname's inode equals 1 + the number of Link calls
+// that targeted it (transitively). SetNlink must not be called on any path
+// participating in a hardlink group; Link discards any earlier SetNlink override on the target.
+func (fsys *Writer) Link(oldname, newname string) error {
+	if fsys.wErr != nil {
+		return fsys.wErr
+	}
+	oldname = cleanPath(oldname)
+	newname = cleanPath(newname)
+
+	if oldname == newname {
+		return fmt.Errorf("mkfs: Link: oldname and newname are the same: %q", oldname)
+	}
+	if newname == "/" {
+		return fmt.Errorf("mkfs: Link: cannot create hardlink at root")
+	}
+	if fsys.closed {
+		return fmt.Errorf("mkfs: FS is closed")
+	}
+
+	// Resolve the target. It may itself be a hardlink alias — in that case,
+	// follow to the canonical entry so all aliases share one fsEntry.
+	target, ok := fsys.byPath[oldname]
+	if !ok {
+		return fmt.Errorf("mkfs: Link: %q not found", oldname)
+	}
+	if target.linkedTo != nil {
+		target = target.linkedTo // one hop suffices: aliases are always registered against the canonical entry
+	}
+
+	// Validate target type: no directories, no symlinks.
+	typ := target.mode & disk.StatTypeMask
+	if typ == disk.StatTypeDir {
+		return fmt.Errorf("mkfs: Link: %q is a directory", oldname)
+	}
+	if typ == disk.StatTypeSymlink {
+		return fmt.Errorf("mkfs: Link: %q is a symlink", oldname)
+	}
+
+	// newname must not already exist.
+	if _, exists := fsys.byPath[newname]; exists {
+		return fmt.Errorf("mkfs: Link: %q already exists", newname)
+	}
+
+	// Ensure newname's parent directory exists.
+	fsys.ensureParent(newname) // NOTE(review): defined elsewhere; if it creates missing parents, "must already exist" in the doc is stale — confirm
+
+	// Register the alias. The alias fsEntry exists only as a byPath/tree entry;
+	// it does not duplicate data — it points back to the canonical entry.
+	alias := &fsEntry{
+		path:     newname,
+		linkedTo: target,
+	}
+	fsys.addChild(alias)
+
+	// Record the alias path on the canonical entry and bump its nlink.
+	target.hardlinks = append(target.hardlinks, newname)
+	// nlink is recomputed from len(hardlinks)+1 in buildErofsTree; clear any
+	// previously set nlink so it doesn't interfere.
+	target.nlinkSet = false
+
+	return nil
+}
+
+// --- Writer metadata methods ---
+
+// Chmod sets the permission, setuid, setgid and sticky bits on the named path, preserving its type bits.
+func (fsys *Writer) Chmod(name string, mode fs.FileMode) error {
+	if fsys.wErr != nil {
+		return fsys.wErr
+	}
+	e, err := fsys.lookup(name)
+	if err != nil {
+		return err
+	}
+	perm := goModeToUnixMode(mode) & 0o7777 // keep rwx plus setuid/setgid/sticky; drop any type bits carried by mode
+	e.mode = (e.mode & disk.StatTypeMask) | perm
+	return nil
+}
+
+// Chown sets the owner UID and GID on the named path; both values are converted to uint32 without a range check.
+func (fsys *Writer) Chown(name string, uid, gid int) error {
+	if fsys.wErr != nil {
+		return fsys.wErr
+	}
+	e, err := fsys.lookup(name)
+	if err != nil {
+		return err
+	}
+	e.uid = uint32(uid) // NOTE(review): a negative or >32-bit uid wraps silently
+	e.gid = uint32(gid) // NOTE(review): same wrap-around applies to gid
+	return nil
+}
+
+// Chtimes sets the access and modification times on the named path, stored as Unix seconds plus nanoseconds.
+// EROFS only stores mtime; atime is retained for read-back before Close.
+func (fsys *Writer) Chtimes(name string, atime time.Time, mtime time.Time) error {
+	if fsys.wErr != nil {
+		return fsys.wErr
+	}
+	e, err := fsys.lookup(name)
+	if err != nil {
+		return err
+	}
+	e.atime = uint64(atime.Unix()) // NOTE(review): times before 1970 have a negative Unix() and wrap in uint64
+	e.atimeNs = uint32(atime.Nanosecond())
+	e.mtime = uint64(mtime.Unix())
+	e.mtimeNs = uint32(mtime.Nanosecond())
+	return nil
+}
+
+// Setxattr sets extended attribute attr to value on the named path, replacing any previous value for attr.
+func (fsys *Writer) Setxattr(name, attr, value string) error {
+	if fsys.wErr != nil {
+		return fsys.wErr
+	}
+	e, err := fsys.lookup(name)
+	if err != nil {
+		return err
+	}
+	if e.xattrs == nil {
+		e.xattrs = make(map[string]string) // allocated lazily: writing to a nil map would panic
+	}
+	e.xattrs[attr] = value
+	return nil
+}
+
+// SetNlink overrides the computed link count on the named path.
+// SetNlink returns an error when the path (or the canonical entry it aliases) has hardlinks
+// created via Link; use Link to manage link counts in that case.
+func (fsys *Writer) SetNlink(name string, nlink uint32) error {
+	if fsys.wErr != nil {
+		return fsys.wErr
+	}
+	e, err := fsys.lookup(name)
+	if err != nil {
+		return err
+	}
+	// Resolve alias → canonical so the check applies to the real entry.
+	if e.linkedTo != nil {
+		e = e.linkedTo
+	}
+	if len(e.hardlinks) > 0 {
+		return fmt.Errorf("mkfs: SetNlink: %q is part of a hardlink group; use Link() to manage link counts", name)
+	}
+	e.nlink = nlink
+	e.nlinkSet = true // marks nlink as an explicit override
+	return nil
+}
+
+// Remove removes the named path from the writer's tree. It mirrors the
+// semantics of [os.Root.Remove]: it is non-recursive, returns
+// [fs.ErrNotExist] (wrapped in [fs.PathError]) if the path does not exist,
+// and returns an error if the path is a non-empty directory.
+//
+// Removing a hardlink alias only removes the dirent at that path; the
+// underlying inode and other aliases are preserved. Removing the canonical
+// path of a hardlink group with surviving aliases promotes the first
+// remaining alias to canonical (POSIX unlink semantics).
+//
+// Remove cannot be used to delete the root; that attempt returns a [fs.PathError].
+//
+// Recursive removal can be implemented by the caller by listing the
+// directory with [fs.ReadDir] (via [Writer.Open]) and calling Remove on
+// each descendant before removing the directory itself.
+func (fsys *Writer) Remove(name string) error {
+	if fsys.wErr != nil {
+		return fsys.wErr
+	}
+	if fsys.closed {
+		return fmt.Errorf("mkfs: FS is closed")
+	}
+	name = cleanPath(name)
+	if name == "/" {
+		return &fs.PathError{Op: "remove", Path: name, Err: fmt.Errorf("cannot remove root")}
+	}
+	e, ok := fsys.byPath[name]
+	if !ok {
+		return &fs.PathError{Op: "remove", Path: name, Err: fs.ErrNotExist}
+	}
+	// Non-empty directory check: children already marked removed do not count.
+	if e.mode&disk.StatTypeMask == disk.StatTypeDir {
+		for _, c := range e.children {
+			if !c.removed {
+				return &fs.PathError{Op: "remove", Path: name, Err: errDirNotEmpty}
+			}
+		}
+	}
+	fsys.unlinkOne(e)
+	return nil
+}
+
+// unlinkOne removes a single entry from the writer's tree, applying POSIX
+// unlink semantics for hardlinks. The entry must already be located in
+// byPath. Callers are responsible for any caller-visible preconditions
+// (e.g. empty-directory check). The entry is flagged removed and dropped from byPath.
+func (fsys *Writer) unlinkOne(e *fsEntry) {
+	switch {
+	case e.linkedTo != nil:
+		// Alias: drop the alias path from the canonical's hardlinks list.
+		canonical := e.linkedTo
+		for i, p := range canonical.hardlinks {
+			if p == e.path {
+				canonical.hardlinks = append(canonical.hardlinks[:i], canonical.hardlinks[i+1:]...)
+				break
+			}
+		}
+	case len(e.hardlinks) > 0:
+		// Canonical with surviving aliases: promote first alias.
+		newCanonicalPath := e.hardlinks[0]
+		remaining := e.hardlinks[1:]
+		newCanonical := fsys.byPath[newCanonicalPath]
+		if newCanonical != nil { // if the alias path is missing from byPath, no promotion happens
+			// Copy data-bearing fields from old canonical to the alias entry.
+			newCanonical.mode = e.mode
+			newCanonical.uid = e.uid
+			newCanonical.gid = e.gid
+			newCanonical.atime = e.atime
+			newCanonical.atimeNs = e.atimeNs
+			newCanonical.mtime = e.mtime
+			newCanonical.mtimeNs = e.mtimeNs
+			newCanonical.size = e.size
+			newCanonical.rdev = e.rdev
+			newCanonical.xattrs = e.xattrs
+			newCanonical.linkTarget = e.linkTarget
+			newCanonical.chunks = e.chunks
+			newCanonical.contiguous = e.contiguous
+			newCanonical.spoolOff = e.spoolOff
+			newCanonical.dataStartOff = e.dataStartOff
+			newCanonical.fileClosed = e.fileClosed
+			newCanonical.directData = e.directData
+			newCanonical.metadataOnly = e.metadataOnly
+			newCanonical.nlink = e.nlink
+			newCanonical.nlinkSet = e.nlinkSet
+			newCanonical.linkedTo = nil // the promoted entry is no longer an alias
+			newCanonical.hardlinks = remaining
+			// Repoint remaining aliases at the new canonical.
+			for _, ap := range remaining {
+				if a := fsys.byPath[ap]; a != nil {
+					a.linkedTo = newCanonical
+				}
+			}
+		}
+	}
+	e.removed = true // not spliced out of the parent's children here; Remove's emptiness check skips removed entries
+	delete(fsys.byPath, e.path)
+}
+
+// --- Writer bulk copy ---
+
+// CopyFrom walks src from "." and adds every entry to the writer.
+//
+// Options are applied after the per-call state (metadata-only, merge,
+// device ID) has been reset, so they only affect this call.
+//
+// Source capabilities are detected by type assertion:
+//   - *image: block size and build time are taken from the image; in
+//     metadata-only mode the copy is delegated to copyFromImage.
+//   - blockSizer: sets the image block size (an error if it conflicts with
+//     a previously set size).
+//   - deviceBlocker: in metadata-only mode, registers a device slot.
+//   - buildTimer: supplies the build time unless one is already set.
+//   - readLinker: used to read symlink targets when Entry.LinkTarget is empty.
+//
+// Regular files with Size > 0 and no Entry.Data are opened through src.Open
+// unless metadata-only mode is active. A file opened this way is closed as
+// soon as add has consumed it (data-file mode) or failed. In spool mode add
+// keeps the reader until Close writes the image, so the handle has to stay
+// open until then.
+//
+// In merge mode, whiteout markers encountered during the walk remove
+// previously added entries instead of being added themselves.
+//
+// It returns the sticky writer error if one is set, or the first error
+// produced by the walk.
+func (fsys *Writer) CopyFrom(src fs.FS, opts ...CopyOpt) error {
+	if fsys.wErr != nil {
+		return fsys.wErr
+	}
+	// Reset per-CopyFrom state.
+	fsys.copyMetadataOnly = false
+	fsys.copyMerge = false
+	fsys.copyDeviceID = 0
+	for _, opt := range opts {
+		opt(fsys)
+	}
+	// Detect EROFS image source for direct metadata/chunk extraction.
+	// The fast path (copyFromImage) only applies to MetadataOnly mode
+	// where no file data needs to be read — just inodes, dirents, and
+	// chunk indexes. For non-MetadataOnly, fall through to the fs.WalkDir
+	// path which opens files for data.
+	if srcImg, ok := src.(*image); ok {
+		if err := fsys.setBlockSize(int(srcImg.blockSize())); err != nil {
+			return err
+		}
+		if !fsys.hasBuildTime {
+			fsys.buildTime = srcImg.buildTime()
+			fsys.hasBuildTime = true
+		}
+		if fsys.copyMetadataOnly {
+			devBlocks := srcImg.deviceBlocks()
+			fsys.devices = append(fsys.devices, devBlocks...)
+			// First newly appended slot, 1-based.
+			fsys.copyDeviceID = uint16(len(fsys.devices) - len(devBlocks) + 1)
+			return fsys.copyFromImage(srcImg)
+		}
+	}
+	if bs, ok := src.(blockSizer); ok {
+		if err := fsys.setBlockSize(int(bs.BlockSize())); err != nil {
+			return err
+		}
+	}
+	if fsys.copyMetadataOnly {
+		if db, ok := src.(deviceBlocker); ok {
+			fsys.devices = append(fsys.devices, db.DeviceBlocks())
+			fsys.copyDeviceID = uint16(len(fsys.devices))
+		}
+	}
+	if bt, ok := src.(buildTimer); ok && !fsys.hasBuildTime {
+		fsys.buildTime = bt.BuildTime()
+		fsys.hasBuildTime = true
+	}
+
+	// seenIno tracks inode identity across the walk for sources that expose
+	// Stat.Ino (EROFS images) so that hardlinks (multiple paths sharing one
+	// NID with nlink > 1) are preserved via Link() rather than duplicated.
+	// Keyed by Ino; value is the first-seen destination path.
+	var seenIno map[int64]string
+
+	return fs.WalkDir(src, ".", func(fpath string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+
+		info, err := d.Info()
+		if err != nil {
+			return fmt.Errorf("stat %s: %w", fpath, err)
+		}
+
+		// Normalize path to absolute
+		p := "/" + fpath
+		if fpath == "." {
+			p = "/"
+		}
+
+		// Merge mode: process whiteout markers.
+		if fsys.copyMerge && p != "/" {
+			base := path.Base(p)
+			if strings.HasPrefix(base, whiteoutPrefix) {
+				if base == opaqueWhiteout {
+					// Opaque directory: remove all prior children of parent.
+					fsys.removeChildren(path.Dir(p))
+				} else {
+					// File whiteout: remove the named entry.
+					target := path.Join(path.Dir(p), base[len(whiteoutPrefix):])
+					fsys.remove(target)
+				}
+				// The marker itself is never added to the image.
+				return nil
+			}
+		}
+
+		// Extract extended metadata from Sys().
+		var be *builder.Entry
+		switch sys := info.Sys().(type) {
+		case *builder.Entry:
+			be = sys
+		case *Stat:
+			// EROFS image source: convert *Stat to *builder.Entry.
+			// Also detect hardlinks via Ino + Nlink.
+			if !info.IsDir() && sys.Nlink > 1 && p != "/" {
+				if seenIno == nil {
+					seenIno = make(map[int64]string)
+				}
+				if firstPath, seen := seenIno[sys.Ino]; seen {
+					// Second (or later) path to this inode: create a hardlink.
+					if err := fsys.Link(firstPath, p); err != nil {
+						return fmt.Errorf("link %s → %s: %w", firstPath, p, err)
+					}
+					return nil
+				}
+				seenIno[sys.Ino] = p
+			}
+			be = &builder.Entry{
+				UID:     sys.UID,
+				GID:     sys.GID,
+				Mtime:   sys.Mtime,
+				MtimeNs: sys.MtimeNs,
+				Nlink:   uint32(sys.Nlink),
+				Rdev:    sys.Rdev,
+				Xattrs:  sys.Xattrs,
+			}
+		}
+
+		// For regular files, get a data reader.
+		if info.Mode().IsRegular() && info.Size() > 0 && (be == nil || be.Data == nil) {
+			// In metadata-only mode, data is referenced via chunk indexes
+			// from the source — no need to open the file.
+			if fsys.copyMetadataOnly {
+				if be == nil {
+					be = entryFromSys(info)
+					if be == nil {
+						be = &builder.Entry{}
+					}
+				}
+				// Generate chunks from DataRange if available.
+				if len(be.Chunks) == 0 {
+					if dr, ok := info.(dataRanger); ok {
+						if ranges := dr.DataRange(); len(ranges) > 0 {
+							chunks, err := fsys.chunksFromRanges(ranges, info.Size())
+							if err != nil {
+								return fmt.Errorf("chunksFromRanges %s: %w", p, err)
+							}
+							be.Chunks = chunks
+							// Contiguous: a single non-hole range whose total-size
+							// invariant is satisfied (guaranteed by chunksFromRanges)
+							// means the file is fully covered by one contiguous extent.
+							be.Contiguous = len(ranges) == 1 && ranges[0].Offset != holeOffset
+						}
+					}
+				}
+				return fsys.add(p, &entryFileInfo{info: info, sys: be})
+			}
+			// For EROFS sources, use direct SectionReader (bypasses
+			// block-at-a-time reader for contiguous flat-plain data).
+			if srcImg, ok := src.(*image); ok {
+				if st, ok := info.Sys().(*Stat); ok {
+					f := file{img: srcImg, nid: uint64(st.Ino)}
+					if ino, err := f.readInfo(); err == nil {
+						if dr := srcImg.openDirect(ino); dr != nil {
+							if be == nil {
+								be = &builder.Entry{}
+							}
+							be.Data = dr
+							return fsys.add(p, &entryFileInfo{info: info, sys: be})
+						}
+					}
+				}
+			}
+			f, err := src.Open(fpath)
+			if err != nil {
+				return fmt.Errorf("open %s: %w", fpath, err)
+			}
+			if be == nil {
+				be = entryFromSys(info)
+				if be == nil {
+					be = &builder.Entry{}
+				}
+			}
+			// fs.File already provides Read, so no type assertion is needed.
+			be.Data = f
+			err = fsys.add(p, &entryFileInfo{info: info, sys: be})
+			// Release the handle once nothing will read from it again: in
+			// data-file mode add() has already drained the reader, and after
+			// a failure the entry is abandoned. In spool mode add() stores
+			// the reader for Close(), so the file must remain open.
+			if err != nil || fsys.dataFile != nil {
+				// Read-only handle; a Close error carries no useful signal.
+				_ = f.Close()
+			}
+			return err
+		}
+
+		// For symlinks without LinkTarget, read via ReadLink interface.
+		if info.Mode()&fs.ModeSymlink != 0 && (be == nil || be.LinkTarget == "") {
+			if rl, ok := src.(readLinker); ok {
+				target, err := rl.ReadLink(fpath)
+				if err != nil {
+					return fmt.Errorf("readlink %s: %w", fpath, err)
+				}
+				if be == nil {
+					be = entryFromSys(info)
+					if be == nil {
+						be = &builder.Entry{}
+					}
+				}
+				be.LinkTarget = target
+				return fsys.add(p, &entryFileInfo{info: info, sys: be})
+			}
+		}
+
+		// For directories, ensure nlink >= 2.
+		if info.Mode().IsDir() {
+			if be == nil {
+				be = entryFromSys(info)
+				if be == nil {
+					be = &builder.Entry{Nlink: 2}
+				}
+			}
+			if be.Nlink < 2 {
+				be.Nlink = 2
+			}
+			return fsys.add(p, &entryFileInfo{info: info, sys: be})
+		}
+
+		// General case: devices, fifos, sockets, etc.
+		// Wrap in entryFileInfo when be was extracted from Sys()
+		// so that add() sees the metadata.
+		if be != nil {
+			return fsys.add(p, &entryFileInfo{info: info, sys: be})
+		}
+		return fsys.add(p, info)
+	})
+}
+
+// --- Writer finalization ---
+
+// Close lays out and writes the EROFS image to the writer's output. The
+// Writer must not be used afterwards.
+//
+// It returns the sticky writer error if one is set, and an error if Close
+// was already called. The spool, if any, is closed when Close returns.
+func (fsys *Writer) Close() error {
+	if err := fsys.wErr; err != nil {
+		return err
+	}
+	if fsys.closed {
+		return fmt.Errorf("mkfs: FS already closed")
+	}
+	fsys.closed = true
+
+	if fsys.spool != nil {
+		// Best-effort: a spool close error does not affect the image.
+		defer func() { _ = fsys.spool.Close() }()
+	}
+
+	bs := fsys.resolveBlockSize()
+
+	if fsys.dataFile != nil {
+		// Device slot 0 was reserved for the data file; record its final
+		// size in blocks, rounded up.
+		unit := int64(bs)
+		fsys.devices[0] = uint64((fsys.dataOff + unit - 1) / unit)
+	}
+
+	var buildTime uint64
+	if fsys.hasBuildTime {
+		buildTime = fsys.buildTime
+	} else {
+		buildTime = uint64(time.Now().Unix())
+	}
+
+	// Convert the fsEntry tree into the builder's erofsEntry tree.
+	root := fsys.buildErofsTree()
+
+	// Number of doublings needed to grow the block size to at least 4096.
+	chunkBits := uint8(0)
+	for size := bs; size < 4096; size *= 2 {
+		chunkBits++
+	}
+
+	img := &erofsWriter{
+		buildTime:   buildTime,
+		buildTimeNs: fsys.buildTimeNs,
+		devices:     fsys.devices,
+		blockSize:   bs,
+		chunkBits:   chunkBits,
+		zeroBuf:     make([]byte, bs),
+	}
+
+	// NIDs are assigned by planLayout; parent NIDs depend on them.
+	img.planLayout(root)
+	fixParentNids(root, root)
+
+	return img.write(fsys.out)
+}
+
+// Stat looks up name in the writer and returns its file info. The name is
+// normalized with cleanPath first; a missing entry yields an *fs.PathError
+// wrapping fs.ErrNotExist.
+func (fsys *Writer) Stat(name string) (fs.FileInfo, error) {
+	key := cleanPath(name)
+	if e, found := fsys.byPath[key]; found {
+		return &writerFileInfo{entry: e}, nil
+	}
+	return nil, &fs.PathError{Op: "stat", Path: key, Err: fs.ErrNotExist}
+}
+
+// Open returns a read-only view of the named entry.
+//
+//   - Directories yield a file implementing fs.ReadDirFile.
+//   - Regular files must already be closed for writing; their data is read
+//     from the data file when one is configured, otherwise from the spool.
+//   - Any other type (symlink, device, ...) yields a stat-only file whose
+//     Read reports io.EOF.
+//
+// NOTE(review): entries added with a source-provided reader (directData)
+// never set dataStartOff, so in spool mode the section reader here starts at
+// offset 0 of the spool — confirm whether such entries should be readable.
+func (fsys *Writer) Open(name string) (fs.File, error) {
+	name = cleanPath(name)
+	e, ok := fsys.byPath[name]
+	if !ok {
+		return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist}
+	}
+
+	kind := e.mode & disk.StatTypeMask
+	if kind == disk.StatTypeDir {
+		return &readDir{fsys: fsys, entry: e}, nil
+	}
+	if kind != disk.StatTypeReg {
+		// Symlinks, devices, etc.: stat-only, no readable data.
+		return &readFile{entry: e}, nil
+	}
+
+	if !e.fileClosed {
+		return nil, &fs.PathError{Op: "open", Path: name, Err: fmt.Errorf("file not yet closed for writing")}
+	}
+	var sr *io.SectionReader
+	switch {
+	case fsys.dataFile != nil:
+		sr = io.NewSectionReader(fsys.dataFile, e.dataStartOff, int64(e.size))
+	case fsys.spool != nil && e.size > 0:
+		sr = io.NewSectionReader(fsys.spool, e.dataStartOff, int64(e.size))
+	}
+	return &readFile{entry: e, reader: sr}, nil
+}
+
+// --- File methods ---
+
+// Write appends data to the file.
+func (f *File) Write(p []byte) (int, error) {
+	if f.closed {
+		return 0, fmt.Errorf("mkfs: write to closed file")
+	}
+
+	if f.fs.dataFile != nil {
+		n, err := f.fs.dataFile.Write(p)
+		f.written += int64(n)
+		f.fs.dataOff += int64(n)
+		return n, err
+	}
+
+	n, err := f.fs.spool.Write(p)
+	f.written += int64(n)
+	f.fs.spoolOff += int64(n)
+	return n, err
+}
+
+// ReadFrom implements io.ReaderFrom, allowing io.Copy(f, src) to use
+// a shared buffer instead of allocating a new 32KB buffer per call.
+func (f *File) ReadFrom(r io.Reader) (int64, error) {
+	buf := f.fs.copyBuf()
+	var written int64
+	for {
+		nr, er := r.Read(buf)
+		if nr > 0 {
+			nw, ew := f.Write(buf[:nr])
+			written += int64(nw)
+			if ew != nil {
+				return written, ew
+			}
+		}
+		if er != nil {
+			if er == io.EOF {
+				return written, nil
+			}
+			return written, er
+		}
+	}
+}
+
+// Close finalizes the file: the entry is marked closed and its size is set
+// to the number of bytes written. In data file mode the remaining work
+// (block padding, chunk index recording) is delegated to closeDataFile.
+// Closing twice is an error.
+func (f *File) Close() error {
+	if f.closed {
+		return fmt.Errorf("mkfs: file already closed")
+	}
+	f.closed = true
+
+	entry := f.entry
+	entry.size = uint64(f.written)
+	entry.fileClosed = true
+
+	if f.fs.dataFile == nil {
+		return nil
+	}
+	return f.closeDataFile()
+}
+
+// Chmod replaces the permission bits (lower 12 bits, including
+// setuid/setgid/sticky) of the entry while leaving its file type untouched,
+// matching os.File.Chmod. It always returns nil.
+func (f *File) Chmod(mode fs.FileMode) error {
+	typeBits := f.entry.mode & disk.StatTypeMask
+	permBits := goModeToUnixMode(mode) & 0o7777
+	f.entry.mode = typeBits | permBits
+	return nil
+}
+
+// Chown sets the owner UID and GID on the file, matching os.File.Chown:
+// a uid or gid of -1 leaves that value unchanged instead of being stored
+// as 0xFFFFFFFF. It always returns nil.
+func (f *File) Chown(uid, gid int) error {
+	if uid != -1 {
+		f.entry.uid = uint32(uid)
+	}
+	if gid != -1 {
+		f.entry.gid = uint32(gid)
+	}
+	return nil
+}
+
+// Chtimes sets access and modification times on the open file. EROFS only
+// stores mtime on disk; atime is retained on the in-memory entry for
+// read-back before [Writer.Close].
+//
+// As with os.Chtimes, a zero time.Time leaves the corresponding timestamp
+// unchanged. Without this, the zero value's negative Unix time would wrap
+// to a huge unsigned number. It always returns nil.
+func (f *File) Chtimes(atime, mtime time.Time) error {
+	if !atime.IsZero() {
+		f.entry.atime = uint64(atime.Unix())
+		f.entry.atimeNs = uint32(atime.Nanosecond())
+	}
+	if !mtime.IsZero() {
+		f.entry.mtime = uint64(mtime.Unix())
+		f.entry.mtimeNs = uint32(mtime.Nanosecond())
+	}
+	return nil
+}
+
+// --- Internal types ---
+
+// fsEntry is the in-memory representation of a filesystem entry held by Writer.
+// Entries are indexed by path in Writer.byPath and linked into a tree through
+// parent/children. Removal only sets the removed flag and drops the byPath
+// key; the entry stays in its parent's children slice and is skipped by
+// readers and by the image builder.
+type fsEntry struct {
+	// path is the absolute, cleaned path; it is the key used in Writer.byPath.
+	path string
+	// mode holds the Unix mode: file type (disk.StatTypeMask) plus permission bits.
+	mode     uint16
+	uid, gid uint32
+	// atime/atimeNs are kept in memory only; they are not carried over to erofsEntry.
+	atime      uint64
+	atimeNs    uint32
+	mtime      uint64
+	mtimeNs    uint32
+	nlink      uint32
+	nlinkSet   bool // true if SetNlink was called
+	size       uint64
+	rdev       uint32
+	xattrs     map[string]string
+	linkTarget string
+	chunks     []builder.Chunk
+	contiguous bool // data blocks are contiguous; flat-plain is sufficient
+
+	// Hardlink support: linkedTo points to the canonical fsEntry this entry
+	// is a hardlink of. hardlinks collects alias paths on the canonical entry.
+	// Only one of these is non-nil/non-empty per entry.
+	linkedTo  *fsEntry // non-nil if this is an alias (hardlink) of another entry
+	hardlinks []string // alias paths on the canonical entry; nil if no hardlinks
+
+	// Tree structure — maintained during add/remove.
+	parent   *fsEntry
+	children []*fsEntry
+
+	// data location in spool file
+	spoolOff     int64
+	dataStartOff int64     // byte offset where file data begins (spool or data file)
+	fileClosed   bool      // true after File.Close() is called
+	directData   io.Reader // bypasses spool; set by add() for source-provided data
+
+	removed      bool // true if removed by a whiteout in a merge layer
+	metadataOnly bool // from a metadata-only CopyFrom; use chunk-based layout
+}
+
+// createOptions holds the parsed option values for Create.
+// The zero value means "no options given": hasBuildTime is false and
+// blockSize 0 selects the default.
+type createOptions struct {
+	buildTime    uint64
+	buildTimeNs  uint32
+	hasBuildTime bool     // presumably distinguishes an explicit build time from the zero value — verify against Create
+	blockSize    int      // 0 = use default
+	dataFile     *os.File // external data file for metadata-only mode
+	tempDir      string   // temp directory for spool file
+}
+
+// blockSizer may be implemented by an fs.FS to declare its block size.
+// Writer.CopyFrom uses this to set the image block size automatically.
+// The value goes through setBlockSize, so CopyFrom fails if it is invalid
+// or conflicts with a block size that was already set.
+type blockSizer interface {
+	BlockSize() uint32
+}
+
+// buildTimer may be implemented by an fs.FS to suggest a build timestamp.
+// If the caller hasn't set WithBuildTime, CopyFrom uses this value.
+// Entries whose mtime matches the build time can use compact (32-byte) inodes.
+// Only the first build time wins: once the writer has one, later sources
+// are not consulted.
+type buildTimer interface {
+	BuildTime() uint64
+}
+
+// deviceBlocker may be implemented by an fs.FS to declare the total
+// block count of its backing device. Writer.CopyFrom uses this to
+// configure the device slot for metadata-only mode.
+// It is only consulted in metadata-only mode; the returned count is appended
+// to the writer's device list and the new slot becomes the copy's device ID.
+type deviceBlocker interface {
+	DeviceBlocks() uint64
+}
+
+// readLinker is an interface for filesystems that support reading symlink targets.
+// CopyFrom checks for it on the source fs.FS when a symlink entry carries no
+// LinkTarget; name is the walk path relative to the source root.
+type readLinker interface {
+	ReadLink(name string) (string, error)
+}
+
+// dataRanger may be implemented by fs.FileInfo to provide the physical
+// location of uncompressed file data in backing devices. CopyFrom checks
+// this via type assertion in metadata-only mode to build chunk indexes
+// without requiring the caller to construct internal chunk types.
+//
+// This interface should only be implemented for files whose device data
+// is stored verbatim (uncompressed). For compressed files, return nil or
+// do not implement the interface. In full-image mode CopyFrom then falls
+// back to reading through Open(), which decompresses transparently. In
+// MetadataOnly mode there is no such fallback: the file is stored as a
+// chunk-based inode with no physical mappings (all holes).
+//
+// The ranges are only used when the entry does not already carry chunks.
+type dataRanger interface {
+	DataRange() []DataRange
+}
+
+// --- Internal types (image builder) ---
+
+// erofsEntry is the internal representation of a file/dir/symlink used by the builder.
+// Writer.fsToErofs creates one per live fsEntry; hardlink aliases get a stub
+// that only carries name, path, linkTo and erofsFileType.
+type erofsEntry struct {
+	mode    uint16
+	uid     uint32
+	gid     uint32
+	mtime   uint64
+	mtimeNs uint32
+	nlink   uint32
+	size    uint64
+	rdev    uint32
+
+	name      string        // base name of path; sort key for directory children
+	path      string        // absolute path of the originating fsEntry
+	children  []*erofsEntry // directory contents, sorted by name in buildErofsTree
+	symTarget string        // copied from fsEntry.linkTarget
+
+	// linkTo is non-nil for hardlink alias entries. These entries are only
+	// emitted as dirents pointing at linkTo's NID; no inode is written for them.
+	linkTo *erofsEntry
+
+	// For regular files — metadata-only mode
+	chunks       []builder.Chunk
+	contiguous   bool  // data blocks are contiguous; use large chunk size
+	chunkBits    uint8 // per-entry chunk bits (0 = use global)
+	metadataOnly bool  // chunk-based layout even without chunks
+
+	// For regular files — full-image mode
+	data io.Reader
+
+	// Extended attributes
+	xattrs map[string]string
+
+	// EROFS layout (assigned during planning)
+	nid           uint64
+	parentNid     uint64 // set by fixParentNids from the parent directory's nid
+	erofsFileType uint8
+	layout        uint8
+	compact       bool // true = 32-byte compact inode; false = 64-byte extended
+	xattrSize     int  // bytes of xattr area (0 if no xattrs)
+	trailingSize  int
+
+	// Data block address for flat-plain files (full-image mode)
+	dataBlkAddr uint32
+}
+
+// --- Internal helpers ---
+
+// cleanPath normalizes a filesystem path to an absolute rooted form with a
+// leading slash and no trailing slash. "", "." and "/" all map to "/".
+//
+// The path is cleaned as a rooted path, so ".." elements can never climb
+// above the root: "../x" and "a/../../x" both become "/x". Cleaning first
+// and prefixing afterwards would yield "/../x", i.e. an entry literally
+// named "..".
+func cleanPath(p string) string {
+	// path.Clean collapses duplicate slashes, "." and ".." elements, and
+	// drops ".." elements that would begin a rooted path.
+	return path.Clean("/" + p)
+}
+
+// fixParentNids records, for e and every directory below it, the NID of the
+// containing directory (used for the ".." dirent). The root is passed as its
+// own parent. NIDs must already have been assigned by planLayout.
+func fixParentNids(e *erofsEntry, parent *erofsEntry) {
+	e.parentNid = parent.nid
+	for _, child := range e.children {
+		if child.mode&disk.StatTypeMask != disk.StatTypeDir {
+			continue
+		}
+		fixParentNids(child, e)
+	}
+}
+
+// entryFileInfo decorates an fs.FileInfo: every method is forwarded to the
+// wrapped info except Sys, which returns the attached *builder.Entry.
+type entryFileInfo struct {
+	info fs.FileInfo
+	sys  *builder.Entry
+}
+
+// Name forwards to the wrapped fs.FileInfo.
+func (fi *entryFileInfo) Name() string {
+	return fi.info.Name()
+}
+
+// Size forwards to the wrapped fs.FileInfo.
+func (fi *entryFileInfo) Size() int64 {
+	return fi.info.Size()
+}
+
+// Mode forwards to the wrapped fs.FileInfo.
+func (fi *entryFileInfo) Mode() fs.FileMode {
+	return fi.info.Mode()
+}
+
+// ModTime forwards to the wrapped fs.FileInfo.
+func (fi *entryFileInfo) ModTime() time.Time {
+	return fi.info.ModTime()
+}
+
+// IsDir forwards to the wrapped fs.FileInfo.
+func (fi *entryFileInfo) IsDir() bool {
+	return fi.info.IsDir()
+}
+
+// Sys returns the attached *builder.Entry instead of the wrapped Sys value.
+func (fi *entryFileInfo) Sys() any {
+	return fi.sys
+}
+
+// writerFileInfo exposes an fsEntry through the fs.FileInfo interface.
+type writerFileInfo struct {
+	entry *fsEntry
+}
+
+// Name returns the last element of the entry's path.
+func (fi *writerFileInfo) Name() string {
+	return path.Base(fi.entry.path)
+}
+
+// Size returns the entry's recorded size in bytes.
+func (fi *writerFileInfo) Size() int64 {
+	return int64(fi.entry.size)
+}
+
+// Mode converts the stored Unix mode into an fs.FileMode.
+func (fi *writerFileInfo) Mode() fs.FileMode {
+	return disk.EroFSModeToGoFileMode(fi.entry.mode)
+}
+
+// ModTime returns the entry's mtime (seconds plus nanoseconds).
+func (fi *writerFileInfo) ModTime() time.Time {
+	sec, nsec := int64(fi.entry.mtime), int64(fi.entry.mtimeNs)
+	return time.Unix(sec, nsec)
+}
+
+// IsDir reports whether the entry's type bits denote a directory.
+func (fi *writerFileInfo) IsDir() bool {
+	return fi.entry.mode&disk.StatTypeMask == disk.StatTypeDir
+}
+
+// Sys always returns nil.
+func (fi *writerFileInfo) Sys() any {
+	return nil
+}
+
+// readFile is the fs.File handed out by Writer.Open for anything that is
+// not a directory. Only finalized regular files carry a reader.
+type readFile struct {
+	entry  *fsEntry
+	reader *io.SectionReader // nil for empty files or non-regular types
+	closed bool
+}
+
+// Stat returns file info backed by the underlying entry.
+func (f *readFile) Stat() (fs.FileInfo, error) {
+	return &writerFileInfo{entry: f.entry}, nil
+}
+
+// Read reads from the file's data section. A file without a reader reports
+// io.EOF immediately; reading after Close is an error.
+func (f *readFile) Read(p []byte) (int, error) {
+	switch {
+	case f.closed:
+		return 0, fmt.Errorf("mkfs: read from closed file")
+	case f.reader == nil:
+		return 0, io.EOF
+	default:
+		return f.reader.Read(p)
+	}
+}
+
+// Close marks the handle as closed. A second Close is an error.
+func (f *readFile) Close() error {
+	if !f.closed {
+		f.closed = true
+		return nil
+	}
+	return fmt.Errorf("mkfs: file already closed")
+}
+
+// readDir is the fs.ReadDirFile handed out by Writer.Open for directories.
+// The child listing is materialized on the first ReadDir call and then
+// consumed through offset.
+type readDir struct {
+	fsys     *Writer
+	entry    *fsEntry
+	children []fs.DirEntry // lazily populated
+	offset   int
+	closed   bool
+}
+
+// Stat returns file info backed by the directory's entry.
+func (d *readDir) Stat() (fs.FileInfo, error) {
+	return &writerFileInfo{entry: d.entry}, nil
+}
+
+// Read always fails: directories have no byte content.
+func (d *readDir) Read([]byte) (int, error) {
+	cause := fmt.Errorf("is a directory")
+	return 0, &fs.PathError{Op: "read", Path: d.entry.path, Err: cause}
+}
+
+// Close marks the handle as closed. A second Close is an error.
+func (d *readDir) Close() error {
+	if !d.closed {
+		d.closed = true
+		return nil
+	}
+	return fmt.Errorf("mkfs: dir already closed")
+}
+
+// ReadDir implements fs.ReadDirFile. The listing is collected (and sorted)
+// on first use; successive calls continue where the previous one stopped.
+//
+// With n <= 0 it returns all remaining entries and a nil error, even when
+// nothing is left. With n > 0 it returns at most n entries; io.EOF is
+// reported only together with an empty result, once the directory is
+// exhausted. Callers that stop at the first non-nil error would silently
+// lose the final batch if it were returned alongside io.EOF.
+//
+// Reading from a closed directory is an error.
+func (d *readDir) ReadDir(n int) ([]fs.DirEntry, error) {
+	if d.closed {
+		return nil, fmt.Errorf("mkfs: read from closed dir")
+	}
+	if d.children == nil {
+		d.children = d.collectChildren()
+	}
+
+	remaining := d.children[d.offset:]
+	if n <= 0 {
+		d.offset = len(d.children)
+		return remaining, nil
+	}
+
+	if len(remaining) == 0 {
+		return nil, io.EOF
+	}
+	if n > len(remaining) {
+		n = len(remaining)
+	}
+	d.offset += n
+	// Cap the result so an append by the caller cannot overwrite entries
+	// that have not been handed out yet.
+	return remaining[:n:n], nil
+}
+
+// collectChildren returns the directory's live (not removed) children as
+// fs.DirEntry values, sorted by name.
+func (d *readDir) collectChildren() []fs.DirEntry {
+	live := make([]fs.DirEntry, 0, len(d.entry.children))
+	for _, child := range d.entry.children {
+		if !child.removed {
+			live = append(live, &dirEntry{entry: child})
+		}
+	}
+	byName := func(a, b int) bool { return live[a].Name() < live[b].Name() }
+	sort.Slice(live, byName)
+	return live
+}
+
+// dirEntry exposes an fsEntry through the fs.DirEntry interface.
+type dirEntry struct {
+	entry *fsEntry
+}
+
+// Name returns the last element of the entry's path.
+func (de *dirEntry) Name() string {
+	return path.Base(de.entry.path)
+}
+
+// IsDir reports whether the entry's type bits denote a directory.
+func (de *dirEntry) IsDir() bool {
+	return de.entry.mode&disk.StatTypeMask == disk.StatTypeDir
+}
+
+// Type returns the type bits of the entry's mode as an fs.FileMode.
+func (de *dirEntry) Type() fs.FileMode {
+	goMode := disk.EroFSModeToGoFileMode(de.entry.mode)
+	return goMode.Type()
+}
+
+// Info returns file info backed by the same entry; it never fails.
+func (de *dirEntry) Info() (fs.FileInfo, error) {
+	return &writerFileInfo{entry: de.entry}, nil
+}
+
+// add adds a single entry. Mode and Size come from info; extended metadata
+// comes from info.Sys(). Checks Sys() for *builder.Entry first, then
+// platform-specific stat types as a fallback for plain fs.FS sources.
+//
+// Behavior by case:
+//   - "/" only updates the root's metadata (mode, owner, mtime, nlink, xattrs).
+//   - Missing parent directories are created implicitly (mode 0755).
+//   - An existing path is overwritten in place, keeping its position in the
+//     tree (parent and children).
+//   - In metadata-only mode the entry is flagged metadataOnly, its chunk
+//     DeviceIDs are shifted into the writer's device table, and no data is
+//     read.
+//   - Otherwise, a non-empty regular file with Entry.Data is either copied
+//     into the data file right away or, in spool mode, keeps a reference to
+//     the reader that is consumed later when the image is written.
+//
+// The only errors returned come from copying data into the data file.
+func (fsys *Writer) add(p string, info fs.FileInfo) error {
+	p = cleanPath(p)
+	mode := goModeToUnixMode(info.Mode())
+	size := uint64(info.Size())
+	typ := mode & disk.StatTypeMask
+
+	be := entryFromSys(info)
+	if be == nil {
+		be = &builder.Entry{}
+	}
+
+	if p == "/" {
+		root := fsys.root
+		root.mode = mode
+		root.uid = be.UID
+		root.gid = be.GID
+		root.mtime = be.Mtime
+		root.mtimeNs = be.MtimeNs
+		if be.Nlink > 0 {
+			root.nlink = be.Nlink
+			root.nlinkSet = true
+		}
+		root.xattrs = be.Xattrs
+		return nil
+	}
+
+	fsys.ensureParent(p)
+
+	fe := &fsEntry{
+		path:       p,
+		mode:       mode,
+		uid:        be.UID,
+		gid:        be.GID,
+		mtime:      be.Mtime,
+		mtimeNs:    be.MtimeNs,
+		size:       size,
+		rdev:       be.Rdev,
+		xattrs:     be.Xattrs,
+		linkTarget: be.LinkTarget,
+		chunks:     be.Chunks,
+		contiguous: be.Contiguous,
+	}
+	if be.Nlink > 0 {
+		fe.nlink = be.Nlink
+		fe.nlinkSet = true
+	}
+
+	// Handle duplicate paths (overwrite semantics).
+	if existing, ok := fsys.byPath[p]; ok {
+		// Preserve tree linkage when overwriting.
+		savedParent := existing.parent
+		savedChildren := existing.children
+		*existing = *fe
+		existing.parent = savedParent
+		existing.children = savedChildren
+		fe = existing
+	} else {
+		fsys.addChild(fe)
+	}
+
+	if fsys.copyMetadataOnly {
+		fe.metadataOnly = true
+		// Remap chunk DeviceIDs from source-relative to absolute.
+		// For single-device sources, all chunks use DeviceID=1
+		// and get mapped to copyDeviceID.
+		// For multi-device sources (e.g. EROFS images), chunks have
+		// DeviceIDs 1..N that get offset by copyDeviceID-1.
+		if fsys.copyDeviceID > 1 && len(fe.chunks) > 0 {
+			offset := fsys.copyDeviceID - 1
+			// fe.chunks still aliases the source entry's slice. Remap a
+			// private copy so the source is left untouched; otherwise
+			// copying the same entry again would apply the offset twice.
+			remapped := make([]builder.Chunk, len(fe.chunks))
+			copy(remapped, fe.chunks)
+			for i := range remapped {
+				remapped[i].DeviceID += offset
+			}
+			fe.chunks = remapped
+		}
+	}
+
+	// Write regular file data.
+	// Skip entirely in metadata-only mode.
+	needData := typ == disk.StatTypeReg && size > 0 && be.Data != nil &&
+		!fsys.copyMetadataOnly
+	if needData {
+		// Data is stored locally; clear any source chunk mappings.
+		fe.chunks = nil
+		fe.contiguous = false
+		if fsys.dataFile != nil {
+			// Data file mode: copy through File for block-aligned padding and chunk recording.
+			f := &File{fs: fsys, entry: fe}
+			f.dataStartOff = fsys.dataOff
+			fe.dataStartOff = fsys.dataOff
+			if _, err := f.ReadFrom(be.Data); err != nil {
+				return err
+			}
+			if err := f.Close(); err != nil {
+				return err
+			}
+		} else {
+			// Spool mode: keep a direct reference to avoid copying.
+			fe.directData = be.Data
+			fe.fileClosed = true
+		}
+	} else {
+		fe.fileClosed = true
+	}
+
+	return nil
+}
+
+// checkPath reports whether name may be registered: the writer must still
+// be open and name must not already be present in byPath.
+func (fsys *Writer) checkPath(name string) error {
+	switch _, taken := fsys.byPath[name]; {
+	case fsys.closed:
+		return fmt.Errorf("mkfs: FS is closed")
+	case taken:
+		return fmt.Errorf("mkfs: duplicate path %q", name)
+	default:
+		return nil
+	}
+}
+
+// ensureParent makes sure every ancestor directory of name exists, creating
+// the missing ones as plain 0755 directories. The search walks upward from
+// name's parent and stops at the first ancestor already known (or at "/").
+func (fsys *Writer) ensureParent(name string) {
+	// Deepest missing directory first.
+	var pending []string
+	for dir := path.Dir(name); dir != "/"; dir = path.Dir(dir) {
+		if _, exists := fsys.byPath[dir]; exists {
+			break
+		}
+		pending = append(pending, dir)
+	}
+	// Pop from the end so that parents are registered before children.
+	for len(pending) > 0 {
+		last := len(pending) - 1
+		fsys.addChild(&fsEntry{
+			path: pending[last],
+			mode: disk.StatTypeDir | 0o755,
+		})
+		pending = pending[:last]
+	}
+}
+
+// addChild links e into the tree and indexes it in byPath. The parent is
+// looked up by the directory part of e.path; when no such entry is
+// registered, e is attached to the root.
+func (fsys *Writer) addChild(e *fsEntry) {
+	owner := fsys.byPath[path.Dir(e.path)]
+	if owner == nil {
+		owner = fsys.root
+	}
+	owner.children = append(owner.children, e)
+	e.parent = owner
+	fsys.byPath[e.path] = e
+}
+
+// remove marks an entry and all its descendants as removed.
+// Used by Merge to process whiteout deletions. Unknown paths are ignored.
+//
+// The entry goes through unlinkOne so hardlink bookkeeping stays
+// consistent: removing an alias drops it from the canonical entry's list,
+// and removing a canonical entry promotes a surviving alias. Flagging the
+// entry directly would leave aliases pointing at a removed canonical entry.
+func (fsys *Writer) remove(p string) {
+	p = cleanPath(p)
+	e, ok := fsys.byPath[p]
+	if !ok {
+		return
+	}
+	fsys.unlinkOne(e)
+	if e.mode&disk.StatTypeMask == disk.StatTypeDir {
+		fsys.removeSubtree(e)
+	}
+}
+
+// removeChildren marks everything below dir as removed while keeping dir
+// itself. A directory that is not registered is ignored.
+func (fsys *Writer) removeChildren(dir string) {
+	if e, found := fsys.byPath[cleanPath(dir)]; found {
+		fsys.removeSubtree(e)
+	}
+}
+
+// removeSubtree recursively marks all descendants of e as removed. Children
+// that are already removed are skipped together with their subtrees.
+//
+// Each child goes through unlinkOne so hardlink bookkeeping stays
+// consistent: aliases are dropped from their canonical entry, and a removed
+// canonical entry hands its data over to a surviving alias. If that alias
+// lives in the same subtree it is unlinked in turn when the walk reaches it.
+func (fsys *Writer) removeSubtree(e *fsEntry) {
+	for _, c := range e.children {
+		if c.removed {
+			continue
+		}
+		fsys.unlinkOne(c)
+		if c.mode&disk.StatTypeMask == disk.StatTypeDir {
+			fsys.removeSubtree(c)
+		}
+	}
+}
+
+// buildErofsTree turns the writer's fsEntry tree into the erofsEntry tree
+// consumed by the image builder. Directories are visited breadth-first,
+// removed entries are skipped, and each directory's children are sorted by
+// name so the output is deterministic. The Writer is consumed.
+//
+// Hardlink aliases (fsEntry.linkedTo != nil) never get an inode of their
+// own: they become stub entries whose linkTo references the canonical
+// entry's erofsEntry. A stub whose canonical entry has not been converted
+// yet is resolved afterwards by patchHardlinkAliases.
+//
+// Link counts that were not set explicitly are derived here: directories get
+// 2 plus the number of live child directories, canonical hardlink entries
+// get 1 plus the number of aliases.
+func (fsys *Writer) buildErofsTree() *erofsEntry {
+	type work struct {
+		src *fsEntry
+		dst *erofsEntry
+	}
+
+	// Every converted (non-alias) fsEntry, for hardlink resolution.
+	converted := make(map[*fsEntry]*erofsEntry)
+
+	rootEr := fsys.fsToErofs(fsys.root)
+	converted[fsys.root] = rootEr
+
+	for queue := []work{{src: fsys.root, dst: rootEr}}; len(queue) > 0; {
+		cur := queue[0]
+		queue = queue[1:]
+
+		if len(cur.src.children) > 0 {
+			cur.dst.children = make([]*erofsEntry, 0, len(cur.src.children))
+		}
+
+		var subdirs uint32
+		for _, child := range cur.src.children {
+			if child.removed {
+				continue
+			}
+			if child.linkedTo != nil {
+				// Alias: dirent-only stub. Resolve right away when the
+				// canonical entry is already converted; otherwise leave
+				// linkTo nil for the patch pass below.
+				stub := &erofsEntry{
+					name: path.Base(child.path),
+					path: child.path,
+				}
+				if target, ok := converted[child.linkedTo]; ok {
+					stub.linkTo = target
+					stub.erofsFileType = target.erofsFileType
+				}
+				cur.dst.children = append(cur.dst.children, stub)
+				continue
+			}
+
+			ent := fsys.fsToErofs(child)
+			converted[child] = ent
+			cur.dst.children = append(cur.dst.children, ent)
+			if child.mode&disk.StatTypeMask == disk.StatTypeDir {
+				subdirs++
+				queue = append(queue, work{src: child, dst: ent})
+			}
+		}
+
+		// Directory link count: "." and the parent's dirent, plus one ".."
+		// per live child directory.
+		if !cur.src.nlinkSet && cur.src.mode&disk.StatTypeMask == disk.StatTypeDir {
+			cur.dst.nlink = 2 + subdirs
+		}
+
+		listing := cur.dst.children
+		sort.Slice(listing, func(a, b int) bool {
+			return listing[a].name < listing[b].name
+		})
+	}
+
+	// Resolve stubs whose canonical entry was converted after them.
+	fsys.patchHardlinkAliases(rootEr, converted)
+
+	// Canonical entries count one link per alias plus themselves.
+	for src, dst := range converted {
+		if src.nlinkSet || len(src.hardlinks) == 0 {
+			continue
+		}
+		dst.nlink = uint32(len(src.hardlinks) + 1)
+	}
+
+	return rootEr
+}
+
+// patchHardlinkAliases walks the erofsEntry tree depth-first and fills in
+// linkTo (and the file type) on alias stubs that could not be resolved
+// during the breadth-first conversion because their canonical entry had not
+// been converted yet. A stub is recognized by a nil linkTo, a zero mode and
+// no children; its fsEntry is found again through byPath.
+func (fsys *Writer) patchHardlinkAliases(e *erofsEntry, canonical map[*fsEntry]*erofsEntry) {
+	for _, child := range e.children {
+		unresolved := child.linkTo == nil && child.mode == 0 && len(child.children) == 0
+		if unresolved {
+			src, known := fsys.byPath[child.path]
+			if known && src.linkedTo != nil {
+				if target, found := canonical[src.linkedTo]; found {
+					child.linkTo = target
+					child.erofsFileType = target.erofsFileType
+				}
+			}
+		}
+		if child.mode&disk.StatTypeMask == disk.StatTypeDir {
+			fsys.patchHardlinkAliases(child, canonical)
+		}
+	}
+}
+
+// fsToErofs builds the erofsEntry for a single fsEntry.
+//
+// The link count is the explicitly set value if there is one, otherwise 2
+// for directories (refined later by buildErofsTree) and 1 for everything
+// else. A data reader is attached only to non-empty regular files whose
+// bytes are held locally: no external data file, no chunk mappings and not
+// metadata-only. The source-provided reader is preferred over the spool.
+func (fsys *Writer) fsToErofs(e *fsEntry) *erofsEntry {
+	typ := e.mode & disk.StatTypeMask
+
+	nlink := uint32(1)
+	if e.nlinkSet {
+		nlink = e.nlink
+	} else if typ == disk.StatTypeDir {
+		nlink = 2 // adjusted by buildErofsTree
+	}
+
+	var data io.Reader
+	holdsLocalData := fsys.dataFile == nil && len(e.chunks) == 0 && !e.metadataOnly &&
+		typ == disk.StatTypeReg && e.size > 0
+	if holdsLocalData {
+		switch {
+		case e.directData != nil:
+			data = e.directData
+		case fsys.spool != nil:
+			data = io.NewSectionReader(fsys.spool, e.spoolOff, int64(e.size))
+		}
+	}
+
+	out := &erofsEntry{
+		name:          path.Base(e.path),
+		path:          e.path,
+		erofsFileType: modeToFileType(e.mode),
+		nlink:         nlink,
+		data:          data,
+	}
+	out.mode, out.uid, out.gid = e.mode, e.uid, e.gid
+	out.mtime, out.mtimeNs = e.mtime, e.mtimeNs
+	out.size, out.rdev = e.size, e.rdev
+	out.symTarget = e.linkTarget
+	out.xattrs = e.xattrs
+	out.chunks, out.contiguous, out.metadataOnly = e.chunks, e.contiguous, e.metadataOnly
+	return out
+}
+
+// setBlockSize fixes the image block size to n. n must lie within
+// [minBlockSize, maxBlockSize] and be a power of two. The first valid call
+// wins; later calls succeed only when they request the same value.
+func (fsys *Writer) setBlockSize(n int) error {
+	switch {
+	case n < minBlockSize || n > maxBlockSize:
+		return fmt.Errorf("mkfs: invalid block size %d: must be between %d and %d", n, minBlockSize, maxBlockSize)
+	case bits.OnesCount(uint(n)) != 1:
+		return fmt.Errorf("mkfs: invalid block size %d: must be a power of two", n)
+	case fsys.blockSize == 0:
+		fsys.blockSize = n
+	case fsys.blockSize != n:
+		return fmt.Errorf("mkfs: block size conflict: already %d, requested %d", fsys.blockSize, n)
+	}
+	return nil
+}
+
+// resolveBlockSize returns the block size, storing defaultBlockSize (4096) first if unset.
+func (fsys *Writer) resolveBlockSize() int {
+	if fsys.blockSize == 0 { // zero means no block size has been chosen yet
+		fsys.blockSize = defaultBlockSize
+	}
+	return fsys.blockSize
+}
+
+// copyBuf returns the Writer's shared 32 KiB buffer for io.Copy operations, allocated on first use.
+func (fsys *Writer) copyBuf() []byte {
+	if fsys.cpBuf == nil { // allocate once; every later call returns the same slice
+		fsys.cpBuf = make([]byte, 32*1024)
+	}
+	return fsys.cpBuf
+}
+
+// zeroPad returns a shared buffer of resolveBlockSize() bytes, zero-filled when first allocated.
+func (fsys *Writer) zeroPad() []byte {
+	if fsys.padBuf == nil { // the length is fixed by the block size in effect at the first call
+		fsys.padBuf = make([]byte, fsys.resolveBlockSize())
+	}
+	return fsys.padBuf
+}
+
+// chunksFromRanges translates ranges into the chunk list of a file that is
+// fileSize bytes long, using the Writer's resolved block size.
+//
+// Each range is split into chunks of at most 65535 blocks. A range whose
+// Offset equals holeOffset becomes [builder.NullPhysicalBlock] chunks; any
+// other range becomes chunks starting at block Offset/blockSize with
+// DeviceID Device+1, since DeviceID 0 denotes the primary image in EROFS.
+//
+// An error is returned when:
+//   - the Sizes do not add up to fileSize exactly;
+//   - a range has Size <= 0;
+//   - a range other than the last has a Size that is not a multiple of the
+//     block size (the last one may end mid-block to match the file tail);
+//   - a data range has a negative Offset, or an Offset that is not
+//     block-aligned;
+//   - a data range names a Device other than 0.
+//
+// The total is checked first; the other checks run per range, in order.
+func (fsys *Writer) chunksFromRanges(ranges []DataRange, fileSize int64) ([]builder.Chunk, error) {
+	// A chunk's Count field is a uint16, so one chunk covers at most this
+	// many blocks; longer runs are split.
+	const maxRun = 65535
+	blockSize := uint64(fsys.resolveBlockSize())
+
+	// The ranges must cover the file exactly.
+	var covered int64
+	for i := range ranges {
+		covered += ranges[i].Size
+	}
+	if covered != fileSize {
+		return nil, fmt.Errorf("DataRange total size %d does not match file size %d", covered, fileSize)
+	}
+
+	// emit appends the chunks for a run of nblocks blocks beginning with
+	// first. With advance set, each following chunk starts where the
+	// previous one ended; otherwise every chunk repeats first's address.
+	var chunks []builder.Chunk
+	emit := func(first builder.Chunk, nblocks uint64, advance bool) {
+		for next := first; nblocks > 0; {
+			run := min(nblocks, maxRun)
+			next.Count = uint16(run)
+			chunks = append(chunks, next)
+			nblocks -= run
+			if advance {
+				next.PhysicalBlock += run
+			}
+		}
+	}
+
+	for i, r := range ranges {
+		// Size checks apply to holes and data ranges alike.
+		isLast := i == len(ranges)-1
+		switch {
+		case r.Size <= 0:
+			return nil, fmt.Errorf("DataRange[%d]: non-positive Size %d", i, r.Size)
+		case !isLast && uint64(r.Size)%blockSize != 0:
+			// Only the final range may end mid-block (the file tail).
+			return nil, fmt.Errorf("DataRange[%d]: non-final Size %d is not block-aligned (block size %d)", i, r.Size, blockSize)
+		}
+		nblocks := (uint64(r.Size) + blockSize - 1) / blockSize
+
+		// Hole: null chunks; Offset alignment and Device are not checked.
+		if r.Offset == holeOffset {
+			emit(builder.Chunk{PhysicalBlock: builder.NullPhysicalBlock}, nblocks, false)
+			continue
+		}
+
+		// Data: Offset must name a whole block on the single extra device.
+		switch {
+		case r.Offset < 0:
+			return nil, fmt.Errorf("DataRange[%d]: negative Offset %d", i, r.Offset)
+		case uint64(r.Offset)%blockSize != 0:
+			return nil, fmt.Errorf("DataRange[%d]: Offset %d is not block-aligned (block size %d)", i, r.Offset, blockSize)
+		case r.Device != 0:
+			// Non-EROFS sources register exactly one device via DeviceBlocks();
+			// only Device=0 is valid. Device=0xFFFF would also wrap the DeviceID
+			// below to 0 (the primary image), producing an invalid mapping.
+			return nil, fmt.Errorf("DataRange[%d]: Device %d out of range (source declared one device, only Device=0 is valid)", i, r.Device)
+		}
+		// DeviceID 0 is the primary image, so Device n maps to DeviceID n+1.
+		start := builder.Chunk{
+			PhysicalBlock: uint64(r.Offset) / blockSize,
+			DeviceID:      r.Device + 1,
+		}
+		emit(start, nblocks, true)
+	}
+	return chunks, nil
+}
+
+// ensureSpool creates the spool temp file in fsys.tempDir on first use; it is a no-op once set.
+func (fsys *Writer) ensureSpool() error {
+	if fsys.spool != nil {
+		return nil
+	}
+	tmp, err := os.CreateTemp(fsys.tempDir, "erofs-mkfs-*")
+	if err != nil {
+		return fmt.Errorf("mkfs: create spool: %w", err)
+	}
+	_ = os.Remove(tmp.Name()) // unlink immediately; fd keeps data accessible (best effort: the error is ignored)
+	fsys.spool = tmp
+	return nil
+}
+
+// lookup returns the entry registered under the cleaned form of name, or an error.
+func (fsys *Writer) lookup(name string) (*fsEntry, error) {
+	key := cleanPath(name)
+	if entry, found := fsys.byPath[key]; found {
+		return entry, nil
+	}
+	return nil, fmt.Errorf("mkfs: path not found %q", key)
+}
+
+// closeDataFile zero-pads the data file up to the next block boundary and
+// appends to the entry the chunks that describe the bytes this File wrote.
+// It does nothing when no bytes were written.
+func (f *File) closeDataFile() error {
+	if f.written == 0 {
+		return nil // nothing was written: no padding, no chunks
+	}
+	blockSize := f.fs.resolveBlockSize()
+
+	// Bring dataOff up to a multiple of the block size. dataOff advances by
+	// whatever was actually written, even when the write reports an error.
+	if tail := f.fs.dataOff % int64(blockSize); tail != 0 {
+		pad := f.fs.zeroPad()[:int64(blockSize)-tail]
+		n, err := f.fs.dataFile.Write(pad)
+		f.fs.dataOff += int64(n)
+		if err != nil {
+			return fmt.Errorf("mkfs: pad data file: %w", err)
+		}
+	}
+
+	// Describe the written span as runs of at most 65535 blocks (Count is a
+	// uint16), all on device 1, starting at the block of dataStartOff.
+	bs := uint64(blockSize)
+	next := uint64(f.dataStartOff) / bs
+	left := (uint64(f.written) + bs - 1) / bs
+	for left > 0 {
+		run := min(left, 65535)
+		f.entry.chunks = append(f.entry.chunks, builder.Chunk{
+			PhysicalBlock: next,
+			Count:         uint16(run),
+			DeviceID:      1,
+		})
+		next += run
+		left -= run
+	}
+
+	return nil
+}
+
+// --- Constants ---
+
+const (
+	minBlockSize     = 512        // smallest block size accepted by setBlockSize
+	defaultBlockSize = 4096       // block size resolveBlockSize falls back to
+	nullAddr         = 0xFFFFFFFF // marks a hole/sparse chunk
+
+	// Overlay whiteout markers (AUFS convention used by OCI layers).
+	whiteoutPrefix = ".wh."         // name prefix of a marker that removes the named sibling
+	opaqueWhiteout = ".wh..wh..opq" // marker that removes the children of its directory
+)
+
+// blkBits returns log2(blockSize); the result is exact only for powers of two.
+func blkBits(blockSize int) uint8 {
+	return uint8(bits.TrailingZeros(uint(blockSize))) // position of the lowest set bit
+}
diff --git a/vendor/github.com/erofs/go-erofs/mkfs_darwin.go b/vendor/github.com/erofs/go-erofs/mkfs_darwin.go
new file mode 100644
index 0000000..dbb418a
--- /dev/null
+++ b/vendor/github.com/erofs/go-erofs/mkfs_darwin.go
@@ -0,0 +1,26 @@
+package erofs
+
+import (
+	"io/fs"
+	"syscall"
+
+	"github.com/erofs/go-erofs/internal/builder"
+)
+
+func entryFromSys(info fs.FileInfo) *builder.Entry { // Darwin variant: derives a builder.Entry from info.Sys()
+	switch sys := info.Sys().(type) {
+	case *builder.Entry: // already a builder entry: returned as-is, not copied
+		return sys
+	case *syscall.Stat_t: // host stat data: carry over owner, mtime, link count and rdev
+		return &builder.Entry{
+			UID:     sys.Uid,
+			GID:     sys.Gid,
+			Mtime:   uint64(sys.Mtimespec.Sec),
+			MtimeNs: uint32(sys.Mtimespec.Nsec),
+			Nlink:   uint32(sys.Nlink),
+			Rdev:    uint32(sys.Rdev),
+		}
+	default: // unrecognized Sys() value
+		return nil
+	}
+}
diff --git a/vendor/github.com/erofs/go-erofs/mkfs_image.go b/vendor/github.com/erofs/go-erofs/mkfs_image.go
new file mode 100644
index 0000000..0c33b5b
--- /dev/null
+++ b/vendor/github.com/erofs/go-erofs/mkfs_image.go
@@ -0,0 +1,526 @@
+package erofs
+
+import (
+	"encoding/binary"
+	"fmt"
+	"io"
+	"path"
+
+	"github.com/erofs/go-erofs/internal/builder"
+	"github.com/erofs/go-erofs/internal/disk"
+)
+
+// newMetaReader returns an at(off) accessor over image bytes [0, totalBytes): the
+// region from metaStart on is read eagerly, lower blocks are read on demand and
+// cached. at returns nil when off is out of range or the backing read failed.
+func newMetaReader(ra io.ReaderAt, metaStart, totalBytes int64, blockSize int) func(int64) []byte {
+	// readFull fills p from off; io.ReaderAt may report io.EOF with a full read at end of input.
+	readFull := func(p []byte, off int64) bool {
+		n, err := ra.ReadAt(p, off)
+		return n == len(p) && (err == nil || err == io.EOF)
+	}
+	metaSize := totalBytes - metaStart
+	if metaSize <= 0 {
+		return func(int64) []byte { return nil }
+	}
+	metaBuf := make([]byte, metaSize)
+	if !readFull(metaBuf, metaStart) {
+		return func(int64) []byte { return nil }
+	}
+	cache := make(map[int64][]byte)
+	return func(off int64) []byte {
+		if off >= metaStart { // fast path: inside the eagerly-read metadata
+			if off >= totalBytes {
+				return nil
+			}
+			return metaBuf[off-metaStart:]
+		}
+		if off < 0 || blockSize <= 0 { // below metaStart: a data block, which needs a usable block size
+			return nil
+		}
+		blkAddr := off - off%int64(blockSize)
+		if cached, ok := cache[blkAddr]; ok {
+			return cached[off-blkAddr:]
+		}
+		sz := int64(blockSize)
+		if blkAddr+sz > totalBytes { // the final block may be short
+			sz = totalBytes - blkAddr
+		}
+		buf := make([]byte, sz)
+		if !readFull(buf, blkAddr) {
+			return nil
+		}
+		cache[blkAddr] = buf
+		return buf[off-blkAddr:]
+	}
+}
+
+// imgQEntry is a BFS queue entry for the image metadata walk.
+type imgQEntry struct {
+	nid  uint64 // NID of the inode in the source image
+	path string // absolute path the inode is copied to; "/" for the root
+}
+
+// copyFromImage is a fast path for CopyFrom when the source is an *image.
+// Instead of walking via the fs.FS interface (which does per-inode ReadAt
+// syscalls), it reads the entire metadata area into memory and walks the
+// tree breadth-first from the root NID, parsing inodes, directory entries,
+// xattrs, and chunk indexes directly from that buffer.
+//
+// Hardlinks are preserved: when a non-directory inode with nlink > 1 is
+// reached again under another path, that path is registered via Writer.Link
+// instead of as an independent inode.
+func (fsys *Writer) copyFromImage(img *image) error {
+	metaStart := img.metaStartPos()
+	totalBytes := int64(img.sb.Blocks) << img.sb.BlkSizeBits // image size in bytes according to the superblock
+	if totalBytes <= 0 {
+		return nil
+	}
+
+	blkBits := img.sb.BlkSizeBits
+	buildTime := img.sb.BuildTime
+	buildTimeNs := img.sb.BuildTimeNs
+
+	blockSize := int(1 << blkBits)
+
+	// Get an accessor for image data. Reads the metadata region eagerly
+	// and loads flat-plain data blocks on demand.
+	at := newMetaReader(img.meta, metaStart, totalBytes, blockSize)
+
+	// Shared xattr block address (if present). The at() function
+	// will load the block on demand when xattrs are parsed.
+	var sharedXattrOff int64
+	if img.sb.XattrBlkAddr > 0 {
+		sharedXattrOff = int64(img.sb.XattrBlkAddr) << blkBits
+	}
+
+	// Pre-allocate based on inode count from superblock.
+	inodeCount := int(img.sb.Inos)
+	if inodeCount == 0 {
+		inodeCount = 64
+	}
+	queue := make([]imgQEntry, 0, inodeCount)
+	queue = append(queue, imgQEntry{nid: uint64(img.sb.RootNid), path: "/"})
+
+	// seenNID tracks the first destination path for each source NID that has
+	// nlink > 1 and is not a directory. When a NID is seen a second time, we
+	// call Writer.Link instead of creating a new inode, preserving hardlinks.
+	seenNID := make(map[uint64]string)
+
+	for len(queue) > 0 {
+		cur := queue[0]
+		queue = queue[1:]
+
+		// Merge mode: process whiteout markers.
+		if fsys.copyMerge && cur.path != "/" {
+			base := path.Base(cur.path)
+			if len(base) > len(whiteoutPrefix) && base[:len(whiteoutPrefix)] == whiteoutPrefix {
+				if base == opaqueWhiteout {
+					fsys.removeChildren(path.Dir(cur.path))
+				} else {
+					target := path.Dir(cur.path) + "/" + base[len(whiteoutPrefix):]
+					if path.Dir(cur.path) == "/" {
+						target = "/" + base[len(whiteoutPrefix):]
+					}
+					fsys.remove(target)
+				}
+				continue
+			}
+		}
+
+		inodeAddr := metaStart + int64(cur.nid*disk.SizeInodeCompact) // a NID counts SizeInodeCompact-sized slots from metaStart
+		buf := at(inodeAddr)
+		if len(buf) < disk.SizeInodeCompact {
+			return fmt.Errorf("inode %d out of range", cur.nid)
+		}
+
+		format := binary.LittleEndian.Uint16(buf[:2]) // bit 0 selects compact/extended; bits 1-3 hold the data layout
+		layout := uint8((format & 0x0E) >> 1)
+		compact := format&0x01 == 0
+
+		if compact && len(buf) < disk.SizeInodeCompact { // cannot trigger: the same length was checked just above
+			return fmt.Errorf("compact inode %d out of range", cur.nid)
+		}
+		if !compact && len(buf) < disk.SizeInodeExtended {
+			return fmt.Errorf("extended inode %d out of range", cur.nid)
+		}
+
+		var (
+			mode    uint16
+			uid     uint32
+			gid     uint32
+			nlink   uint32
+			size    uint64
+			idata   uint32
+			mtime   uint64
+			mtimeNs uint32
+			xcnt    uint16
+			icSize  int
+		)
+
+		if compact {
+			var ino disk.InodeCompact
+			if _, err := binary.Decode(buf[:disk.SizeInodeCompact], binary.LittleEndian, &ino); err != nil {
+				return fmt.Errorf("decode compact inode %d: %w", cur.nid, err)
+			}
+			mode = ino.Mode
+			uid = uint32(ino.UID)
+			gid = uint32(ino.GID)
+			nlink = uint32(ino.Nlink)
+			size = uint64(ino.Size)
+			idata = ino.InodeData
+			mtime = buildTime // compact inodes get the superblock build time as their mtime
+			mtimeNs = buildTimeNs
+			xcnt = ino.XattrCount
+			icSize = disk.SizeInodeCompact
+		} else {
+			var ino disk.InodeExtended
+			if _, err := binary.Decode(buf[:disk.SizeInodeExtended], binary.LittleEndian, &ino); err != nil {
+				return fmt.Errorf("decode extended inode %d: %w", cur.nid, err)
+			}
+			mode = ino.Mode
+			uid = ino.UID
+			gid = ino.GID
+			nlink = ino.Nlink
+			size = ino.Size
+			idata = ino.InodeData
+			mtime = ino.Mtime
+			mtimeNs = ino.MtimeNs
+			xcnt = ino.XattrCount
+			icSize = disk.SizeInodeExtended
+		}
+
+		// Parse xattr area.
+		xattrSize := 0
+		if xcnt > 0 {
+			xattrSize = int(xcnt-1)*disk.SizeXattrEntry + disk.SizeXattrBodyHeader // body header plus xcnt-1 entry-sized units
+		}
+		var xattrs map[string]string
+		if xattrSize > 0 {
+			xattrAddr := inodeAddr + int64(icSize)
+			xb := at(xattrAddr)
+			if len(xb) >= xattrSize { // NOTE(review): a short buffer drops this inode's xattrs without an error — confirm intended
+				xattrs = parseXattrsFromBuf(xb[:xattrSize], at, sharedXattrOff, img.getLongPrefix)
+			}
+		}
+
+		trailingAddr := inodeAddr + int64(icSize) + int64(xattrSize) // inline data / chunk indexes follow the xattr area
+		typ := mode & disk.StatTypeMask
+
+		// Hardlink detection: if this is a non-directory inode with nlink > 1
+		// that we've already registered under a different path, call Link()
+		// to share the inode rather than creating a duplicate.
+		if typ != disk.StatTypeDir && nlink > 1 {
+			if firstPath, seen := seenNID[cur.nid]; seen {
+				// Second (or later) path to this inode: emit a hardlink.
+				if cur.path != "/" {
+					if err := fsys.Link(firstPath, cur.path); err != nil {
+						return fmt.Errorf("link %s → %s: %w", firstPath, cur.path, err)
+					}
+				}
+				continue
+			}
+			// First time we see this NID; record it for future aliases.
+			seenNID[cur.nid] = cur.path
+		}
+
+		// Build fsEntry directly, bypassing builder.Entry + add() overhead.
+		fe := &fsEntry{
+			path:    cur.path,
+			mode:    mode,
+			uid:     uid,
+			gid:     gid,
+			mtime:   mtime,
+			mtimeNs: mtimeNs,
+			size:    size,
+			xattrs:  xattrs,
+		}
+		if nlink > 0 { // a zero link count in the source is left for the writer to derive
+			fe.nlink = nlink
+			fe.nlinkSet = true
+		}
+		fe.fileClosed = true
+		if fsys.copyMetadataOnly {
+			fe.metadataOnly = true
+		}
+
+		switch typ {
+		case disk.StatTypeDir:
+			dirSize := int(size)
+			if dirSize > 0 {
+				var dirData []byte
+				switch layout {
+				case disk.LayoutFlatPlain:
+					dataAddr := int64(idata) << blkBits
+					d := at(dataAddr)
+					if d != nil && len(d) >= dirSize {
+						dirData = d[:dirSize]
+					} else {
+						dirData = make([]byte, dirSize)
+						if _, err := img.meta.ReadAt(dirData, dataAddr); err != nil {
+							return fmt.Errorf("read dir data for nid %d: %w", cur.nid, err)
+						}
+					}
+				case disk.LayoutFlatInline:
+					d := at(trailingAddr)
+					if d != nil && len(d) >= dirSize {
+						dirData = d[:dirSize]
+					}
+				}
+				if dirData != nil { // NOTE(review): unreadable inline data or another layout leaves the directory without children, silently
+					fsys.parseDirBlock(dirData, dirSize, blockSize, cur.path, &queue)
+				}
+			}
+
+		case disk.StatTypeSymlink:
+			if size > 0 {
+				var linkData []byte
+				if layout == disk.LayoutFlatPlain {
+					linkData = make([]byte, size)
+					if _, err := img.meta.ReadAt(linkData, int64(idata)<<blkBits); err != nil {
+						return fmt.Errorf("read symlink data for nid %d: %w", cur.nid, err)
+					}
+				} else {
+					linkData = at(trailingAddr)
+				}
+				if linkData != nil && int(size) <= len(linkData) {
+					fe.linkTarget = string(linkData[:size])
+				}
+			}
+
+		case disk.StatTypeReg:
+			if layout == disk.LayoutChunkBased && size > 0 {
+				chunkFmt := uint16(idata)
+				if chunkFmt&disk.LayoutChunkFormatIndexes != 0 {
+					chunkAddr := trailingAddr
+					if chunkAddr%8 != 0 {
+						chunkAddr = (chunkAddr + 7) & ^int64(7) // round up to the next multiple of 8
+					}
+					fe.chunks = fsys.parseChunks(at(chunkAddr), chunkFmt, size, blkBits, img.deviceIDMask)
+					fe.contiguous = true
+				}
+			}
+
+		case disk.StatTypeChrdev, disk.StatTypeBlkdev:
+			fe.rdev = disk.RdevFromMode(mode, idata)
+		}
+
+		// Remap chunk DeviceIDs for metadata-only sources.
+		if fsys.copyMetadataOnly && fsys.copyDeviceID > 0 {
+			offset := fsys.copyDeviceID - 1 // shifts source DeviceID 1 onto copyDeviceID
+			for i := range fe.chunks {
+				fe.chunks[i].DeviceID += offset
+			}
+		}
+
+		// Register in the tree.
+		if cur.path == "/" {
+			// Update root metadata.
+			fsys.root.mode = fe.mode
+			fsys.root.uid = fe.uid
+			fsys.root.gid = fe.gid
+			fsys.root.mtime = fe.mtime
+			fsys.root.mtimeNs = fe.mtimeNs
+			fsys.root.nlink = fe.nlink
+			fsys.root.nlinkSet = fe.nlinkSet
+			fsys.root.xattrs = fe.xattrs
+		} else if existing, ok := fsys.byPath[cur.path]; ok {
+			// Merge overwrites: preserve tree linkage.
+			savedParent := existing.parent
+			savedChildren := existing.children
+			*existing = *fe
+			existing.parent = savedParent
+			existing.children = savedChildren
+		} else {
+			fsys.addChild(fe)
+		}
+	}
+	return nil
+}
+
+// parseDirBlock walks the dirent blocks held in data (dirSize bytes, split
+// into blockSize-sized blocks) and appends one queue entry per named child
+// for the BFS traversal. ".", ".." and empty names are skipped.
+func (fsys *Writer) parseDirBlock(data []byte, dirSize, blockSize int, parentPath string, queue *[]imgQEntry) {
+	// Children of the root get a single leading slash.
+	prefix := parentPath + "/"
+	if parentPath == "/" {
+		prefix = "/"
+	}
+
+	for start := 0; start < dirSize; start += blockSize {
+		blk := data[start:min(start+blockSize, dirSize)]
+		if len(blk) < disk.SizeDirent {
+			return
+		}
+
+		// The first dirent's name offset marks where the dirent table ends,
+		// which yields the number of dirents in this block.
+		count := int(binary.LittleEndian.Uint16(blk[8:10]) / disk.SizeDirent)
+		if count == 0 || count*disk.SizeDirent > len(blk) {
+			return
+		}
+
+		// nameOffAt reads the name offset stored in dirent i.
+		nameOffAt := func(i int) int {
+			base := i * disk.SizeDirent
+			return int(binary.LittleEndian.Uint16(blk[base+8 : base+10]))
+		}
+
+		for i := range count {
+			// A name runs up to the next dirent's name, or to the block end.
+			nameOff, nameEnd := nameOffAt(i), len(blk)
+			if i < count-1 {
+				nameEnd = nameOffAt(i + 1)
+			}
+			if nameOff >= len(blk) || nameEnd > len(blk) || nameOff >= nameEnd {
+				continue // malformed name bounds: skip this dirent
+			}
+
+			// Drop the NUL padding that may follow the name.
+			raw := blk[nameOff:nameEnd]
+			for len(raw) > 0 && raw[len(raw)-1] == 0 {
+				raw = raw[:len(raw)-1]
+			}
+			name := string(raw)
+			if name == "." || name == ".." || name == "" {
+				continue
+			}
+
+			// The NID occupies the first 8 bytes of the dirent.
+			nid := binary.LittleEndian.Uint64(blk[i*disk.SizeDirent : i*disk.SizeDirent+8])
+			*queue = append(*queue, imgQEntry{nid: nid, path: prefix + name})
+		}
+	}
+}
+
+// parseChunks decodes the chunk index array in data into coalesced block runs.
+// Null (hole) entries are skipped. One chunk can span more than 65535 blocks
+// when the chunk format bits are large, so longer runs are split to fit Count.
+func (fsys *Writer) parseChunks(data []byte, chunkFmt uint16, fileSize uint64, blkBits uint8, deviceIDMask uint16) []builder.Chunk {
+	const maxRun = 65535 // largest value the uint16 Count field can hold
+	chunkBits := blkBits + uint8(chunkFmt&disk.LayoutChunkFormatBits)
+	nchunks := int((fileSize-1)>>chunkBits) + 1
+	blocksPerChunk := uint64(1) << (chunkBits - blkBits)
+	if fileSize == 0 || len(data) < nchunks*disk.SizeChunkIndex {
+		return nil // empty file (nchunks would underflow) or index array truncated
+	}
+
+	chunks := make([]builder.Chunk, 0, nchunks)
+	for i := range nchunks {
+		idx := data[i*disk.SizeChunkIndex:]
+		startBlkLo := binary.LittleEndian.Uint32(idx[4:8])
+		if ^startBlkLo == 0 {
+			continue // null/hole
+		}
+		deviceID := binary.LittleEndian.Uint16(idx[2:4]) & deviceIDMask
+		physBlock := uint64(binary.LittleEndian.Uint16(idx[0:2]))<<32 | uint64(startBlkLo)
+		// Extend the previous run when this chunk continues it on the same device.
+		if n := len(chunks); n > 0 {
+			prev := &chunks[n-1]
+			if prev.DeviceID == deviceID && prev.PhysicalBlock+uint64(prev.Count) == physBlock &&
+				uint64(prev.Count)+blocksPerChunk <= maxRun {
+				prev.Count += uint16(blocksPerChunk)
+				continue
+			}
+		}
+		// Otherwise start new runs, split so that no Count is truncated.
+		for left := blocksPerChunk; left > 0; {
+			run := min(left, maxRun)
+			chunks = append(chunks, builder.Chunk{PhysicalBlock: physBlock, Count: uint16(run), DeviceID: deviceID})
+			physBlock += run
+			left -= run
+		}
+	}
+	return chunks
+}
+
+// parseXattrsFromBuf parses an inode's xattr area (header, shared references,
+// inline entries) from buf and returns name → value, or nil when none were found.
+// at reads the shared area at sharedOff; longPrefix resolves long prefix indexes.
+func parseXattrsFromBuf(buf []byte, at func(int64) []byte, sharedOff int64, longPrefix func(uint8) (string, error)) map[string]string {
+	if len(buf) < disk.SizeXattrBodyHeader {
+		return nil
+	}
+
+	var xh disk.XattrHeader
+	if _, err := binary.Decode(buf[:disk.SizeXattrBodyHeader], binary.LittleEndian, &xh); err != nil {
+		return nil
+	}
+	pos := disk.SizeXattrBodyHeader
+
+	xattrs := make(map[string]string)
+
+	// Resolve shared xattr references.
+	for i := 0; i < int(xh.SharedCount) && pos+4 <= len(buf); i++ {
+		idx := binary.LittleEndian.Uint32(buf[pos : pos+4])
+		pos += 4
+
+		if sharedOff == 0 { // no shared area known: the reference is consumed but not resolved
+			continue
+		}
+		sharedBlock := at(sharedOff + int64(idx)*4) // a reference counts 4-byte units from sharedOff
+		if sharedBlock == nil || len(sharedBlock) < disk.SizeXattrEntry {
+			continue
+		}
+		var xe disk.XattrEntry
+		if _, err := binary.Decode(sharedBlock[:disk.SizeXattrEntry], binary.LittleEndian, &xe); err != nil {
+			continue
+		}
+		entryLen := int(xe.NameLen) + int(xe.ValueLen)
+		if disk.SizeXattrEntry+entryLen > len(sharedBlock) {
+			continue
+		}
+		sb := sharedBlock[disk.SizeXattrEntry:]
+		name := xattrName(xe, sb[:xe.NameLen], longPrefix)
+		value := string(sb[xe.NameLen : int(xe.NameLen)+int(xe.ValueLen)])
+		xattrs[name] = value
+	}
+
+	// Parse inline xattr entries.
+	for pos+disk.SizeXattrEntry <= len(buf) {
+		var xe disk.XattrEntry
+		if _, err := binary.Decode(buf[pos:pos+disk.SizeXattrEntry], binary.LittleEndian, &xe); err != nil {
+			break
+		}
+		pos += disk.SizeXattrEntry
+
+		entryLen := int(xe.NameLen) + int(xe.ValueLen)
+		if pos+entryLen > len(buf) { // entry runs past the buffer: stop, keeping what was parsed so far
+			break
+		}
+
+		name := xattrName(xe, buf[pos:pos+int(xe.NameLen)], longPrefix)
+		pos += int(xe.NameLen)
+		value := string(buf[pos : pos+int(xe.ValueLen)])
+		pos += int(xe.ValueLen)
+
+		xattrs[name] = value // an inline entry overwrites a shared one with the same name
+
+		// Round up to 4-byte boundary.
+		if rem := pos % 4; rem != 0 {
+			pos += 4 - rem
+		}
+	}
+	if len(xattrs) == 0 {
+		return nil
+	}
+	return xattrs
+}
+
+// xattrName returns the full xattr name: the prefix selected by xe.NameIndex
+// followed by rawName. A long prefix that cannot be resolved contributes nothing.
+func xattrName(xe disk.XattrEntry, rawName []byte, longPrefix func(uint8) (string, error)) string {
+	const longFlag = 0x80 // high bit of NameIndex: index into the long prefix table
+	switch idx := xe.NameIndex; {
+	case idx&longFlag != 0 && longPrefix != nil:
+		// The low 7 bits select the long prefix.
+		if p, err := longPrefix(idx &^ longFlag); err == nil {
+			return p + string(rawName)
+		}
+	case idx&longFlag == 0 && idx != 0:
+		// Non-zero short index: the prefix comes from xattrIndex.String().
+		return xattrIndex(idx).String() + string(rawName)
+	}
+	return string(rawName)
+}
diff --git a/vendor/github.com/erofs/go-erofs/mkfs_other.go b/vendor/github.com/erofs/go-erofs/mkfs_other.go
new file mode 100644
index 0000000..3ab0730
--- /dev/null
+++ b/vendor/github.com/erofs/go-erofs/mkfs_other.go
@@ -0,0 +1,16 @@
+//go:build !linux && !darwin
+
+package erofs
+
+import (
+	"io/fs"
+
+	"github.com/erofs/go-erofs/internal/builder"
+)
+
+func entryFromSys(info fs.FileInfo) *builder.Entry { // variant for platforms other than linux and darwin
+	if be, ok := info.Sys().(*builder.Entry); ok { // only builder entries are recognized here
+		return be
+	}
+	return nil // unrecognized Sys() value
+}
diff --git a/vendor/github.com/erofs/go-erofs/mkfs_unix.go b/vendor/github.com/erofs/go-erofs/mkfs_unix.go
new file mode 100644
index 0000000..b4a6a48
--- /dev/null
+++ b/vendor/github.com/erofs/go-erofs/mkfs_unix.go
@@ -0,0 +1,30 @@
+//go:build linux
+
+package erofs
+
+import (
+	"io/fs"
+	"syscall"
+
+	"github.com/erofs/go-erofs/internal/builder"
+)
+
+// entryFromSys extracts metadata from info.Sys(): a *builder.Entry is returned
+// as-is and a *syscall.Stat_t is converted. Any other type yields nil.
+func entryFromSys(info fs.FileInfo) *builder.Entry {
+	switch sys := info.Sys().(type) {
+	case *builder.Entry: // already a builder entry: returned without copying
+		return sys
+	case *syscall.Stat_t: // host stat data: carry over owner, mtime, link count and rdev
+		return &builder.Entry{
+			UID:     sys.Uid,
+			GID:     sys.Gid,
+			Mtime:   uint64(sys.Mtim.Sec),
+			MtimeNs: uint32(sys.Mtim.Nsec),
+			Nlink:   uint32(sys.Nlink),
+			Rdev:    uint32(sys.Rdev),
+		}
+	default: // unrecognized Sys() value
+		return nil
+	}
+}
diff --git a/vendor/github.com/erofs/go-erofs/writer.go b/vendor/github.com/erofs/go-erofs/writer.go
new file mode 100644
index 0000000..8bdaf9d
--- /dev/null
+++ b/vendor/github.com/erofs/go-erofs/writer.go
@@ -0,0 +1,689 @@
+package erofs
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"math"
+	"os"
+	"sort"
+
+	"github.com/erofs/go-erofs/internal/builder"
+	"github.com/erofs/go-erofs/internal/disk"
+)
+
+// maxBlockSize is the largest block size we support (64 KiB). EROFS images
+// with larger block sizes are unmountable on common platforms (aarch64 caps
+// page size at 64 KiB) and the reader rejects BlkSizeBits > 16.
+const maxBlockSize = 1 << 16
+
+// onlyWriter wraps an io.Writer to hide io.ReaderFrom so that
+// io.CopyBuffer uses the caller-provided buffer instead of
+// the destination's ReadFrom (which allocates its own).
+type onlyWriter struct{ io.Writer } // embedding the interface promotes Write and nothing else
+
+// erofsWriter serializes EROFS metadata to an io.Writer.
+type erofsWriter struct {
+	entries     []*erofsEntry // all entries in NID order
+	rootNid     uint64        // stored in the superblock RootNid field
+	metaBlkAddr uint32        // stored in the superblock MetaBlkAddr field
+	totalInodes uint64        // stored in the superblock Inos field
+	buildTime   uint64        // stored in the superblock BuildTime field
+	buildTimeNs uint32        // stored in the superblock BuildTimeNs field
+	devices     []uint64      // per-device block counts (one slot per entry)
+	blockSize   int
+	chunkBits   uint8                        // log2(chunkSize / blockSize); chunkSize = blockSize << chunkBits
+	copyBuf     []byte                       // reusable buffer for io.CopyBuffer
+	zeroBuf     []byte                       // blockSize-length zero buffer for padding
+	inodeBuf    [disk.SizeInodeExtended]byte // scratch buffer for writeInode
+}
+
+// inodeCoreSize returns the on-disk inode header size for e (compact or extended).
+func inodeCoreSize(e *erofsEntry) int {
+	if e.compact {
+		return disk.SizeInodeCompact
+	}
+	return disk.SizeInodeExtended
+}
+
+// entryChunkBits returns the chunk bits to use for e: the entry's own
+// chunkBits when set (non-zero), otherwise the writer-wide default.
+func (w *erofsWriter) entryChunkBits(e *erofsEntry) uint8 {
+	if e.chunkBits == 0 {
+		return w.chunkBits
+	}
+	return e.chunkBits
+}
+
+// entryChunkSize returns the chunk size in bytes for a specific entry: blockSize << entryChunkBits(e).
+func (w *erofsWriter) entryChunkSize(e *erofsEntry) int {
+	return w.blockSize << w.entryChunkBits(e)
+}
+
+// minChunkBits returns the smallest chunkBits, starting from the writer default,
+// with blockSize << chunkBits >= size. Growth stops at 31 (LayoutChunkFormatBits max).
+func (w *erofsWriter) minChunkBits(size uint64) uint8 {
+	n := w.chunkBits
+	for ; n < 31 && uint64(w.blockSize)<<n < size; n++ {
+		// keep growing until one chunk covers size or the cap is reached
+	}
+	return n
+}
+
+func (w *erofsWriter) write(out io.WriteSeeker) error { // allocates the copy buffer, then delegates to writeSeekable
+	w.copyBuf = make([]byte, 256*1024) // shared io.CopyBuffer buffer
+	return w.writeSeekable(out)
+}
+
+// writeSeekable uses a data-first on-disk layout: block0 (placeholder),
+// data blocks, metadata. After everything is written, it seeks back to
+// write the real superblock. This matches how mkfs.erofs lays out
+// streaming sources — data is written as it arrives, metadata last.
+func (w *erofsWriter) writeSeekable(out io.WriteSeeker) error {
+	// Data-first layout: sbArea, data blocks, metadata.
+	// Set metaBlkAddr to a sentinel so assignDataBlocks uses data-first.
+	w.metaBlkAddr = 0xFFFFFFFF
+	w.assignDataBlocks() // also replaces the sentinel with the first block after the data
+
+	// Write placeholder superblock area.
+	if _, err := out.Write(make([]byte, w.sbAreaSize())); err != nil {
+		return err
+	}
+
+	// Stream data blocks directly to output.
+	if err := w.writeDataBlocks(out); err != nil {
+		return err
+	}
+
+	// Buffer and write metadata.
+	meta := w.newMetaBuffer()
+	if err := w.writeMetadataInodes(meta); err != nil {
+		return err
+	}
+	if _, err := meta.WriteTo(out); err != nil {
+		return err
+	}
+
+	// Seek back and write the real block 0 (superblock).
+	if _, err := out.Seek(0, io.SeekStart); err != nil {
+		return err
+	}
+	return w.writeBlock0(out) // overwrites the zero placeholder written first
+}
+
+// newMetaBuffer returns an empty bytes.Buffer pre-sized for metadata
+// serialization: one block plus the block-rounded sum of all entry sizes.
+// Only the capacity is set; the buffer's length starts at zero.
+func (w *erofsWriter) newMetaBuffer() *bytes.Buffer {
+	const slot = 32 // each entry occupies a whole number of 32-byte slots
+
+	total := 0
+	for _, e := range w.entries {
+		// Inode core + xattrs + trailing data, rounded up to whole slots.
+		raw := inodeCoreSize(e) + e.xattrSize + e.trailingSize
+		total += (raw + slot - 1) &^ (slot - 1)
+	}
+
+	// Round the sum up to a block boundary, then add one additional block
+	// (blockSize is a power of two, so masking performs the rounding).
+	mask := w.blockSize - 1
+	capacity := w.blockSize + (total+mask)&^mask
+	return bytes.NewBuffer(make([]byte, 0, capacity))
+}
+
+// assignDataBlocks gives every entry with flat-plain data its first block
+// address, in entry order. Metadata-first: data follows the metadata blocks.
+// Data-first: data follows the superblock area and metaBlkAddr is set after it.
+func (w *erofsWriter) assignDataBlocks() {
+	sbBlks := w.sbAreaBlocks()
+	metaFirst := w.metaBlkAddr == uint32(sbBlks)
+
+	// Data starts right after the superblock area, or after the metadata
+	// blocks as well when the layout is metadata-first.
+	addr := uint32(sbBlks)
+	if metaFirst {
+		// The metadata area ends where the furthest-reaching inode ends.
+		metaEnd := 0
+		for _, e := range w.entries {
+			sz := inodeCoreSize(e) + e.xattrSize + e.trailingSize
+			if rem := sz % 32; rem != 0 {
+				sz += 32 - rem
+			}
+			if end := int(e.nid)*32 + sz; end > metaEnd {
+				metaEnd = end
+			}
+		}
+		metaBlocks := (metaEnd + w.blockSize - 1) / w.blockSize
+		addr = uint32(sbBlks + metaBlocks)
+	}
+
+	// Hand out consecutive block ranges to the entries that need them.
+	for _, e := range w.entries {
+		ds := w.flatPlainDataSize(e)
+		if ds <= 0 {
+			continue
+		}
+		e.dataBlkAddr = addr
+		addr += uint32((ds + w.blockSize - 1) / w.blockSize)
+	}
+	if !metaFirst {
+		w.metaBlkAddr = addr // metadata follows data
+	}
+}
+
+// sbAreaSize returns the byte size of the superblock area (the blocks before
+// data and metadata): the 1024-byte pad, the superblock and one slot per
+// device, rounded up to a whole number of blocks.
+func (w *erofsWriter) sbAreaSize() int {
+	// With no devices the slot term is zero, so no special case is needed.
+	used := disk.SuperBlockOffset + disk.SizeSuperBlock + len(w.devices)*disk.SizeDeviceSlot
+	// Round up to whole blocks, then convert back to bytes.
+	blocks := (used + w.blockSize - 1) / w.blockSize
+	return blocks * w.blockSize
+}
+
+// sbAreaBlocks returns the number of blocks occupied by the superblock area (sbAreaSize / blockSize).
+func (w *erofsWriter) sbAreaBlocks() int {
+	return w.sbAreaSize() / w.blockSize // exact: sbAreaSize is a multiple of blockSize
+}
+
+// metadataBytes returns the size in bytes of the metadata area: each entry
+// starts at its expected offset (NID * 32) or where the previous entry ended,
+// whichever is later, and occupies its size rounded up to 32 bytes.
+func (w *erofsWriter) metadataBytes() int {
+	end := 0
+	for _, e := range w.entries {
+		// Zero padding fills any gap before the inode's own slot.
+		start := max(end, int(e.nid)*32)
+
+		// Inode core, xattrs and trailing data, in whole 32-byte slots.
+		sz := inodeCoreSize(e) + e.xattrSize + e.trailingSize
+		if sz%32 != 0 {
+			sz = (sz + 31) &^ 31
+		}
+		end = start + sz
+	}
+	return end
+}
+
+func (w *erofsWriter) writeBlock0(buf io.Writer) error {
+	sbArea := make([]byte, w.sbAreaSize()) // whole superblock area, zero-filled; superblock and device slots are copied in below
+
+	totalMetaBytes := w.metadataBytes()
+	metaBlocks := (totalMetaBytes + w.blockSize - 1) / w.blockSize
+
+	// Count data blocks.
+	dataBlocks := 0
+	for _, e := range w.entries {
+		if ds := w.flatPlainDataSize(e); ds > 0 {
+			dataBlocks += (ds + w.blockSize - 1) / w.blockSize
+		}
+	}
+	totalBlocks := w.sbAreaBlocks() + metaBlocks + dataBlocks // image size in blocks, as stored in the superblock
+
+	var featureIncompat uint32
+	var extraDevices uint16
+	var devtSlotOff uint16
+
+	if len(w.devices) > 0 {
+		featureIncompat |= disk.FeatureIncompatDeviceTable
+		extraDevices = uint16(len(w.devices))
+		devtSlotOff = uint16(disk.SizeSuperBlock / 16) // NOTE(review): slots are written at SuperBlockOffset+SizeSuperBlock below; confirm this value addresses that offset in the unit readers use
+	}
+	for _, e := range w.entries {
+		if len(e.chunks) > 0 { // one chunked entry is enough to require the feature flag
+			featureIncompat |= disk.FeatureIncompatChunkedFile
+			break
+		}
+	}
+
+	sb := disk.SuperBlock{
+		MagicNumber:     disk.MagicNumber,
+		BlkSizeBits:     blkBits(w.blockSize),
+		RootNid:         uint16(w.rootNid), // NOTE(review): narrowing conversion; a root NID above 65535 would be truncated
+		Inos:            w.totalInodes,
+		BuildTime:       w.buildTime,
+		BuildTimeNs:     w.buildTimeNs,
+		Blocks:          uint32(totalBlocks), // NOTE(review): narrowing conversion without a range check
+		MetaBlkAddr:     w.metaBlkAddr,
+		FeatureIncompat: featureIncompat,
+		ExtraDevices:    extraDevices,
+		DevtSlotOff:     devtSlotOff,
+	}
+
+	sbBuf := &bytes.Buffer{}
+	if err := binary.Write(sbBuf, binary.LittleEndian, &sb); err != nil {
+		return fmt.Errorf("write superblock: %w", err)
+	}
+	copy(sbArea[disk.SuperBlockOffset:], sbBuf.Bytes())
+
+	// Write device slots right after superblock.
+	for i, blocks := range w.devices {
+		if blocks > math.MaxUint32 {
+			return fmt.Errorf("device %d block count %d exceeds 32-bit limit", i+1, blocks)
+		}
+		devSlot := disk.DeviceSlot{
+			Blocks: uint32(blocks),
+		}
+		devBuf := &bytes.Buffer{}
+		if err := binary.Write(devBuf, binary.LittleEndian, &devSlot); err != nil {
+			return fmt.Errorf("write device slot: %w", err)
+		}
+		off := disk.SuperBlockOffset + disk.SizeSuperBlock + i*disk.SizeDeviceSlot
+		copy(sbArea[off:], devBuf.Bytes())
+	}
+
+	_, err := buf.Write(sbArea) // a single write covers the whole superblock area
+	return err
+}
+
+// writeMetadataInodes writes every inode record (core inode, xattr area,
+// inline trailing data, padding to 32 bytes) and pads the area to a block
+// boundary. Data block addresses must already be assigned on each entry.
+func (w *erofsWriter) writeMetadataInodes(buf io.Writer) error {
+	metaStart := 0
+	for _, e := range w.entries {
+		expectedOff := int(e.nid) * 32
+		if expectedOff > metaStart {
+			if _, err := buf.Write(w.zeroBuf[:expectedOff-metaStart]); err != nil {
+				return err
+			}
+			metaStart = expectedOff
+		}
+
+		if err := w.writeInode(buf, e); err != nil {
+			return fmt.Errorf("write inode for %s: %w", e.path, err)
+		}
+		inodeSize := disk.SizeInodeExtended
+		if e.compact {
+			inodeSize = disk.SizeInodeCompact
+		}
+		metaStart += inodeSize
+
+		// Write xattr area
+		if e.xattrSize > 0 {
+			if err := w.writeXattrs(buf, e); err != nil {
+				return fmt.Errorf("write xattrs for %s: %w", e.path, err)
+			}
+			metaStart += e.xattrSize
+		}
+
+		// Write trailing data
+		switch e.mode & disk.StatTypeMask {
+		case disk.StatTypeReg:
+			if e.layout == disk.LayoutChunkBased && (e.size > 0 || len(e.chunks) > 0) {
+				if err := w.writeChunkIndexes(buf, e); err != nil {
+					return fmt.Errorf("write chunks for %s: %w", e.path, err)
+				}
+				metaStart += e.trailingSize
+			} else if e.layout == disk.LayoutFlatInline && e.size > 0 && e.data != nil {
+				n, err := io.CopyBuffer(onlyWriter{buf}, io.LimitReader(e.data, int64(e.size)), w.copyBuf)
+				if c, ok := e.data.(io.Closer); ok {
+					_ = c.Close()
+				}
+				if err != nil {
+					return fmt.Errorf("write inline data for %s: %w", e.path, err)
+				}
+				// Later offsets assume the full payload; reject a short source.
+				if n != int64(e.size) {
+					return fmt.Errorf("write inline data for %s: short read: got %d bytes, expected %d", e.path, n, e.size)
+				}
+				metaStart += int(n)
+			}
+		case disk.StatTypeDir:
+			if e.layout == disk.LayoutFlatInline {
+				n, err := w.writeDirents(buf, e)
+				if err != nil {
+					return fmt.Errorf("write dirents for %s: %w", e.path, err)
+				}
+				metaStart += n
+			}
+		case disk.StatTypeSymlink:
+			if e.layout == disk.LayoutFlatInline {
+				if _, err := io.WriteString(buf, e.symTarget); err != nil {
+					return fmt.Errorf("write symlink for %s: %w", e.path, err)
+				}
+				metaStart += len(e.symTarget)
+			}
+		}
+
+		// Pad to 32-byte boundary
+		totalWritten := inodeSize + e.xattrSize + e.trailingSize
+		if totalWritten%32 != 0 {
+			padSize := 32 - (totalWritten % 32)
+			if _, err := buf.Write(w.zeroBuf[:padSize]); err != nil {
+				return err
+			}
+			metaStart += padSize
+		}
+	}
+
+	// Pad metadata to full block boundary
+	if metaStart%w.blockSize != 0 {
+		padSize := w.blockSize - (metaStart % w.blockSize)
+		if _, err := buf.Write(w.zeroBuf[:padSize]); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// writeInode encodes e's on-disk inode header (compact when e.compact is
+// set, extended otherwise) in the shared scratch buffer and writes it to buf.
+func (w *erofsWriter) writeInode(buf io.Writer, e *erofsEntry) error {
+	// The 32-bit type-dependent field and the recorded size vary by file type.
+	var typeData uint32
+	size := e.size
+	switch e.mode & disk.StatTypeMask {
+	case disk.StatTypeReg:
+		switch {
+		case e.layout == disk.LayoutChunkBased:
+			typeData = disk.LayoutChunkFormatIndexes | uint32(w.entryChunkBits(e))
+		case e.layout == disk.LayoutFlatPlain && e.size > 0:
+			typeData = e.dataBlkAddr
+		}
+	case disk.StatTypeDir:
+		if e.layout == disk.LayoutFlatPlain {
+			typeData = e.dataBlkAddr
+		}
+		size = uint64(w.direntDataSize(e))
+	case disk.StatTypeSymlink:
+		if e.layout == disk.LayoutFlatPlain {
+			typeData = e.dataBlkAddr
+		}
+		size = uint64(len(e.symTarget))
+	case disk.StatTypeChrdev, disk.StatTypeBlkdev, disk.StatTypeFifo, disk.StatTypeSock:
+		typeData = e.rdev
+	}
+
+	le := binary.LittleEndian
+	hdr := w.inodeBuf[:]
+	clear(hdr)
+	// These fields sit at the same offsets in both formats.
+	le.PutUint16(hdr[0:2], inodeFormat(e.layout, e.compact))
+	le.PutUint16(hdr[2:4], xattrCount(e.xattrSize))
+	le.PutUint16(hdr[4:6], e.mode)
+	le.PutUint32(hdr[16:20], typeData)
+
+	n := disk.SizeInodeExtended
+	if e.compact {
+		n = disk.SizeInodeCompact
+		le.PutUint16(hdr[6:8], uint16(e.nlink))
+		le.PutUint32(hdr[8:12], uint32(size))
+		le.PutUint16(hdr[24:26], uint16(e.uid))
+		le.PutUint16(hdr[26:28], uint16(e.gid))
+	} else {
+		le.PutUint64(hdr[8:16], size)
+		le.PutUint32(hdr[24:28], e.uid)
+		le.PutUint32(hdr[28:32], e.gid)
+		le.PutUint64(hdr[32:40], e.mtime)
+		le.PutUint32(hdr[40:44], e.mtimeNs)
+		le.PutUint32(hdr[44:48], e.nlink)
+	}
+	_, err := buf.Write(hdr[:n])
+	return err
+}
+
+// writeXattrs emits e's inline xattr area: a 12-byte header, then one entry
+// per attribute in sortedXattrKeys order, each padded to a 4-byte boundary.
+func (w *erofsWriter) writeXattrs(buf io.Writer, e *erofsEntry) error {
+	// Header: 4-byte name filter (all ones, unused), shared count 0, reserved.
+	hdr := [12]byte{0xFF, 0xFF, 0xFF, 0xFF}
+	if _, err := buf.Write(hdr[:]); err != nil {
+		return err
+	}
+
+	for _, key := range sortedXattrKeys(e.xattrs) {
+		val := e.xattrs[key]
+		idx, tail := xattrSplit(key)
+
+		// Fixed entry header: suffix length, prefix index, value length.
+		var ent [disk.SizeXattrEntry]byte
+		ent[0], ent[1] = uint8(len(tail)), idx
+		binary.LittleEndian.PutUint16(ent[2:4], uint16(len(val)))
+		if _, err := buf.Write(ent[:]); err != nil {
+			return err
+		}
+		if _, err := io.WriteString(buf, tail); err != nil {
+			return err
+		}
+		if _, err := io.WriteString(buf, val); err != nil {
+			return err
+		}
+
+		// Zero-fill so the next entry starts on a 4-byte boundary.
+		if rem := (disk.SizeXattrEntry + len(tail) + len(val)) % 4; rem != 0 {
+			if _, err := buf.Write(w.zeroBuf[:4-rem]); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+// writeChunkIndexes writes one chunk index entry per logical chunk of e
+// (entryChunkSize bytes each); chunks with no mapping get the null index.
+func (w *erofsWriter) writeChunkIndexes(buf io.Writer, e *erofsEntry) error {
+	cs := w.entryChunkSize(e)
+	blocksPerChunk := cs / w.blockSize
+	nchunks := (int(e.size) + cs - 1) / cs // size divided by chunk size, rounded up
+
+	// Null chunk index (no mapping): StartBlkHi=0xFFFF, DeviceID=0, StartBlkLo=NullAddr.
+	var nullIdx [disk.SizeChunkIndex]byte
+	binary.LittleEndian.PutUint16(nullIdx[0:2], 0xFFFF)
+	binary.LittleEndian.PutUint32(nullIdx[4:8], nullAddr)
+
+	if len(e.chunks) > 0 {
+		// Walk source chunks and emit one index per logical chunk.
+		// Source chunks use block-granularity counts; we step by blocksPerChunk.
+		// A chunk with PhysicalBlock == builder.NullPhysicalBlock is a hole:
+		// a null index is emitted for each logical chunk that starts inside it.
+		var scratch [disk.SizeChunkIndex]byte
+		ci := 0   // index into source chunks
+		coff := 0 // block offset within current source chunk
+		for n := 0; n < nchunks; n++ {
+			if ci >= len(e.chunks) { // source chunks exhausted: the rest is unmapped
+				if _, err := buf.Write(nullIdx[:]); err != nil {
+					return err
+				}
+				continue
+			}
+			c := e.chunks[ci]
+			if c.PhysicalBlock == builder.NullPhysicalBlock {
+				// Hole chunk: emit a null index entry.
+				if _, err := buf.Write(nullIdx[:]); err != nil {
+					return err
+				}
+			} else {
+				phys := c.PhysicalBlock + uint64(coff) // first block of this logical chunk
+				binary.LittleEndian.PutUint16(scratch[0:2], uint16(phys>>32))
+				binary.LittleEndian.PutUint16(scratch[2:4], c.DeviceID)
+				binary.LittleEndian.PutUint32(scratch[4:8], uint32(phys))
+				if _, err := buf.Write(scratch[:]); err != nil {
+					return err
+				}
+			}
+			coff += blocksPerChunk // advance by one logical chunk
+			for ci < len(e.chunks) && coff >= int(e.chunks[ci].Count) { // drop source chunks now fully consumed
+				coff -= int(e.chunks[ci].Count)
+				ci++
+			}
+		}
+	} else { // no source chunks at all: every index is null
+		for n := 0; n < nchunks; n++ {
+			if _, err := buf.Write(nullIdx[:]); err != nil {
+				return err
+			}
+		}
+	}
+
+	return nil
+}
+
+// writeDirents writes e's directory entries to buf packed into block-sized
+// chunks and returns the bytes written, including padding between blocks.
+func (w *erofsWriter) writeDirents(buf io.Writer, e *erofsEntry) (int, error) {
+	type direntInfo struct {
+		name     string
+		nid      uint64
+		fileType uint8
+	}
+
+	// Build the full entry list including "." and ".." then sort
+	// alphabetically. EROFS requires dirents to be sorted within
+	// each block; "." and ".." are not guaranteed to be first.
+	allEnts := make([]direntInfo, 0, len(e.children)+2)
+	allEnts = append(allEnts, direntInfo{".", e.nid, disk.FileTypeDir})
+	allEnts = append(allEnts, direntInfo{"..", e.parentNid, disk.FileTypeDir})
+	for _, c := range e.children {
+		nid := c.nid
+		if c.linkTo != nil {
+			nid = c.linkTo.nid // hardlink alias: point at the canonical inode
+		}
+		allEnts = append(allEnts, direntInfo{
+			name:     c.name,
+			nid:      nid,
+			fileType: c.erofsFileType,
+		})
+	}
+	sort.Slice(allEnts, func(i, j int) bool {
+		return allEnts[i].name < allEnts[j].name
+	})
+
+	totalWritten := 0
+	i := 0
+	for i < len(allEnts) {
+		// Determine how many entries fit in this block: each entry costs one
+		// fixed-size header plus its name bytes.
+		start := i
+		blockUsed := 0
+		nameSize := 0
+		for j := i; j < len(allEnts); j++ {
+			headerSize := (j - start + 1) * disk.SizeDirent
+			nameSize += len(allEnts[j].name)
+			needed := headerSize + nameSize
+			if needed > w.blockSize { // entry j does not fit; close the block before it
+				break
+			}
+			blockUsed = needed
+			i = j + 1
+		}
+		if i == start {
+			// Single entry too large for a block (shouldn't happen); emit it alone so the loop advances.
+			blockUsed = disk.SizeDirent + len(allEnts[i].name)
+			i++
+		}
+
+		blockEnts := allEnts[start:i]
+		blockHeaderSize := len(blockEnts) * disk.SizeDirent
+
+		// Write dirent headers; each name offset is relative to the block start.
+		var scratch [disk.SizeDirent]byte
+		nameOff := uint16(blockHeaderSize) // names begin right after the headers
+		for j, de := range blockEnts {
+			if j > 0 {
+				nameOff += uint16(len(blockEnts[j-1].name))
+			}
+			binary.LittleEndian.PutUint64(scratch[0:8], de.nid)
+			binary.LittleEndian.PutUint16(scratch[8:10], nameOff)
+			scratch[10] = de.fileType
+			scratch[11] = 0
+			if _, err := buf.Write(scratch[:]); err != nil {
+				return totalWritten, err
+			}
+			totalWritten += disk.SizeDirent
+		}
+
+		// Write names back to back, in the same order as the headers
+		for _, de := range blockEnts {
+			n, err := io.WriteString(buf, de.name)
+			if err != nil {
+				return totalWritten, err
+			}
+			totalWritten += n
+		}
+
+		// Pad to block boundary if there are more entries; the last block is left unpadded
+		if i < len(allEnts) && blockUsed%w.blockSize != 0 {
+			padSize := w.blockSize - (blockUsed % w.blockSize)
+			if _, err := buf.Write(w.zeroBuf[:padSize]); err != nil {
+				return totalWritten, err
+			}
+			totalWritten += padSize
+		}
+	}
+	return totalWritten, nil
+}
+
+// writeDataBlocks streams the payload of every flat-plain entry to out, in
+// entry order, zero-padding each payload to a whole number of blocks.
+func (w *erofsWriter) writeDataBlocks(out io.Writer) error {
+	for _, ent := range w.entries {
+		size := w.flatPlainDataSize(ent)
+		if size == 0 {
+			continue
+		}
+
+		written := 0
+		switch ent.mode & disk.StatTypeMask {
+		case disk.StatTypeReg:
+			want := int64(size)
+			src := io.LimitReader(ent.data, want)
+			var got int64
+			var err error
+			if _, isFile := ent.data.(*os.File); isFile {
+				// Plain io.Copy for *os.File sources to enable copy_file_range.
+				got, err = io.Copy(out, src)
+			} else {
+				got, err = io.CopyBuffer(onlyWriter{out}, src, w.copyBuf)
+			}
+			if closer, ok := ent.data.(io.Closer); ok {
+				_ = closer.Close()
+			}
+			switch {
+			case err != nil:
+				return fmt.Errorf("write data for %s: %w", ent.path, err)
+			case got != want:
+				return fmt.Errorf("write data for %s: short read: got %d bytes, expected %d", ent.path, got, want)
+			}
+			written = int(got)
+		case disk.StatTypeDir:
+			n, err := w.writeDirents(out, ent)
+			if err != nil {
+				return fmt.Errorf("write dirents for %s: %w", ent.path, err)
+			}
+			written = n
+		case disk.StatTypeSymlink:
+			n, err := io.WriteString(out, ent.symTarget)
+			if err != nil {
+				return fmt.Errorf("write symlink data for %s: %w", ent.path, err)
+			}
+			written = n
+		}
+
+		if tail := written % w.blockSize; tail != 0 {
+			if _, err := out.Write(w.zeroBuf[:w.blockSize-tail]); err != nil {
+				return fmt.Errorf("write padding for %s: %w", ent.path, err)
+			}
+		}
+	}
+	return nil
+}
+
+// flatPlainDataSize returns how many payload bytes e stores in data blocks,
+// or 0 when e is not flat-plain or carries no such payload.
+func (w *erofsWriter) flatPlainDataSize(e *erofsEntry) int {
+	if e.layout != disk.LayoutFlatPlain {
+		return 0
+	}
+	kind := e.mode & disk.StatTypeMask
+	switch {
+	case kind == disk.StatTypeDir:
+		return w.direntDataSize(e)
+	case kind == disk.StatTypeSymlink:
+		return len(e.symTarget)
+	case kind == disk.StatTypeReg && e.size > 0 && e.data != nil:
+		return int(e.size)
+	}
+	return 0
+}
diff --git a/vendor/github.com/erofs/go-erofs/xattr.go b/vendor/github.com/erofs/go-erofs/xattr.go
new file mode 100644
index 0000000..a08fb13
--- /dev/null
+++ b/vendor/github.com/erofs/go-erofs/xattr.go
@@ -0,0 +1,222 @@
+package erofs
+
+import (
+	"encoding/binary"
+	"fmt"
+
+	"github.com/erofs/go-erofs/internal/disk"
+)
+
+/*
+#define EROFS_XATTR_INDEX_USER              1
+#define EROFS_XATTR_INDEX_POSIX_ACL_ACCESS  2
+#define EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT 3
+#define EROFS_XATTR_INDEX_TRUSTED           4
+#define EROFS_XATTR_INDEX_LUSTRE            5
+#define EROFS_XATTR_INDEX_SECURITY          6
+*/
+
+type xattrIndex uint8
+
+// String returns the attribute-name prefix for a built-in name index, or the
+// empty string when the index has no built-in prefix.
+func (idx xattrIndex) String() string {
+	// The array position is the name index; position 0 is left empty.
+	prefixes := [...]string{
+		1: "user.",
+		2: "system.posix_acl_access.",
+		3: "system.posix_acl_default.",
+		4: "trusted.",
+		5: "lustre.",
+		6: "security.",
+	}
+
+	if int(idx) >= len(prefixes) {
+		return ""
+	}
+	return prefixes[idx]
+}
+
+// loadXattrs reads the shared and inline extended attributes of b's inode
+// into a fresh stat.Xattrs map keyed by full name (prefix + stored suffix).
+func loadXattrs(b *file, stat *Stat) (err error) {
+	ino := b.info
+	addr := b.img.metaStartPos() + int64(ino.nid*disk.SizeInodeCompact) + int64(ino.icsize)
+	xsize := ino.xsize
+
+	stat.Xattrs = map[string]string{}
+
+	blk, err := b.img.loadAt(addr, int64(xsize))
+	if err != nil {
+		return fmt.Errorf("failed to read xattr body for nid %d: %w", b.nid, err)
+	}
+	defer func() {
+		if blk != nil {
+			b.img.putBlock(blk)
+		}
+	}()
+
+	xb := blk.bytes()
+	if len(xb) < disk.SizeXattrBodyHeader {
+		return fmt.Errorf("xattr body too small for nid %d: %w", b.nid, ErrInvalid)
+	}
+	var xh disk.XattrHeader
+	if _, err := binary.Decode(xb[:disk.SizeXattrBodyHeader], binary.LittleEndian, &xh); err != nil {
+		return err
+	}
+	xb = xb[disk.SizeXattrBodyHeader:]
+
+	for i := 0; i < int(xh.SharedCount); i++ {
+		if len(xb) < 4 {
+			pos := disk.SizeXattrBodyHeader + int64(i)*4
+			b.img.putBlock(blk)
+			blk, err = b.img.loadAt(addr+pos, int64(xsize)-pos)
+			if err != nil {
+				return fmt.Errorf("failed to read xattr body for nid %d: %w", b.nid, err)
+			}
+			xb = blk.bytes()
+			if len(xb) < 4 {
+				return fmt.Errorf("xattr shared block too small for nid %d: %w", b.nid, ErrInvalid)
+			}
+		}
+		var xattrAddr uint32
+		if _, err := binary.Decode(xb[:4], binary.LittleEndian, &xattrAddr); err != nil {
+			return err
+		}
+		// Widen the shared id before scaling so id*4 cannot wrap in uint32.
+		// TODO: Cache shared xattr blocks
+		sblk, err := b.img.loadAt(int64(b.img.sb.XattrBlkAddr)<<b.img.sb.BlkSizeBits+int64(xattrAddr)*4, int64(1<<b.img.sb.BlkSizeBits))
+		if err != nil {
+			return fmt.Errorf("failed to read shared xattr body for nid %d: %w", b.nid, err)
+		}
+		sb := sblk.bytes()
+		if len(sb) < disk.SizeXattrEntry {
+			b.img.putBlock(sblk)
+			return fmt.Errorf("shared xattr block too small for nid %d: %w", b.nid, ErrInvalid)
+		}
+		var xattrEntry disk.XattrEntry
+		if _, err := binary.Decode(sb[:disk.SizeXattrEntry], binary.LittleEndian, &xattrEntry); err != nil {
+			b.img.putBlock(sblk)
+			return err
+		}
+		sb = sb[disk.SizeXattrEntry:]
+		var prefix string
+		if xattrEntry.NameIndex&0x80 == 0x80 {
+			// Long prefix: highest bit set
+			longPrefixIndex := xattrEntry.NameIndex & 0x7F
+			prefix, err = b.img.getLongPrefix(longPrefixIndex)
+			if err != nil {
+				b.img.putBlock(sblk)
+				return fmt.Errorf("failed to get long prefix for shared xattr nid %d: %w", b.nid, err)
+			}
+		} else if xattrEntry.NameIndex != 0 {
+			prefix = xattrIndex(xattrEntry.NameIndex).String()
+		}
+
+		if len(sb) < int(xattrEntry.NameLen)+int(xattrEntry.ValueLen) {
+			b.img.putBlock(sblk)
+			return fmt.Errorf("shared xattr too long for nid %d: %w", b.nid, ErrInvalid)
+		}
+		name := prefix + string(sb[:xattrEntry.NameLen])
+		sb = sb[xattrEntry.NameLen:]
+		stat.Xattrs[name] = string(sb[:xattrEntry.ValueLen])
+		b.img.putBlock(sblk)
+
+		xb = xb[4:]
+	}
+	// Inline entries follow the shared ids; reload refetches from pos when xb runs short.
+	pos := disk.SizeXattrBodyHeader + int(xh.SharedCount)*4
+	reload := func() error {
+		b.img.putBlock(blk)
+		blk, err = b.img.loadAt(addr+int64(pos), int64(xsize-pos))
+		if err != nil {
+			return fmt.Errorf("failed to read xattr body for nid %d: %w", b.nid, err)
+		}
+		xb = blk.bytes()
+		return nil
+	}
+	for pos < xsize {
+		if len(xb) < disk.SizeXattrEntry {
+			if err := reload(); err != nil {
+				return err
+			}
+			if len(xb) < disk.SizeXattrEntry {
+				return fmt.Errorf("xattr block too small for entry at pos %d for nid %d: %w", pos, b.nid, ErrInvalid)
+			}
+		}
+
+		var xattrEntry disk.XattrEntry
+		if _, err := binary.Decode(xb[:disk.SizeXattrEntry], binary.LittleEndian, &xattrEntry); err != nil {
+			return err
+		}
+		pos += disk.SizeXattrEntry
+		xb = xb[disk.SizeXattrEntry:]
+		var prefix string
+		if xattrEntry.NameIndex&0x80 == 0x80 {
+			// Long prefix: highest bit set
+			longPrefixIndex := xattrEntry.NameIndex & 0x7F
+			var err error
+			prefix, err = b.img.getLongPrefix(longPrefixIndex)
+			if err != nil {
+				return fmt.Errorf("failed to get long prefix for inline xattr nid %d: %w", b.nid, err)
+			}
+		} else if xattrEntry.NameIndex != 0 {
+			prefix = xattrIndex(xattrEntry.NameIndex).String()
+		}
+
+		if len(xb) < int(xattrEntry.NameLen) {
+			if err := reload(); err != nil {
+				return err
+			}
+			if len(xb) < int(xattrEntry.NameLen) {
+				return fmt.Errorf("xattr block too small for name of length %d for nid %d: %w", xattrEntry.NameLen, b.nid, ErrInvalid)
+			}
+		}
+		name := prefix + string(xb[:xattrEntry.NameLen])
+		pos += int(xattrEntry.NameLen)
+		xb = xb[xattrEntry.NameLen:]
+
+		var value string
+		if len(xb) < int(xattrEntry.ValueLen) {
+			remaining := int(xattrEntry.ValueLen)
+			buf := make([]byte, 0, remaining)
+			for remaining > 0 {
+				copySize := len(xb)
+				if copySize == 0 {
+					if err := reload(); err != nil {
+						return err
+					}
+					copySize = len(xb)
+					if copySize == 0 {
+						return fmt.Errorf("empty xattr block while reading value: %w", ErrInvalid)
+					}
+				}
+				if remaining < copySize {
+					copySize = remaining
+				}
+				buf = append(buf, xb[:copySize]...)
+				remaining -= copySize
+				pos += copySize
+				xb = xb[copySize:]
+			}
+			value = string(buf)
+		} else {
+			value = string(xb[:xattrEntry.ValueLen])
+			pos += int(xattrEntry.ValueLen)
+			xb = xb[xattrEntry.ValueLen:]
+		}
+		stat.Xattrs[name] = value
+
+		// Round up to next 4 byte boundary; a short xb is dropped so the next entry reloads
+		if rem := pos % 4; rem != 0 {
+			pad := 4 - rem
+			pos += pad
+			if len(xb) < pad {
+				xb = nil
+			} else {
+				xb = xb[pad:]
+			}
+		}
+	}
+	return nil
+}
diff --git a/vendor/modules.txt b/vendor/modules.txt
index c6943f5..fb8d820 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -8,6 +8,11 @@ github.com/Microsoft/go-winio/pkg/guid
 # github.com/containerd/log v0.1.0
 ## explicit; go 1.20
 github.com/containerd/log
+# github.com/erofs/go-erofs v0.3.1-0.20260531080512-069dc32d83e6
+## explicit; go 1.23
+github.com/erofs/go-erofs
+github.com/erofs/go-erofs/internal/builder
+github.com/erofs/go-erofs/internal/disk
 # github.com/opencontainers/go-digest v1.0.0
 ## explicit; go 1.13
 github.com/opencontainers/go-digest