Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 20 additions & 14 deletions integration-tests/harness/harness.go
Original file line number Diff line number Diff line change
Expand Up @@ -1079,14 +1079,15 @@ type mockWeightsLock struct {
// mockWeightFile mirrors WeightFile from pkg/model/weights.go
// SYNC: If pkg/model/WeightFile changes, update this copy.
type mockWeightFile struct {
Name string `json:"name"`
Dest string `json:"dest"`
DigestOriginal string `json:"digestOriginal"`
Digest string `json:"digest"`
Size int64 `json:"size"`
SizeUncompressed int64 `json:"sizeUncompressed"`
MediaType string `json:"mediaType"`
ContentType string `json:"contentType,omitempty"`
Name string `json:"name"`
Dest string `json:"dest"`
DigestOriginal string `json:"digestOriginal"`
Digest string `json:"digest"`
Size int64 `json:"size"`
SourceMtimeUnixNano int64 `json:"sourceMtimeUnixNano,omitempty"`
SizeUncompressed int64 `json:"sizeUncompressed"`
MediaType string `json:"mediaType"`
ContentType string `json:"contentType,omitempty"`
}

// cmdMockWeights generates mock weight files and a weights.lock file.
Expand Down Expand Up @@ -1166,18 +1167,23 @@ func (h *Harness) cmdMockWeights(ts *testscript.TestScript, neg bool, args []str
if err := os.WriteFile(filePath, data, 0o644); err != nil {
ts.Fatalf("mock-weights: failed to write %s: %v", filename, err)
}
fi, err := os.Stat(filePath)
if err != nil {
ts.Fatalf("mock-weights: failed to stat %s: %v", filename, err)
}

// Compute digest (uncompressed, since we're not actually compressing for tests)
hash := sha256.Sum256(data)
digest := "sha256:" + hex.EncodeToString(hash[:])

files = append(files, mockWeightFile{
Name: weightName,
Dest: "/cache/" + filename,
DigestOriginal: digest,
Digest: digest, // Same as original since we're not compressing
Size: size,
SizeUncompressed: size,
Name: weightName,
Dest: "/cache/" + filename,
DigestOriginal: digest,
Digest: digest, // Same as original since we're not compressing
Size: size,
SourceMtimeUnixNano: fi.ModTime().UnixNano(),
SizeUncompressed: size,
// MediaType matches production WeightBuilder output (uncompressed).
MediaType: "application/vnd.cog.weight.layer.v1",
ContentType: "application/octet-stream",
Expand Down
40 changes: 22 additions & 18 deletions pkg/model/weight_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,12 @@ func (b *WeightBuilder) Build(ctx context.Context, spec ArtifactSpec) (Artifact,
}
return nil, fmt.Errorf("stat weight file %s: %w", ws.Source, err)
}
sourceMtimeUnixNano := fi.ModTime().UnixNano()

// Check lockfile cache: if we have a matching entry (name + size), skip hashing.
// NOTE: This cache only checks name + file size. Same-size modifications (rare for
// weight files) won't be detected. Delete the lockfile to force re-hashing.
// TODO: Consider adding mtime to the cache key for stronger invalidation.
// Check lockfile cache: if we have a matching entry (name + size + source mtime), skip hashing.
var digestStr string
var size int64
if cached := b.findCachedEntry(ws.Name(), fi.Size()); cached != nil {
if cached := b.findCachedEntry(ws.Name(), fi.Size(), sourceMtimeUnixNano); cached != nil {
digestStr = cached.Digest
size = cached.Size
} else {
Expand Down Expand Up @@ -95,16 +93,16 @@ func (b *WeightBuilder) Build(ctx context.Context, spec ArtifactSpec) (Artifact,
}

// Update lockfile
if err := b.updateLockfile(ws, digestStr, size); err != nil {
if err := b.updateLockfile(ws, digestStr, size, sourceMtimeUnixNano); err != nil {
return nil, fmt.Errorf("update lockfile: %w", err)
}

return NewWeightArtifact(ws.Name(), desc, absPath, ws.Target, cfg), nil
}

// findCachedEntry checks the lockfile for an entry matching name and fileSize.
// Returns the cached WeightFile if found and size matches, nil otherwise.
func (b *WeightBuilder) findCachedEntry(name string, fileSize int64) *WeightFile {
// findCachedEntry checks the lockfile for an entry matching name, file size, and source mtime.
// Returns the cached WeightFile if found, nil otherwise.
func (b *WeightBuilder) findCachedEntry(name string, fileSize, sourceMtimeUnixNano int64) *WeightFile {
if _, err := os.Stat(b.lockPath); err != nil {
return nil
}
Expand All @@ -113,7 +111,12 @@ func (b *WeightBuilder) findCachedEntry(name string, fileSize int64) *WeightFile
return nil
}
for i, f := range lock.Files {
if f.Name == name && f.Size == fileSize {
// Zero mtime means the lockfile entry came from legacy code and is not trusted
// for cache hits.
if f.SourceMtimeUnixNano == 0 {
continue
}
if f.Name == name && f.Size == fileSize && f.SourceMtimeUnixNano == sourceMtimeUnixNano {
return &lock.Files[i]
}
}
Expand All @@ -122,7 +125,7 @@ func (b *WeightBuilder) findCachedEntry(name string, fileSize int64) *WeightFile

// updateLockfile loads the existing lockfile (if any), adds or updates
// the entry for the given weight, and saves it back.
func (b *WeightBuilder) updateLockfile(ws *WeightSpec, digest string, size int64) error {
func (b *WeightBuilder) updateLockfile(ws *WeightSpec, digest string, size, sourceMtimeUnixNano int64) error {
// Load existing lockfile, or start fresh.
// LoadWeightsLock wraps the underlying error, so we check the raw file first.
lock := &WeightsLock{
Expand All @@ -138,13 +141,14 @@ func (b *WeightBuilder) updateLockfile(ws *WeightSpec, digest string, size int64
}

entry := WeightFile{
Name: ws.Name(),
Dest: ws.Target,
Digest: digest,
DigestOriginal: digest,
Size: size,
SizeUncompressed: size,
MediaType: MediaTypeWeightLayer,
Name: ws.Name(),
Dest: ws.Target,
Digest: digest,
DigestOriginal: digest,
Size: size,
SourceMtimeUnixNano: sourceMtimeUnixNano,
SizeUncompressed: size,
MediaType: MediaTypeWeightLayer,
}

// Update existing entry or append
Expand Down
141 changes: 140 additions & 1 deletion pkg/model/weight_builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"os"
"path/filepath"
"testing"
"time"

"github.com/stretchr/testify/require"

Expand Down Expand Up @@ -72,7 +73,10 @@ func TestWeightBuilder_WritesLockfile(t *testing.T) {
// After Build(), a weights.lock should be written/updated at lockPath.
tmpDir := t.TempDir()
weightContent := []byte("lockfile test weight")
err := os.WriteFile(filepath.Join(tmpDir, "model.bin"), weightContent, 0o644)
weightPath := filepath.Join(tmpDir, "model.bin")
err := os.WriteFile(weightPath, weightContent, 0o644)
require.NoError(t, err)
fi, err := os.Stat(weightPath)
require.NoError(t, err)

hash := sha256.Sum256(weightContent)
Expand Down Expand Up @@ -106,6 +110,7 @@ func TestWeightBuilder_WritesLockfile(t *testing.T) {
require.Equal(t, "/weights/model.bin", wf.Dest)
require.Equal(t, expectedDigest, wf.Digest)
require.Equal(t, int64(len(weightContent)), wf.Size)
require.Equal(t, fi.ModTime().UnixNano(), wf.SourceMtimeUnixNano)
}

func TestWeightBuilder_UpdatesExistingLockfile(t *testing.T) {
Expand Down Expand Up @@ -194,6 +199,7 @@ func TestWeightBuilder_CacheHit(t *testing.T) {
require.NoError(t, err)
require.Len(t, lock.Files, 1)
require.Equal(t, "my-model", lock.Files[0].Name)
require.NotZero(t, lock.Files[0].SourceMtimeUnixNano)
}

func TestWeightBuilder_CacheMiss_SizeChanged(t *testing.T) {
Expand Down Expand Up @@ -234,6 +240,139 @@ func TestWeightBuilder_CacheMiss_SizeChanged(t *testing.T) {
require.Equal(t, int64(len(content2)), wa2.Descriptor().Size)
}

func TestWeightBuilder_CacheMiss_SameSizeMtimeChanged(t *testing.T) {
	// A same-size rewrite of the weight file must invalidate the lockfile
	// cache when the source mtime differs, forcing a re-hash on rebuild.
	dir := t.TempDir()
	v1 := []byte("same-size-content-v1")
	v2 := []byte("same-size-content-v2")
	require.Equal(t, len(v1), len(v2))

	modelPath := filepath.Join(dir, "model.bin")
	require.NoError(t, os.WriteFile(modelPath, v1, 0o644))

	source := NewSourceFromConfig(&config.Config{
		Weights: []config.WeightSource{
			{Name: "my-model", Source: "model.bin", Target: "/weights/model.bin"},
		},
	}, dir)

	lockPath := filepath.Join(dir, "weights.lock")
	builder := NewWeightBuilder(source, "0.15.0", lockPath)
	spec := NewWeightSpec("my-model", "model.bin", "/weights/model.bin")

	// Initial build records the file's mtime in the lockfile.
	_, err := builder.Build(context.Background(), spec)
	require.NoError(t, err)

	initialLock, err := LoadWeightsLock(lockPath)
	require.NoError(t, err)
	require.Len(t, initialLock.Files, 1)
	initialMtime := initialLock.Files[0].SourceMtimeUnixNano
	require.NotZero(t, initialMtime)

	// Replace the content (same length) and push the mtime forward so the
	// cache key no longer matches.
	require.NoError(t, os.WriteFile(modelPath, v2, 0o644))
	info, err := os.Stat(modelPath)
	require.NoError(t, err)
	bumped := info.ModTime().Add(2 * time.Second)
	require.NoError(t, os.Chtimes(modelPath, bumped, bumped))

	rebuilt, err := builder.Build(context.Background(), spec)
	require.NoError(t, err)

	// The second build must have hashed the new content...
	sum := sha256.Sum256(v2)
	wantDigest := "sha256:" + hex.EncodeToString(sum[:])
	require.Equal(t, wantDigest, rebuilt.(*WeightArtifact).Descriptor().Digest.String())

	// ...and refreshed the recorded mtime.
	updatedLock, err := LoadWeightsLock(lockPath)
	require.NoError(t, err)
	require.Len(t, updatedLock.Files, 1)
	require.NotEqual(t, initialMtime, updatedLock.Files[0].SourceMtimeUnixNano)
}

func TestWeightBuilder_CacheMiss_WhenLockfileEntryHasNoSourceMtime(t *testing.T) {
	// A lockfile written by legacy code carries no sourceMtimeUnixNano; such
	// an entry must never be a cache hit. The build should re-hash the file
	// and rewrite the entry with a real mtime.
	dir := t.TempDir()
	payload := []byte("legacy lockfile content")
	modelPath := filepath.Join(dir, "model.bin")
	require.NoError(t, os.WriteFile(modelPath, payload, 0o644))

	source := NewSourceFromConfig(&config.Config{
		Weights: []config.WeightSource{
			{Name: "my-model", Source: "model.bin", Target: "/weights/model.bin"},
		},
	}, dir)

	// Seed a legacy-style lockfile: matching name and size, stale digest,
	// and a zero SourceMtimeUnixNano.
	lockPath := filepath.Join(dir, "weights.lock")
	stale := &WeightsLock{
		Version: "1.0",
		Created: time.Now().UTC(),
		Files: []WeightFile{{
			Name:             "my-model",
			Dest:             "/weights/model.bin",
			DigestOriginal:   "sha256:stale",
			Digest:           "sha256:stale",
			Size:             int64(len(payload)),
			SizeUncompressed: int64(len(payload)),
			MediaType:        MediaTypeWeightLayer,
			// SourceMtimeUnixNano intentionally left at its zero value.
		}},
	}
	require.NoError(t, stale.Save(lockPath))

	builder := NewWeightBuilder(source, "0.15.0", lockPath)
	spec := NewWeightSpec("my-model", "model.bin", "/weights/model.bin")
	artifact, err := builder.Build(context.Background(), spec)
	require.NoError(t, err)

	// Despite the size match, the stale digest must have been replaced by a
	// freshly computed one.
	sum := sha256.Sum256(payload)
	wantDigest := "sha256:" + hex.EncodeToString(sum[:])
	require.Equal(t, wantDigest, artifact.(*WeightArtifact).Descriptor().Digest.String())

	refreshed, err := LoadWeightsLock(lockPath)
	require.NoError(t, err)
	require.Len(t, refreshed.Files, 1)
	require.Equal(t, wantDigest, refreshed.Files[0].Digest)
	require.NotZero(t, refreshed.Files[0].SourceMtimeUnixNano)
}

func TestWeightBuilder_FindCachedEntry_MissesLegacyZeroMtime(t *testing.T) {
	// findCachedEntry must treat a zero SourceMtimeUnixNano as untrusted and
	// return nil even when name and size match exactly.
	dir := t.TempDir()
	lockPath := filepath.Join(dir, "weights.lock")

	legacy := &WeightsLock{
		Version: "1.0",
		Created: time.Now().UTC(),
		Files: []WeightFile{{
			Name:                "my-model",
			Dest:                "/weights/model.bin",
			DigestOriginal:      "sha256:legacy",
			Digest:              "sha256:legacy",
			Size:                123,
			SourceMtimeUnixNano: 0, // legacy entry: never a cache hit
			SizeUncompressed:    123,
			MediaType:           MediaTypeWeightLayer,
		}},
	}
	require.NoError(t, legacy.Save(lockPath))

	builder := NewWeightBuilder(NewSourceFromConfig(&config.Config{}, dir), "0.15.0", lockPath)
	require.Nil(t, builder.findCachedEntry("my-model", 123, 0))
}

func TestWeightBuilder_ErrorWrongSpecType(t *testing.T) {
tmpDir := t.TempDir()
src := NewSourceFromConfig(&config.Config{}, tmpDir)
Expand Down
2 changes: 2 additions & 0 deletions pkg/model/weights.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ type WeightFile struct {
Digest string `json:"digest"`
// Size is the compressed size in bytes.
Size int64 `json:"size"`
// SourceMtimeUnixNano is the source file modification time used for cache invalidation.
SourceMtimeUnixNano int64 `json:"sourceMtimeUnixNano,omitempty"`
// SizeUncompressed is the original size in bytes.
SizeUncompressed int64 `json:"sizeUncompressed"`
// MediaType is the OCI layer media type (e.g., application/vnd.cog.weight.layer.v1+gzip).
Expand Down