mirror of
https://github.com/ultravioletrs/cocos.git
synced 2026-06-23 04:10:25 +00:00
b44780df95
CI / lint (push) Has been cancelled
CI / test (agent) (push) Has been cancelled
CI / test (cli) (push) Has been cancelled
CI / test (cmd) (push) Has been cancelled
CI / test (internal) (push) Has been cancelled
CI / test (manager, true) (push) Has been cancelled
CI / test (pkg) (push) Has been cancelled
CI / upload-coverage (push) Has been cancelled
* feat: Enhance OCI image extraction to return algorithm and requirements paths, and add deferred cleanup for temporary files. Signed-off-by: Sammy Oina <sammyoina@gmail.com> * feat: implement deterministic zipping and enhance checksum verification for resources Signed-off-by: Sammy Oina <sammyoina@gmail.com> * feat: Update component build sources, add gRPC health checks to the CVM server, and refine algorithm argument handling and documentation. Signed-off-by: Sammy Oina <sammyoina@gmail.com> * docs: Update remote resources testing guide with `sudo` for KBS, algorithm result saving, `requirements.txt`, and `algo-args` for RVPS. Signed-off-by: Sammy Oina <sammyoina@gmail.com> * refactor: Explicitly ignore `stderr.Write` return values and add minor whitespace in tests. Signed-off-by: Sammy Oina <sammyoina@gmail.com> * test: add comprehensive error path and edge case tests for file, zip, OCI, and agent components. Signed-off-by: Sammy Oina <sammyoina@gmail.com> * feat: Add mutexes for thread-safe algorithm execution and expand recognized data file extensions to include common archive formats. Signed-off-by: Sammy Oina <sammyoina@gmail.com> * feat: Add OCI extraction tests for Python algorithms and multi-layer datasets, refactor algorithm execution for testability, and enhance algorithm stop and error handling tests. Signed-off-by: Sammy Oina <sammyoina@gmail.com> * test: Add error assertions to OCI extraction test helpers and remove an unused mock exec command. Signed-off-by: Sammy Oina <sammyoina@gmail.com> * test: Improve error handling test coverage for algorithm execution and OCI resource extraction. Signed-off-by: Sammy Oina <sammyoina@gmail.com> * fix: Improve algorithm process termination, enhance computation error handling, and add concurrency safety to agent service. Signed-off-by: Sammy Oina <sammyoina@gmail.com> --------- Signed-off-by: Sammy Oina <sammyoina@gmail.com>
1138 lines
33 KiB
Go
1138 lines
33 KiB
Go
// Copyright (c) Ultraviolet
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
|
|
package oci
|
|
|
|
import (
|
|
"archive/tar"
|
|
"bytes"
|
|
"compress/gzip"
|
|
"context"
|
|
"encoding/json"
|
|
"log/slog"
|
|
"os"
|
|
"path/filepath"
|
|
"testing"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
// testPythonScript is a minimal Python program used as algorithm file content in these tests.
const testPythonScript = "print('hello')"
|
|
|
|
func TestIsAlgorithmFile(t *testing.T) {
|
|
tests := []struct {
|
|
name string
|
|
filename string
|
|
mode int64
|
|
algoType string
|
|
want bool
|
|
}{
|
|
{"Python file", "algorithm.py", 0o644, "python", true},
|
|
{"WASM file", "module.wasm", 0o644, "wasm", true},
|
|
{"WAT file", "module.wat", 0o644, "wasm", true},
|
|
{"Python file as bin", "algorithm.py", 0o755, "bin", false},
|
|
{"Main python file", "main.py", 0o644, "python", true},
|
|
{"Binary file with common name", "algorithm", 0o644, "bin", true},
|
|
{"Binary file with common name run", "run", 0o644, "bin", true},
|
|
{"Executable binary", "my-app", 0o755, "bin", true},
|
|
{"CSV data file", "data.csv", 0o755, "python", false},
|
|
{"JSON config file", "config.json", 0o755, "wasm", false},
|
|
{"Text file", "readme.txt", 0o755, "bin", false},
|
|
{"Uppercase extension", "MAIN.PY", 0o644, "python", true},
|
|
{"Mixed case", "Algorithm.Py", 0o644, "python", true},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
got := isAlgorithmFile(tt.filename, tt.mode, tt.algoType)
|
|
assert.Equal(t, tt.want, got)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestIsDataFile(t *testing.T) {
|
|
tests := []struct {
|
|
name string
|
|
filename string
|
|
want bool
|
|
}{
|
|
{"CSV file", "data.csv", true},
|
|
{"JSON file", "config.json", true},
|
|
{"Text file", "readme.txt", true},
|
|
{"Parquet file", "data.parquet", true},
|
|
{"Arrow file", "data.arrow", true},
|
|
{"DAT file", "data.dat", true},
|
|
{"Python file", "script.py", false},
|
|
{"WASM file", "module.wasm", false},
|
|
{"Binary file", "data.bin", false},
|
|
{"Uppercase CSV", "DATA.CSV", true},
|
|
{"Nested path", "data/input/dataset.csv", true},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
got := isDataFile(tt.filename)
|
|
assert.Equal(t, tt.want, got)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestExtractAlgorithm(t *testing.T) {
|
|
logger := slog.Default()
|
|
|
|
t.Run("missing index.json", func(t *testing.T) {
|
|
tempDir := t.TempDir()
|
|
_, _, err := ExtractAlgorithm(context.Background(), logger, tempDir, t.TempDir(), "")
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to read index.json")
|
|
})
|
|
|
|
t.Run("invalid index.json", func(t *testing.T) {
|
|
tempDir := t.TempDir()
|
|
err := os.WriteFile(filepath.Join(tempDir, "index.json"), []byte("not json"), 0o644)
|
|
require.NoError(t, err)
|
|
|
|
_, _, err = ExtractAlgorithm(context.Background(), logger, tempDir, t.TempDir(), "")
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to parse index.json")
|
|
})
|
|
|
|
t.Run("empty manifests", func(t *testing.T) {
|
|
tempDir := t.TempDir()
|
|
index := OCIIndex{SchemaVersion: 2}
|
|
data, _ := json.Marshal(index)
|
|
err := os.WriteFile(filepath.Join(tempDir, "index.json"), data, 0o644)
|
|
require.NoError(t, err)
|
|
|
|
_, _, err = ExtractAlgorithm(context.Background(), logger, tempDir, t.TempDir(), "")
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "no manifests found")
|
|
})
|
|
|
|
t.Run("successful extraction", func(t *testing.T) {
|
|
ociDir, destDir := setupTestOCIImage(t, "algorithm.py", testPythonScript)
|
|
algoPath, _, err := ExtractAlgorithm(context.Background(), logger, ociDir, destDir, "python")
|
|
require.NoError(t, err)
|
|
assert.NotEmpty(t, algoPath)
|
|
assert.Contains(t, algoPath, "algorithm.py")
|
|
})
|
|
}
|
|
|
|
func TestExtractDataset(t *testing.T) {
|
|
t.Run("missing index.json", func(t *testing.T) {
|
|
tempDir := t.TempDir()
|
|
_, err := ExtractDataset(tempDir, t.TempDir())
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to read index.json")
|
|
})
|
|
|
|
t.Run("successful extraction", func(t *testing.T) {
|
|
ociDir, destDir := setupTestOCIImage(t, "data.csv", "col1,col2\n1,2")
|
|
files, err := ExtractDataset(ociDir, destDir)
|
|
require.NoError(t, err)
|
|
assert.NotEmpty(t, files)
|
|
})
|
|
}
|
|
|
|
func TestExtractDatasetWithPathTraversal(t *testing.T) {
|
|
t.Run("path traversal skipped, valid file extracted", func(t *testing.T) {
|
|
ociDir := t.TempDir()
|
|
destDir := t.TempDir()
|
|
|
|
blobsDir := filepath.Join(ociDir, "blobs", "sha256")
|
|
require.NoError(t, os.MkdirAll(blobsDir, 0o755))
|
|
|
|
layerPath := filepath.Join(blobsDir, "layer123")
|
|
layerFile, err := os.Create(layerPath)
|
|
require.NoError(t, err)
|
|
|
|
gw := gzip.NewWriter(layerFile)
|
|
tw := tar.NewWriter(gw)
|
|
|
|
// Path traversal entry (should be skipped)
|
|
maliciousHdr := &tar.Header{
|
|
Name: "../../../tmp/evil.csv",
|
|
Mode: 0o644,
|
|
Size: int64(len("evil")),
|
|
}
|
|
require.NoError(t, tw.WriteHeader(maliciousHdr))
|
|
_, err = tw.Write([]byte("evil"))
|
|
require.NoError(t, err)
|
|
|
|
// Valid CSV file
|
|
csvContent := "col1,col2\n1,2"
|
|
csvHdr := &tar.Header{
|
|
Name: "data.csv",
|
|
Mode: 0o644,
|
|
Size: int64(len(csvContent)),
|
|
}
|
|
require.NoError(t, tw.WriteHeader(csvHdr))
|
|
_, err = tw.Write([]byte(csvContent))
|
|
require.NoError(t, err)
|
|
|
|
require.NoError(t, tw.Close())
|
|
require.NoError(t, gw.Close())
|
|
require.NoError(t, layerFile.Close())
|
|
|
|
manifest := struct {
|
|
Layers []struct {
|
|
Digest string `json:"digest"`
|
|
} `json:"layers"`
|
|
}{
|
|
Layers: []struct {
|
|
Digest string `json:"digest"`
|
|
}{{Digest: "sha256:layer123"}},
|
|
}
|
|
manifestData, _ := json.Marshal(manifest)
|
|
require.NoError(t, os.WriteFile(filepath.Join(blobsDir, "manifest123"), manifestData, 0o644))
|
|
|
|
index := OCIIndex{
|
|
SchemaVersion: 2,
|
|
Manifests: []struct {
|
|
MediaType string `json:"mediaType"`
|
|
Digest string `json:"digest"`
|
|
Size int `json:"size"`
|
|
}{{Digest: "sha256:manifest123", Size: len(manifestData)}},
|
|
}
|
|
indexData, _ := json.Marshal(index)
|
|
require.NoError(t, os.WriteFile(filepath.Join(ociDir, "index.json"), indexData, 0o644))
|
|
|
|
files, err := ExtractDataset(ociDir, destDir)
|
|
require.NoError(t, err)
|
|
assert.Len(t, files, 1)
|
|
assert.Contains(t, files[0], "data.csv")
|
|
|
|
// Verify malicious file was NOT created outside destDir
|
|
_, err = os.Stat("/tmp/evil.csv")
|
|
assert.True(t, os.IsNotExist(err))
|
|
})
|
|
}
|
|
|
|
func TestExtractDatasetInvalidManifest(t *testing.T) {
|
|
t.Run("invalid manifest JSON", func(t *testing.T) {
|
|
ociDir := t.TempDir()
|
|
blobsDir := filepath.Join(ociDir, "blobs", "sha256")
|
|
require.NoError(t, os.MkdirAll(blobsDir, 0o755))
|
|
|
|
require.NoError(t, os.WriteFile(filepath.Join(blobsDir, "manifest123"), []byte("not json"), 0o644))
|
|
|
|
index := OCIIndex{
|
|
SchemaVersion: 2,
|
|
Manifests: []struct {
|
|
MediaType string `json:"mediaType"`
|
|
Digest string `json:"digest"`
|
|
Size int `json:"size"`
|
|
}{{Digest: "sha256:manifest123", Size: 8}},
|
|
}
|
|
indexData, _ := json.Marshal(index)
|
|
require.NoError(t, os.WriteFile(filepath.Join(ociDir, "index.json"), indexData, 0o644))
|
|
|
|
_, err := ExtractDataset(ociDir, t.TempDir())
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to parse manifest")
|
|
})
|
|
}
|
|
|
|
func TestExtractDatasetWithDirectory(t *testing.T) {
|
|
t.Run("layer with directory entries for dataset", func(t *testing.T) {
|
|
ociDir := t.TempDir()
|
|
destDir := t.TempDir()
|
|
|
|
blobsDir := filepath.Join(ociDir, "blobs", "sha256")
|
|
require.NoError(t, os.MkdirAll(blobsDir, 0o755))
|
|
|
|
layerPath := filepath.Join(blobsDir, "layer123")
|
|
layerFile, err := os.Create(layerPath)
|
|
require.NoError(t, err)
|
|
|
|
gw := gzip.NewWriter(layerFile)
|
|
tw := tar.NewWriter(gw)
|
|
|
|
// Directory entry
|
|
dirHdr := &tar.Header{
|
|
Name: "data/",
|
|
Mode: 0o755,
|
|
Typeflag: tar.TypeDir,
|
|
}
|
|
require.NoError(t, tw.WriteHeader(dirHdr))
|
|
|
|
// CSV inside directory
|
|
csvContent := "a,b\n1,2"
|
|
csvHdr := &tar.Header{
|
|
Name: "data/dataset.csv",
|
|
Mode: 0o644,
|
|
Size: int64(len(csvContent)),
|
|
}
|
|
require.NoError(t, tw.WriteHeader(csvHdr))
|
|
_, err = tw.Write([]byte(csvContent))
|
|
require.NoError(t, err)
|
|
|
|
require.NoError(t, tw.Close())
|
|
require.NoError(t, gw.Close())
|
|
require.NoError(t, layerFile.Close())
|
|
|
|
manifest := struct {
|
|
Layers []struct {
|
|
Digest string `json:"digest"`
|
|
} `json:"layers"`
|
|
}{
|
|
Layers: []struct {
|
|
Digest string `json:"digest"`
|
|
}{{Digest: "sha256:layer123"}},
|
|
}
|
|
manifestData, _ := json.Marshal(manifest)
|
|
require.NoError(t, os.WriteFile(filepath.Join(blobsDir, "manifest123"), manifestData, 0o644))
|
|
|
|
index := OCIIndex{
|
|
SchemaVersion: 2,
|
|
Manifests: []struct {
|
|
MediaType string `json:"mediaType"`
|
|
Digest string `json:"digest"`
|
|
Size int `json:"size"`
|
|
}{{Digest: "sha256:manifest123", Size: len(manifestData)}},
|
|
}
|
|
indexData, _ := json.Marshal(index)
|
|
require.NoError(t, os.WriteFile(filepath.Join(ociDir, "index.json"), indexData, 0o644))
|
|
|
|
files, err := ExtractDataset(ociDir, destDir)
|
|
require.NoError(t, err)
|
|
require.Len(t, files, 1)
|
|
assert.Contains(t, files[0], "dataset.csv")
|
|
})
|
|
}
|
|
|
|
func TestExtractDatasetMissingManifest(t *testing.T) {
|
|
t.Run("manifest file not found", func(t *testing.T) {
|
|
ociDir := t.TempDir()
|
|
blobsDir := filepath.Join(ociDir, "blobs", "sha256")
|
|
require.NoError(t, os.MkdirAll(blobsDir, 0o755))
|
|
|
|
index := OCIIndex{
|
|
SchemaVersion: 2,
|
|
Manifests: []struct {
|
|
MediaType string `json:"mediaType"`
|
|
Digest string `json:"digest"`
|
|
Size int `json:"size"`
|
|
}{{Digest: "sha256:nonexistent", Size: 0}},
|
|
}
|
|
indexData, _ := json.Marshal(index)
|
|
require.NoError(t, os.WriteFile(filepath.Join(ociDir, "index.json"), indexData, 0o644))
|
|
|
|
_, err := ExtractDataset(ociDir, t.TempDir())
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to read manifest")
|
|
})
|
|
}
|
|
|
|
func TestOCILayoutStructure(t *testing.T) {
|
|
t.Run("OCILayout JSON serialization", func(t *testing.T) {
|
|
layout := OCILayout{ImageLayoutVersion: "1.0.0"}
|
|
|
|
data, err := json.Marshal(layout)
|
|
require.NoError(t, err)
|
|
|
|
var decoded OCILayout
|
|
err = json.Unmarshal(data, &decoded)
|
|
require.NoError(t, err)
|
|
|
|
assert.Equal(t, layout.ImageLayoutVersion, decoded.ImageLayoutVersion)
|
|
})
|
|
}
|
|
|
|
func setupTestOCIImage(t *testing.T, filename, content string) (ociDir, destDir string) {
|
|
t.Helper()
|
|
|
|
ociDir = t.TempDir()
|
|
destDir = t.TempDir()
|
|
|
|
blobsDir := filepath.Join(ociDir, "blobs", "sha256")
|
|
require.NoError(t, os.MkdirAll(blobsDir, 0o755))
|
|
|
|
layerPath := filepath.Join(blobsDir, "layer123")
|
|
layerFile, err := os.Create(layerPath)
|
|
require.NoError(t, err)
|
|
|
|
gw := gzip.NewWriter(layerFile)
|
|
tw := tar.NewWriter(gw)
|
|
|
|
hdr := &tar.Header{
|
|
Name: filename,
|
|
Mode: 0o644,
|
|
Size: int64(len(content)),
|
|
}
|
|
require.NoError(t, tw.WriteHeader(hdr))
|
|
_, err = tw.Write([]byte(content))
|
|
require.NoError(t, err)
|
|
|
|
require.NoError(t, tw.Close())
|
|
require.NoError(t, gw.Close())
|
|
require.NoError(t, layerFile.Close())
|
|
|
|
manifest := struct {
|
|
Layers []struct {
|
|
Digest string `json:"digest"`
|
|
} `json:"layers"`
|
|
}{
|
|
Layers: []struct {
|
|
Digest string `json:"digest"`
|
|
}{{Digest: "sha256:layer123"}},
|
|
}
|
|
manifestData, err := json.Marshal(manifest)
|
|
require.NoError(t, err)
|
|
manifestPath := filepath.Join(blobsDir, "manifest123")
|
|
require.NoError(t, os.WriteFile(manifestPath, manifestData, 0o644))
|
|
|
|
index := OCIIndex{
|
|
SchemaVersion: 2,
|
|
Manifests: []struct {
|
|
MediaType string `json:"mediaType"`
|
|
Digest string `json:"digest"`
|
|
Size int `json:"size"`
|
|
}{{
|
|
MediaType: "application/vnd.oci.image.manifest.v1+json",
|
|
Digest: "sha256:manifest123",
|
|
Size: len(manifestData),
|
|
}},
|
|
}
|
|
indexData, err := json.Marshal(index)
|
|
require.NoError(t, err)
|
|
require.NoError(t, os.WriteFile(filepath.Join(ociDir, "index.json"), indexData, 0o644))
|
|
|
|
return ociDir, destDir
|
|
}
|
|
|
|
func TestExtractAlgorithmWithRequirements(t *testing.T) {
|
|
logger := slog.Default()
|
|
|
|
t.Run("extract algorithm with requirements.txt", func(t *testing.T) {
|
|
ociDir := t.TempDir()
|
|
destDir := t.TempDir()
|
|
|
|
blobsDir := filepath.Join(ociDir, "blobs", "sha256")
|
|
require.NoError(t, os.MkdirAll(blobsDir, 0o755))
|
|
|
|
layerPath := filepath.Join(blobsDir, "layer123")
|
|
layerFile, err := os.Create(layerPath)
|
|
require.NoError(t, err)
|
|
|
|
gw := gzip.NewWriter(layerFile)
|
|
tw := tar.NewWriter(gw)
|
|
|
|
// Add algorithm file
|
|
algoContent := testPythonScript
|
|
algoHdr := &tar.Header{
|
|
Name: "main.py",
|
|
Mode: 0o644,
|
|
Size: int64(len(algoContent)),
|
|
}
|
|
require.NoError(t, tw.WriteHeader(algoHdr))
|
|
_, err = tw.Write([]byte(algoContent))
|
|
require.NoError(t, err)
|
|
|
|
// Add requirements.txt
|
|
reqContent := "numpy==1.21.0\npandas==1.3.0"
|
|
reqHdr := &tar.Header{
|
|
Name: "requirements.txt",
|
|
Mode: 0o644,
|
|
Size: int64(len(reqContent)),
|
|
}
|
|
require.NoError(t, tw.WriteHeader(reqHdr))
|
|
_, err = tw.Write([]byte(reqContent))
|
|
require.NoError(t, err)
|
|
|
|
require.NoError(t, tw.Close())
|
|
require.NoError(t, gw.Close())
|
|
require.NoError(t, layerFile.Close())
|
|
|
|
// Create manifest and index
|
|
manifest := struct {
|
|
Layers []struct {
|
|
Digest string `json:"digest"`
|
|
} `json:"layers"`
|
|
}{
|
|
Layers: []struct {
|
|
Digest string `json:"digest"`
|
|
}{{Digest: "sha256:layer123"}},
|
|
}
|
|
manifestData, err := json.Marshal(manifest)
|
|
require.NoError(t, err)
|
|
require.NoError(t, os.WriteFile(filepath.Join(blobsDir, "manifest123"), manifestData, 0o644))
|
|
|
|
index := OCIIndex{
|
|
SchemaVersion: 2,
|
|
Manifests: []struct {
|
|
MediaType string `json:"mediaType"`
|
|
Digest string `json:"digest"`
|
|
Size int `json:"size"`
|
|
}{{Digest: "sha256:manifest123", Size: len(manifestData)}},
|
|
}
|
|
indexData, err := json.Marshal(index)
|
|
require.NoError(t, err)
|
|
require.NoError(t, os.WriteFile(filepath.Join(ociDir, "index.json"), indexData, 0o644))
|
|
|
|
algoPath, _, err := ExtractAlgorithm(context.Background(), logger, ociDir, destDir, "python")
|
|
require.NoError(t, err)
|
|
assert.Contains(t, algoPath, "main.py")
|
|
|
|
// Verify requirements.txt was also extracted
|
|
reqPath := filepath.Join(destDir, "requirements.txt")
|
|
_, err = os.Stat(reqPath)
|
|
assert.NoError(t, err)
|
|
})
|
|
}
|
|
|
|
func TestExtractAlgorithmNoAlgoFile(t *testing.T) {
|
|
logger := slog.Default()
|
|
|
|
t.Run("no algorithm file in layers", func(t *testing.T) {
|
|
ociDir := t.TempDir()
|
|
destDir := t.TempDir()
|
|
|
|
blobsDir := filepath.Join(ociDir, "blobs", "sha256")
|
|
require.NoError(t, os.MkdirAll(blobsDir, 0o755))
|
|
|
|
layerPath := filepath.Join(blobsDir, "layer123")
|
|
layerFile, err := os.Create(layerPath)
|
|
require.NoError(t, err)
|
|
|
|
gw := gzip.NewWriter(layerFile)
|
|
tw := tar.NewWriter(gw)
|
|
|
|
// Add a non-algorithm file (e.g., just a readme)
|
|
readmeContent := "This is a readme"
|
|
readmeHdr := &tar.Header{
|
|
Name: "README.md",
|
|
Mode: 0o644,
|
|
Size: int64(len(readmeContent)),
|
|
}
|
|
require.NoError(t, tw.WriteHeader(readmeHdr))
|
|
_, err = tw.Write([]byte(readmeContent))
|
|
require.NoError(t, err)
|
|
|
|
require.NoError(t, tw.Close())
|
|
require.NoError(t, gw.Close())
|
|
require.NoError(t, layerFile.Close())
|
|
|
|
manifest := struct {
|
|
Layers []struct {
|
|
Digest string `json:"digest"`
|
|
} `json:"layers"`
|
|
}{
|
|
Layers: []struct {
|
|
Digest string `json:"digest"`
|
|
}{{Digest: "sha256:layer123"}},
|
|
}
|
|
manifestData, _ := json.Marshal(manifest)
|
|
require.NoError(t, os.WriteFile(filepath.Join(blobsDir, "manifest123"), manifestData, 0o644))
|
|
|
|
index := OCIIndex{
|
|
SchemaVersion: 2,
|
|
Manifests: []struct {
|
|
MediaType string `json:"mediaType"`
|
|
Digest string `json:"digest"`
|
|
Size int `json:"size"`
|
|
}{{Digest: "sha256:manifest123", Size: len(manifestData)}},
|
|
}
|
|
indexData, _ := json.Marshal(index)
|
|
require.NoError(t, os.WriteFile(filepath.Join(ociDir, "index.json"), indexData, 0o644))
|
|
|
|
_, _, err = ExtractAlgorithm(context.Background(), logger, ociDir, destDir, "")
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "no algorithm file found")
|
|
})
|
|
}
|
|
|
|
func TestExtractDatasetNoDataFiles(t *testing.T) {
|
|
t.Run("no data files in layers", func(t *testing.T) {
|
|
ociDir := t.TempDir()
|
|
destDir := t.TempDir()
|
|
|
|
blobsDir := filepath.Join(ociDir, "blobs", "sha256")
|
|
require.NoError(t, os.MkdirAll(blobsDir, 0o755))
|
|
|
|
layerPath := filepath.Join(blobsDir, "layer123")
|
|
layerFile, err := os.Create(layerPath)
|
|
require.NoError(t, err)
|
|
|
|
gw := gzip.NewWriter(layerFile)
|
|
tw := tar.NewWriter(gw)
|
|
|
|
// Add a python file (not a data file)
|
|
pyContent := testPythonScript
|
|
pyHdr := &tar.Header{
|
|
Name: "script.py",
|
|
Mode: 0o644,
|
|
Size: int64(len(pyContent)),
|
|
}
|
|
require.NoError(t, tw.WriteHeader(pyHdr))
|
|
_, err = tw.Write([]byte(pyContent))
|
|
require.NoError(t, err)
|
|
|
|
require.NoError(t, tw.Close())
|
|
require.NoError(t, gw.Close())
|
|
require.NoError(t, layerFile.Close())
|
|
|
|
manifest := struct {
|
|
Layers []struct {
|
|
Digest string `json:"digest"`
|
|
} `json:"layers"`
|
|
}{
|
|
Layers: []struct {
|
|
Digest string `json:"digest"`
|
|
}{{Digest: "sha256:layer123"}},
|
|
}
|
|
manifestData, _ := json.Marshal(manifest)
|
|
require.NoError(t, os.WriteFile(filepath.Join(blobsDir, "manifest123"), manifestData, 0o644))
|
|
|
|
index := OCIIndex{
|
|
SchemaVersion: 2,
|
|
Manifests: []struct {
|
|
MediaType string `json:"mediaType"`
|
|
Digest string `json:"digest"`
|
|
Size int `json:"size"`
|
|
}{{Digest: "sha256:manifest123", Size: len(manifestData)}},
|
|
}
|
|
indexData, _ := json.Marshal(index)
|
|
require.NoError(t, os.WriteFile(filepath.Join(ociDir, "index.json"), indexData, 0o644))
|
|
|
|
_, err = ExtractDataset(ociDir, destDir)
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "no dataset files found")
|
|
})
|
|
|
|
t.Run("corrupt layer file", func(t *testing.T) {
|
|
ociDir := t.TempDir()
|
|
blobsDir := filepath.Join(ociDir, "blobs", "sha256")
|
|
require.NoError(t, os.MkdirAll(blobsDir, 0o755))
|
|
|
|
require.NoError(t, os.WriteFile(filepath.Join(blobsDir, "layer123"), []byte("not a gzip"), 0o644))
|
|
|
|
manifest := struct {
|
|
Layers []struct {
|
|
Digest string `json:"digest"`
|
|
} `json:"layers"`
|
|
}{
|
|
Layers: []struct {
|
|
Digest string `json:"digest"`
|
|
}{{Digest: "sha256:layer123"}},
|
|
}
|
|
manifestData, _ := json.Marshal(manifest)
|
|
require.NoError(t, os.WriteFile(filepath.Join(blobsDir, "manifest123"), manifestData, 0o644))
|
|
|
|
index := OCIIndex{
|
|
SchemaVersion: 2,
|
|
Manifests: []struct {
|
|
MediaType string `json:"mediaType"`
|
|
Digest string `json:"digest"`
|
|
Size int `json:"size"`
|
|
}{{Digest: "sha256:manifest123", Size: len(manifestData)}},
|
|
}
|
|
indexData, _ := json.Marshal(index)
|
|
require.NoError(t, os.WriteFile(filepath.Join(ociDir, "index.json"), indexData, 0o644))
|
|
|
|
// ExtractDataset logs a warning and continues if a layer fails, but if ALL fail it errors
|
|
_, err := ExtractDataset(ociDir, t.TempDir())
|
|
assert.Error(t, err)
|
|
})
|
|
}
|
|
|
|
func TestExtractAlgorithmInvalidManifest(t *testing.T) {
|
|
logger := slog.Default()
|
|
|
|
t.Run("invalid manifest JSON", func(t *testing.T) {
|
|
ociDir := t.TempDir()
|
|
destDir := t.TempDir()
|
|
|
|
blobsDir := filepath.Join(ociDir, "blobs", "sha256")
|
|
require.NoError(t, os.MkdirAll(blobsDir, 0o755))
|
|
|
|
// Write invalid manifest
|
|
require.NoError(t, os.WriteFile(filepath.Join(blobsDir, "manifest123"), []byte("not json"), 0o644))
|
|
|
|
index := OCIIndex{
|
|
SchemaVersion: 2,
|
|
Manifests: []struct {
|
|
MediaType string `json:"mediaType"`
|
|
Digest string `json:"digest"`
|
|
Size int `json:"size"`
|
|
}{{Digest: "sha256:manifest123", Size: 8}},
|
|
}
|
|
indexData, _ := json.Marshal(index)
|
|
require.NoError(t, os.WriteFile(filepath.Join(ociDir, "index.json"), indexData, 0o644))
|
|
|
|
_, _, err := ExtractAlgorithm(context.Background(), logger, ociDir, destDir, "")
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to parse manifest")
|
|
})
|
|
}
|
|
|
|
func TestExtractAlgorithmMissingManifest(t *testing.T) {
|
|
logger := slog.Default()
|
|
|
|
t.Run("manifest file not found", func(t *testing.T) {
|
|
ociDir := t.TempDir()
|
|
destDir := t.TempDir()
|
|
|
|
blobsDir := filepath.Join(ociDir, "blobs", "sha256")
|
|
require.NoError(t, os.MkdirAll(blobsDir, 0o755))
|
|
|
|
// Don't create manifest file
|
|
index := OCIIndex{
|
|
SchemaVersion: 2,
|
|
Manifests: []struct {
|
|
MediaType string `json:"mediaType"`
|
|
Digest string `json:"digest"`
|
|
Size int `json:"size"`
|
|
}{{Digest: "sha256:missing123", Size: 8}},
|
|
}
|
|
indexData, _ := json.Marshal(index)
|
|
require.NoError(t, os.WriteFile(filepath.Join(ociDir, "index.json"), indexData, 0o644))
|
|
|
|
_, _, err := ExtractAlgorithm(context.Background(), logger, ociDir, destDir, "")
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "failed to read manifest")
|
|
})
|
|
}
|
|
|
|
func TestExtractAlgorithmWithDirectory(t *testing.T) {
|
|
logger := slog.Default()
|
|
|
|
t.Run("layer with directory entries", func(t *testing.T) {
|
|
ociDir := t.TempDir()
|
|
destDir := t.TempDir()
|
|
|
|
blobsDir := filepath.Join(ociDir, "blobs", "sha256")
|
|
require.NoError(t, os.MkdirAll(blobsDir, 0o755))
|
|
|
|
layerPath := filepath.Join(blobsDir, "layer123")
|
|
layerFile, err := os.Create(layerPath)
|
|
require.NoError(t, err)
|
|
|
|
gw := gzip.NewWriter(layerFile)
|
|
tw := tar.NewWriter(gw)
|
|
|
|
// Add a directory entry
|
|
dirHdr := &tar.Header{
|
|
Name: "src/",
|
|
Mode: 0o755,
|
|
Typeflag: tar.TypeDir,
|
|
}
|
|
require.NoError(t, tw.WriteHeader(dirHdr))
|
|
|
|
// Add algorithm file in subdirectory
|
|
algoContent := testPythonScript
|
|
algoHdr := &tar.Header{
|
|
Name: "src/main.py",
|
|
Mode: 0o644,
|
|
Size: int64(len(algoContent)),
|
|
}
|
|
require.NoError(t, tw.WriteHeader(algoHdr))
|
|
_, err = tw.Write([]byte(algoContent))
|
|
require.NoError(t, err)
|
|
|
|
require.NoError(t, tw.Close())
|
|
require.NoError(t, gw.Close())
|
|
require.NoError(t, layerFile.Close())
|
|
|
|
manifest := struct {
|
|
Layers []struct {
|
|
Digest string `json:"digest"`
|
|
} `json:"layers"`
|
|
}{
|
|
Layers: []struct {
|
|
Digest string `json:"digest"`
|
|
}{{Digest: "sha256:layer123"}},
|
|
}
|
|
manifestData, _ := json.Marshal(manifest)
|
|
require.NoError(t, os.WriteFile(filepath.Join(blobsDir, "manifest123"), manifestData, 0o644))
|
|
|
|
index := OCIIndex{
|
|
SchemaVersion: 2,
|
|
Manifests: []struct {
|
|
MediaType string `json:"mediaType"`
|
|
Digest string `json:"digest"`
|
|
Size int `json:"size"`
|
|
}{{Digest: "sha256:manifest123", Size: len(manifestData)}},
|
|
}
|
|
indexData, _ := json.Marshal(index)
|
|
require.NoError(t, os.WriteFile(filepath.Join(ociDir, "index.json"), indexData, 0o644))
|
|
|
|
algoPath, _, err := ExtractAlgorithm(context.Background(), logger, ociDir, destDir, "python")
|
|
require.NoError(t, err)
|
|
assert.Contains(t, algoPath, "main.py")
|
|
})
|
|
}
|
|
|
|
func TestExtractAlgorithmPathTraversal(t *testing.T) {
|
|
logger := slog.Default()
|
|
|
|
t.Run("path traversal attempt", func(t *testing.T) {
|
|
ociDir := t.TempDir()
|
|
destDir := t.TempDir()
|
|
|
|
blobsDir := filepath.Join(ociDir, "blobs", "sha256")
|
|
require.NoError(t, os.MkdirAll(blobsDir, 0o755))
|
|
|
|
layerPath := filepath.Join(blobsDir, "layer123")
|
|
layerFile, err := os.Create(layerPath)
|
|
require.NoError(t, err)
|
|
|
|
gw := gzip.NewWriter(layerFile)
|
|
tw := tar.NewWriter(gw)
|
|
|
|
// Add a file with path traversal attempt
|
|
maliciousContent := "malicious"
|
|
maliciousHdr := &tar.Header{
|
|
Name: "../../../etc/malicious.py",
|
|
Mode: 0o644,
|
|
Size: int64(len(maliciousContent)),
|
|
}
|
|
require.NoError(t, tw.WriteHeader(maliciousHdr))
|
|
_, err = tw.Write([]byte(maliciousContent))
|
|
require.NoError(t, err)
|
|
|
|
// Add a legit file
|
|
algoContent := testPythonScript
|
|
algoHdr := &tar.Header{
|
|
Name: "algorithm.py",
|
|
Mode: 0o644,
|
|
Size: int64(len(algoContent)),
|
|
}
|
|
require.NoError(t, tw.WriteHeader(algoHdr))
|
|
_, err = tw.Write([]byte(algoContent))
|
|
require.NoError(t, err)
|
|
|
|
require.NoError(t, tw.Close())
|
|
require.NoError(t, gw.Close())
|
|
require.NoError(t, layerFile.Close())
|
|
|
|
manifest := struct {
|
|
Layers []struct {
|
|
Digest string `json:"digest"`
|
|
} `json:"layers"`
|
|
}{
|
|
Layers: []struct {
|
|
Digest string `json:"digest"`
|
|
}{{Digest: "sha256:layer123"}},
|
|
}
|
|
manifestData, _ := json.Marshal(manifest)
|
|
require.NoError(t, os.WriteFile(filepath.Join(blobsDir, "manifest123"), manifestData, 0o644))
|
|
|
|
index := OCIIndex{
|
|
SchemaVersion: 2,
|
|
Manifests: []struct {
|
|
MediaType string `json:"mediaType"`
|
|
Digest string `json:"digest"`
|
|
Size int `json:"size"`
|
|
}{{Digest: "sha256:manifest123", Size: len(manifestData)}},
|
|
}
|
|
indexData, _ := json.Marshal(index)
|
|
require.NoError(t, os.WriteFile(filepath.Join(ociDir, "index.json"), indexData, 0o644))
|
|
|
|
algoPath, _, err := ExtractAlgorithm(context.Background(), logger, ociDir, destDir, "python")
|
|
require.NoError(t, err)
|
|
assert.Contains(t, algoPath, "algorithm.py")
|
|
|
|
// Verify malicious file was NOT extracted outside destDir
|
|
_, err = os.Stat("/etc/malicious.py")
|
|
assert.True(t, os.IsNotExist(err))
|
|
})
|
|
}
|
|
|
|
func TestExtractAlgorithmErrorPathsAdditional(t *testing.T) {
|
|
logger := slog.Default()
|
|
|
|
t.Run("invalid layer gzip", func(t *testing.T) {
|
|
ociDir, destDir := setupTestOCIImage(t, "main.py", "print('hello')")
|
|
// Corrupt the layer file
|
|
layerPath := filepath.Join(ociDir, "blobs", "sha256", "layer123")
|
|
err := os.WriteFile(layerPath, []byte("not gzip"), 0o644)
|
|
require.NoError(t, err)
|
|
|
|
_, _, err = ExtractAlgorithm(context.Background(), logger, ociDir, destDir, "")
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "no algorithm file found")
|
|
})
|
|
|
|
t.Run("invalid tar formatting", func(t *testing.T) {
|
|
ociDir, destDir := setupTestOCIImage(t, "main.py", "print('hello')")
|
|
layerPath := filepath.Join(ociDir, "blobs", "sha256", "layer123")
|
|
|
|
// Create a valid gzip but invalid tar
|
|
var buf bytes.Buffer
|
|
gw := gzip.NewWriter(&buf)
|
|
_, err := gw.Write([]byte("not a tar archive but it is gzipped"))
|
|
require.NoError(t, err)
|
|
gw.Close()
|
|
err = os.WriteFile(layerPath, buf.Bytes(), 0o644)
|
|
require.NoError(t, err)
|
|
|
|
_, _, err = ExtractAlgorithm(context.Background(), logger, ociDir, destDir, "")
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "no algorithm file found")
|
|
})
|
|
|
|
t.Run("non-existent layer file", func(t *testing.T) {
|
|
ociDir := t.TempDir()
|
|
destDir := t.TempDir()
|
|
blobsDir := filepath.Join(ociDir, "blobs", "sha256")
|
|
require.NoError(t, os.MkdirAll(blobsDir, 0o755))
|
|
|
|
manifest := struct {
|
|
Layers []struct {
|
|
Digest string `json:"digest"`
|
|
} `json:"layers"`
|
|
}{
|
|
Layers: []struct {
|
|
Digest string `json:"digest"`
|
|
}{{Digest: "sha256:nonexistent"}},
|
|
}
|
|
manifestData, _ := json.Marshal(manifest)
|
|
require.NoError(t, os.WriteFile(filepath.Join(blobsDir, "manifest123"), manifestData, 0o644))
|
|
|
|
index := OCIIndex{
|
|
SchemaVersion: 2,
|
|
Manifests: []struct {
|
|
MediaType string `json:"mediaType"`
|
|
Digest string `json:"digest"`
|
|
Size int `json:"size"`
|
|
}{{Digest: "sha256:manifest123", Size: len(manifestData)}},
|
|
}
|
|
indexData, _ := json.Marshal(index)
|
|
require.NoError(t, os.WriteFile(filepath.Join(ociDir, "index.json"), indexData, 0o644))
|
|
|
|
_, _, err := ExtractAlgorithm(context.Background(), logger, ociDir, destDir, "")
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "no algorithm file found")
|
|
})
|
|
}
|
|
|
|
func TestExtractDatasetErrorPathsAdditional(t *testing.T) {
|
|
t.Run("invalid layer gzip", func(t *testing.T) {
|
|
ociDir, destDir := setupTestOCIImage(t, "data.csv", "a,b,c")
|
|
layerPath := filepath.Join(ociDir, "blobs", "sha256", "layer123")
|
|
err := os.WriteFile(layerPath, []byte("not gzip"), 0o644)
|
|
require.NoError(t, err)
|
|
|
|
_, err = ExtractDataset(ociDir, destDir)
|
|
assert.Error(t, err)
|
|
})
|
|
|
|
t.Run("non-existent layer file", func(t *testing.T) {
|
|
ociDir := t.TempDir()
|
|
destDir := t.TempDir()
|
|
blobsDir := filepath.Join(ociDir, "blobs", "sha256")
|
|
require.NoError(t, os.MkdirAll(blobsDir, 0o755))
|
|
|
|
manifest := struct {
|
|
Layers []struct {
|
|
Digest string `json:"digest"`
|
|
} `json:"layers"`
|
|
}{
|
|
Layers: []struct {
|
|
Digest string `json:"digest"`
|
|
}{{Digest: "sha256:nonexistent"}},
|
|
}
|
|
manifestData, _ := json.Marshal(manifest)
|
|
require.NoError(t, os.WriteFile(filepath.Join(blobsDir, "manifest123"), manifestData, 0o644))
|
|
|
|
index := OCIIndex{
|
|
SchemaVersion: 2,
|
|
Manifests: []struct {
|
|
MediaType string `json:"mediaType"`
|
|
Digest string `json:"digest"`
|
|
Size int `json:"size"`
|
|
}{{Digest: "sha256:manifest123", Size: len(manifestData)}},
|
|
}
|
|
indexData, _ := json.Marshal(index)
|
|
require.NoError(t, os.WriteFile(filepath.Join(ociDir, "index.json"), indexData, 0o644))
|
|
|
|
_, err := ExtractDataset(ociDir, destDir)
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "no dataset files found")
|
|
})
|
|
}
|
|
|
|
func TestExtractAlgorithmAdditionalTypes(t *testing.T) {
|
|
t.Run("isAlgorithmFile additional types", func(t *testing.T) {
|
|
assert.False(t, isAlgorithmFile("any", 0o644, "docker"))
|
|
assert.False(t, isAlgorithmFile("any", 0o644, "unknown"))
|
|
})
|
|
}
|
|
|
|
func TestExtractAlgorithmErrorPathsInternal(t *testing.T) {
|
|
logger := slog.Default()
|
|
|
|
t.Run("failed to create directory", func(t *testing.T) {
|
|
ociDir, destDir := setupTestOCIImage(t, "algorithm.py", "print('hello')")
|
|
|
|
// Create a file where a directory should be
|
|
blockedDir := filepath.Join(destDir, "blocked")
|
|
require.NoError(t, os.WriteFile(blockedDir, []byte("data"), 0o644))
|
|
|
|
// Try to extract an algorithm that would need to create a directory where a file exists
|
|
layerPath := filepath.Join(ociDir, "blobs", "sha256", "layer123")
|
|
var buf bytes.Buffer
|
|
gw := gzip.NewWriter(&buf)
|
|
tw := tar.NewWriter(gw)
|
|
hdr := &tar.Header{
|
|
Name: "blocked/main.py",
|
|
Mode: 0o644,
|
|
Size: int64(len("print(1)")),
|
|
}
|
|
require.NoError(t, tw.WriteHeader(hdr))
|
|
_, _ = tw.Write([]byte("print(1)"))
|
|
tw.Close()
|
|
gw.Close()
|
|
require.NoError(t, os.WriteFile(layerPath, buf.Bytes(), 0o644))
|
|
|
|
_, _, err := ExtractAlgorithm(context.Background(), logger, ociDir, destDir, "python")
|
|
assert.Error(t, err)
|
|
})
|
|
|
|
t.Run("failed to create file", func(t *testing.T) {
|
|
ociDir, destDir := setupTestOCIImage(t, "algorithm.py", "print('hello')")
|
|
|
|
// Create a directory where a file should be
|
|
blockedFile := filepath.Join(destDir, "algorithm.py")
|
|
require.NoError(t, os.MkdirAll(blockedFile, 0o755))
|
|
|
|
_, _, err := ExtractAlgorithm(context.Background(), logger, ociDir, destDir, "python")
|
|
assert.Error(t, err)
|
|
})
|
|
}
|
|
|
|
func TestExtractDatasetErrorPathsInternal(t *testing.T) {
|
|
t.Run("failed to create directory for dataset", func(t *testing.T) {
|
|
ociDir, destDir := setupTestOCIImage(t, "data.csv", "a,b,c")
|
|
|
|
blockedDir := filepath.Join(destDir, "blocked")
|
|
require.NoError(t, os.WriteFile(blockedDir, []byte("data"), 0o644))
|
|
|
|
layerPath := filepath.Join(ociDir, "blobs", "sha256", "layer123")
|
|
var buf bytes.Buffer
|
|
gw := gzip.NewWriter(&buf)
|
|
tw := tar.NewWriter(gw)
|
|
hdr := &tar.Header{
|
|
Name: "blocked/data.csv",
|
|
Mode: 0o644,
|
|
Size: int64(len("a,b")),
|
|
}
|
|
require.NoError(t, tw.WriteHeader(hdr))
|
|
_, _ = tw.Write([]byte("a,b"))
|
|
tw.Close()
|
|
gw.Close()
|
|
require.NoError(t, os.WriteFile(layerPath, buf.Bytes(), 0o644))
|
|
|
|
_, err := ExtractDataset(ociDir, destDir)
|
|
assert.Error(t, err)
|
|
})
|
|
}
|
|
|
|
func TestExtractAlgorithm_PythonNoRequirements(t *testing.T) {
|
|
logger := slog.Default()
|
|
ociDir, destDir := setupTestOCIImage(t, "main.py", testPythonScript)
|
|
algoPath, reqPath, err := ExtractAlgorithm(context.Background(), logger, ociDir, destDir, "python")
|
|
require.NoError(t, err)
|
|
assert.NotEmpty(t, algoPath)
|
|
assert.Empty(t, reqPath)
|
|
}
|
|
|
|
func TestExtractDataset_MultipleLayers(t *testing.T) {
|
|
ociDir := t.TempDir()
|
|
destDir := t.TempDir()
|
|
blobsDir := filepath.Join(ociDir, "blobs", "sha256")
|
|
require.NoError(t, os.MkdirAll(blobsDir, 0o755))
|
|
|
|
createLayer := func(name, filename, content string) string {
|
|
path := filepath.Join(blobsDir, name)
|
|
f, err := os.Create(path)
|
|
require.NoError(t, err)
|
|
gw := gzip.NewWriter(f)
|
|
tw := tar.NewWriter(gw)
|
|
hdr := &tar.Header{Name: filename, Mode: 0o644, Size: int64(len(content))}
|
|
err = tw.WriteHeader(hdr)
|
|
require.NoError(t, err)
|
|
_, err = tw.Write([]byte(content))
|
|
require.NoError(t, err)
|
|
err = tw.Close()
|
|
require.NoError(t, err)
|
|
err = gw.Close()
|
|
require.NoError(t, err)
|
|
err = f.Close()
|
|
require.NoError(t, err)
|
|
return "sha256:" + name
|
|
}
|
|
|
|
layer1 := createLayer("l1", "data1.csv", "1,2")
|
|
layer2 := createLayer("l2", "data2.csv", "3,4")
|
|
|
|
manifest := struct {
|
|
Layers []struct {
|
|
Digest string `json:"digest"`
|
|
} `json:"layers"`
|
|
}{
|
|
Layers: []struct {
|
|
Digest string `json:"digest"`
|
|
}{{Digest: layer1}, {Digest: layer2}},
|
|
}
|
|
manifestData, err := json.Marshal(manifest)
|
|
require.NoError(t, err)
|
|
require.NoError(t, os.WriteFile(filepath.Join(blobsDir, "m1"), manifestData, 0o644))
|
|
|
|
index := OCIIndex{
|
|
SchemaVersion: 2,
|
|
Manifests: []struct {
|
|
MediaType string `json:"mediaType"`
|
|
Digest string `json:"digest"`
|
|
Size int `json:"size"`
|
|
}{{Digest: "sha256:m1", Size: len(manifestData)}},
|
|
}
|
|
indexData, err := json.Marshal(index)
|
|
require.NoError(t, err)
|
|
require.NoError(t, os.WriteFile(filepath.Join(ociDir, "index.json"), indexData, 0o644))
|
|
|
|
files, err := ExtractDataset(ociDir, destDir)
|
|
require.NoError(t, err)
|
|
assert.Len(t, files, 2)
|
|
}
|
|
|
|
func TestExtractAlgorithm_ErrorPaths(t *testing.T) {
|
|
logger := slog.Default()
|
|
|
|
t.Run("invalid layer gzip", func(t *testing.T) {
|
|
ociDir := t.TempDir()
|
|
blobsDir := filepath.Join(ociDir, "blobs", "sha256")
|
|
require.NoError(t, os.MkdirAll(blobsDir, 0o755))
|
|
require.NoError(t, os.WriteFile(filepath.Join(blobsDir, "l1"), []byte("not gzip"), 0o644))
|
|
|
|
manifest := struct {
|
|
Layers []struct {
|
|
Digest string `json:"digest"`
|
|
} `json:"layers"`
|
|
}{
|
|
Layers: []struct {
|
|
Digest string `json:"digest"`
|
|
}{{Digest: "sha256:l1"}},
|
|
}
|
|
manifestData, _ := json.Marshal(manifest)
|
|
require.NoError(t, os.WriteFile(filepath.Join(blobsDir, "m1"), manifestData, 0o644))
|
|
|
|
index := OCIIndex{
|
|
SchemaVersion: 2,
|
|
Manifests: []struct {
|
|
MediaType string `json:"mediaType"`
|
|
Digest string `json:"digest"`
|
|
Size int `json:"size"`
|
|
}{{Digest: "sha256:m1", Size: len(manifestData)}},
|
|
}
|
|
indexData, _ := json.Marshal(index)
|
|
require.NoError(t, os.WriteFile(filepath.Join(ociDir, "index.json"), indexData, 0o644))
|
|
|
|
_, _, err := ExtractAlgorithm(context.Background(), logger, ociDir, t.TempDir(), "bin")
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "no algorithm file found")
|
|
})
|
|
}
|