predictor/internal/datasets/store_local.go
2026-05-23 00:55:35 +09:00

180 lines
4.8 KiB
Go

package datasets
import (
"errors"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"time"
)
// LocalStore stores dataset files on the local filesystem.
//
// Layout under Root (shown for the default ".bin" extension):
//
//	<filename>.bin                — committed dataset
//	<filename>.bin.downloading    — in-progress dataset
//	<filename>.bin.manifest.json  — completed work units
//
// where <filename> is DatasetID.Filename() — typically
// "20060102T150405Z" for the global subset or
// "20060102T150405Z_r-10.10.-30.30_h0.72" for a subset.
type LocalStore struct {
// Root is the directory that holds every file managed by the store.
Root string
// Source is the source ID reported by SourceID.
Source string
// Extension is the suffix appended to DatasetID.Filename() to form the
// committed file name. An empty value is treated as ".bin".
Extension string // default ".bin"
}
// NewLocalStore builds a LocalStore rooted at root and tagged with sourceID.
//
// The root directory (including any missing parents) is created with mode
// 0o755 when it does not exist yet. The returned store uses the ".bin"
// extension. An error is returned only when the directory cannot be created.
func NewLocalStore(root, sourceID string) (*LocalStore, error) {
	err := os.MkdirAll(root, 0o755)
	if err != nil {
		return nil, fmt.Errorf("create store root %s: %w", root, err)
	}

	store := &LocalStore{
		Root:      root,
		Source:    sourceID,
		Extension: ".bin",
	}
	return store, nil
}
// SourceID reports the source ID held in the store's Source field.
func (s *LocalStore) SourceID() string {
	return s.Source
}
// ext returns the file extension used for committed datasets: the
// configured Extension, or ".bin" when none has been set.
func (s *LocalStore) ext() string {
	if configured := s.Extension; configured != "" {
		return configured
	}
	return ".bin"
}
// Path returns where the committed dataset for id lives: the file named
// id.Filename() plus the store extension, directly under Root.
func (s *LocalStore) Path(id DatasetID) string {
	fileName := id.Filename() + s.ext()
	return filepath.Join(s.Root, fileName)
}
// tempPath returns the path of the in-progress file for id, which is the
// committed path with ".downloading" appended.
func (s *LocalStore) tempPath(id DatasetID) string {
	committed := s.Path(id)
	return committed + ".downloading"
}
// manifestPath returns the path of the manifest sidecar for id, which is the
// committed path with ".manifest.json" appended.
func (s *LocalStore) manifestPath(id DatasetID) string {
	committed := s.Path(id)
	return committed + ".manifest.json"
}
// Exists reports whether the committed path for id can be stat'ed and is not
// a directory. Any stat failure (not only "does not exist") yields false.
func (s *LocalStore) Exists(id DatasetID) bool {
	info, err := os.Stat(s.Path(id))
	if err != nil {
		return false
	}
	return !info.IsDir()
}
// List scans Root and returns the IDs of committed datasets, ordered by
// epoch from newest to oldest; IDs with equal epochs are ordered by ascending
// Subset.Key().
//
// A directory entry is considered only when it is not a directory, its name
// ends in the store extension, the rest of the name (the stem) contains no
// ".", and parseFilename accepts the stem. Every other entry is skipped
// silently. The result is nil when nothing qualifies. An error is returned
// only if Root cannot be read.
//
// NOTE(review): the "." filter also rejects stems shaped like the subset
// example in the LocalStore doc comment ("..._r-10.10.-30.30_h0.72"), so such
// files would never be listed — confirm whether that is intended.
func (s *LocalStore) List() ([]DatasetID, error) {
	dirEntries, err := os.ReadDir(s.Root)
	if err != nil {
		return nil, fmt.Errorf("read store: %w", err)
	}

	suffix := s.ext()
	var ids []DatasetID
	for _, entry := range dirEntries {
		name := entry.Name()
		if entry.IsDir() || !strings.HasSuffix(name, suffix) {
			continue
		}
		// Drop the extension; what remains must be a plain, dot-free stem.
		stem := name[:len(name)-len(suffix)]
		if strings.IndexByte(stem, '.') >= 0 {
			continue
		}
		if id, ok := parseFilename(stem); ok {
			ids = append(ids, id)
		}
	}

	sort.Slice(ids, func(a, b int) bool {
		left, right := ids[a], ids[b]
		if left.Epoch.Equal(right.Epoch) {
			return left.Subset.Key() < right.Subset.Key()
		}
		return left.Epoch.After(right.Epoch)
	})
	return ids, nil
}
// parseFilename recovers a DatasetID from a filename stem as produced by
// DatasetID.Filename().
//
// Only the text before the first "_" is interpreted: it must parse with the
// layout "20060102T150405Z" and becomes the ID's Epoch (in UTC). Whatever
// follows the "_" — the subset key — is treated as opaque and is not
// decoded, so the returned ID always carries a zero Subset. Callers that
// need the structured subset parameters have to track them separately.
//
// The boolean result is false when the epoch portion does not parse.
func parseFilename(stem string) (DatasetID, bool) {
	epochText := stem
	if sep := strings.IndexByte(stem, '_'); sep >= 0 {
		epochText = stem[:sep]
	}

	parsed, err := time.Parse("20060102T150405Z", epochText)
	if err != nil {
		return DatasetID{}, false
	}
	return DatasetID{Epoch: parsed.UTC()}, true
}
// Remove deletes the committed dataset for id together with its sidecar
// files (the in-progress ".downloading" file and the manifest).
//
// Files that do not exist are ignored, so removing an absent dataset returns
// nil. Every path is attempted even if an earlier removal fails. All failures
// are combined with errors.Join and wrapped with %w, so callers can still
// match the underlying causes with errors.Is / errors.As.
func (s *LocalStore) Remove(id DatasetID) error {
	var errs []error
	for _, p := range []string{s.Path(id), s.tempPath(id), s.manifestPath(id)} {
		if err := os.Remove(p); err != nil && !errors.Is(err, os.ErrNotExist) {
			errs = append(errs, err)
		}
	}
	// errors.Join yields nil when no failure was recorded.
	if err := errors.Join(errs...); err != nil {
		return fmt.Errorf("remove dataset: %w", err)
	}
	return nil
}
// BeginWrite returns a TempHandle for writing (or resuming) the dataset id.
//
// It loads the manifest stored at the dataset's manifest path via
// LoadManifest and attaches it to the handle; any LoadManifest error is
// returned unchanged. The in-progress data file itself is not created or
// opened here — callers obtain its location from the handle's Path.
//
// NOTE(review): how LoadManifest treats a missing manifest file is not
// visible here — presumably it yields an empty manifest so a fresh write can
// start; confirm.
func (s *LocalStore) BeginWrite(id DatasetID) (TempHandle, error) {
	manifest, err := LoadManifest(s.manifestPath(id))
	if err != nil {
		return nil, err
	}

	handle := &localHandle{
		store:    s,
		id:       id,
		manifest: manifest,
	}
	return handle, nil
}
// localHandle is the handle returned by LocalStore.BeginWrite for a single
// in-progress dataset.
type localHandle struct {
// store is the LocalStore that created the handle; it supplies all paths.
store *LocalStore
// id identifies the dataset being written.
id DatasetID
// manifest is the value loaded by LoadManifest in BeginWrite.
manifest *Manifest
// closed is set as soon as Commit or Abort is called; later calls to either
// method return nil without doing anything.
closed bool
}
// Path returns the location of the in-progress (".downloading") file for
// this handle's dataset.
func (h *localHandle) Path() string {
	return h.store.tempPath(h.id)
}
// Manifest returns the manifest that was loaded when the handle was created.
func (h *localHandle) Manifest() *Manifest {
	return h.manifest
}
// Commit promotes the in-progress file to the committed dataset path with
// os.Rename and then deletes the manifest sidecar. A manifest that is already
// absent is not treated as an error.
//
// Once Commit or Abort has been called on the handle, further calls return
// nil without touching the filesystem.
//
// NOTE(review): the handle is marked closed before the rename is attempted,
// so after a failed Commit both a retry and a later Abort return nil and do
// nothing, leaving the in-progress file and manifest in place — confirm this
// is the intended contract.
func (h *localHandle) Commit() error {
	if h.closed {
		return nil
	}
	h.closed = true

	src, dst := h.store.tempPath(h.id), h.store.Path(h.id)
	if err := os.Rename(src, dst); err != nil {
		return fmt.Errorf("commit rename: %w", err)
	}

	err := os.Remove(h.store.manifestPath(h.id))
	if err == nil || errors.Is(err, os.ErrNotExist) {
		return nil
	}
	return fmt.Errorf("commit remove manifest: %w", err)
}
// Abort discards the in-progress write by deleting the ".downloading" file
// and the manifest sidecar. Both removals are always attempted; files that
// are already missing are ignored. The first real failure, if any, is
// returned as-is.
//
// Once Commit or Abort has been called on the handle, further calls return
// nil without touching the filesystem.
func (h *localHandle) Abort() error {
	if h.closed {
		return nil
	}
	h.closed = true

	targets := [...]string{
		h.store.tempPath(h.id),
		h.store.manifestPath(h.id),
	}
	var result error
	for _, target := range targets {
		err := os.Remove(target)
		switch {
		case err == nil, errors.Is(err, os.ErrNotExist):
			// Removed, or there was nothing to remove.
		case result == nil:
			result = err
		}
	}
	return result
}