package datasets import ( "errors" "fmt" "os" "path/filepath" "sort" "strings" "time" ) // LocalStore stores dataset files on the local filesystem. // // Layout under Root: // // .bin — committed dataset // .bin.downloading — in-progress dataset // .bin.manifest.json — completed work units // // where is DatasetID.Filename() — typically // "20060102T150405Z" for the global subset or // "20060102T150405Z_r-10.10.-30.30_h0.72" for a subset. type LocalStore struct { Root string Source string Extension string // default ".bin" } // NewLocalStore returns a LocalStore at root. The directory is created if missing. func NewLocalStore(root, sourceID string) (*LocalStore, error) { if err := os.MkdirAll(root, 0o755); err != nil { return nil, fmt.Errorf("create store root %s: %w", root, err) } return &LocalStore{Root: root, Source: sourceID, Extension: ".bin"}, nil } // SourceID returns the source ID this store is configured for. func (s *LocalStore) SourceID() string { return s.Source } func (s *LocalStore) ext() string { if s.Extension == "" { return ".bin" } return s.Extension } // Path returns the canonical path for id's committed dataset. func (s *LocalStore) Path(id DatasetID) string { return filepath.Join(s.Root, id.Filename()+s.ext()) } func (s *LocalStore) tempPath(id DatasetID) string { return s.Path(id) + ".downloading" } func (s *LocalStore) manifestPath(id DatasetID) string { return s.Path(id) + ".manifest.json" } // Exists reports whether a committed dataset for id is present. func (s *LocalStore) Exists(id DatasetID) bool { info, err := os.Stat(s.Path(id)) return err == nil && !info.IsDir() } // List returns all committed dataset IDs, newest first. func (s *LocalStore) List() ([]DatasetID, error) { entries, err := os.ReadDir(s.Root) if err != nil { return nil, fmt.Errorf("read store: %w", err) } var out []DatasetID ext := s.ext() for _, e := range entries { if e.IsDir() { continue } name := e.Name() if !strings.HasSuffix(name, ext) { continue } stem := strings.TrimSuffix(name, ext) // Skip in-progress files (their stem ends in .downloading or .manifest) if strings.Contains(stem, ".") { continue } id, ok := parseFilename(stem) if !ok { continue } out = append(out, id) } sort.Slice(out, func(i, j int) bool { if !out[i].Epoch.Equal(out[j].Epoch) { return out[i].Epoch.After(out[j].Epoch) } return out[i].Subset.Key() < out[j].Subset.Key() }) return out, nil } // parseFilename inverts DatasetID.Filename(). The subset portion is not // fully reversible (Key encoding is one-way for floats), so List returns // IDs whose Subset is zero — the storage layer treats names as opaque // identifiers. Callers wanting structured subset metadata should keep an // out-of-band record. func parseFilename(stem string) (DatasetID, bool) { parts := strings.SplitN(stem, "_", 2) epoch, err := time.Parse("20060102T150405Z", parts[0]) if err != nil { return DatasetID{}, false } id := DatasetID{Epoch: epoch.UTC()} // Subset key is opaque on disk; we don't reconstruct its parameters // here. Admin callers track subset specs separately when they need // the structured form. return id, true } // Remove deletes the committed dataset and any sidecar files for id. func (s *LocalStore) Remove(id DatasetID) error { var errs []error for _, p := range []string{s.Path(id), s.tempPath(id), s.manifestPath(id)} { if err := os.Remove(p); err != nil && !errors.Is(err, os.ErrNotExist) { errs = append(errs, err) } } if len(errs) > 0 { return fmt.Errorf("remove dataset: %v", errs) } return nil } // BeginWrite opens or resumes a TempHandle for id. func (s *LocalStore) BeginWrite(id DatasetID) (TempHandle, error) { man, err := LoadManifest(s.manifestPath(id)) if err != nil { return nil, err } return &localHandle{store: s, id: id, manifest: man}, nil } type localHandle struct { store *LocalStore id DatasetID manifest *Manifest closed bool } func (h *localHandle) Path() string { return h.store.tempPath(h.id) } func (h *localHandle) Manifest() *Manifest { return h.manifest } func (h *localHandle) Commit() error { if h.closed { return nil } h.closed = true if err := os.Rename(h.store.tempPath(h.id), h.store.Path(h.id)); err != nil { return fmt.Errorf("commit rename: %w", err) } if err := os.Remove(h.store.manifestPath(h.id)); err != nil && !errors.Is(err, os.ErrNotExist) { return fmt.Errorf("commit remove manifest: %w", err) } return nil } func (h *localHandle) Abort() error { if h.closed { return nil } h.closed = true var firstErr error for _, p := range []string{h.store.tempPath(h.id), h.store.manifestPath(h.id)} { if err := os.Remove(p); err != nil && !errors.Is(err, os.ErrNotExist) && firstErr == nil { firstErr = err } } return firstErr }