package datasets import ( "errors" "fmt" "os" "path/filepath" "sort" "strings" "time" ) // LocalStore stores dataset files on the local filesystem. // // Layout under Root: // // .bin — committed dataset (binary cube) // .bin.downloading — in-progress dataset // .bin.manifest.json — manifest of completed work units // // The .bin suffix exists to differentiate from sidecars in directory listings; // epoch is formatted as "20060102T150405Z" (UTC). type LocalStore struct { Root string Source string // source ID, recorded for safety but currently advisory Extension string // default ".bin" } // NewLocalStore returns a LocalStore at root. The directory is created if missing. func NewLocalStore(root, sourceID string) (*LocalStore, error) { if err := os.MkdirAll(root, 0o755); err != nil { return nil, fmt.Errorf("create store root %s: %w", root, err) } return &LocalStore{Root: root, Source: sourceID, Extension: ".bin"}, nil } // SourceID returns the source ID this store is configured for. func (s *LocalStore) SourceID() string { return s.Source } const epochFormat = "20060102T150405Z" func (s *LocalStore) ext() string { if s.Extension == "" { return ".bin" } return s.Extension } // Path returns the canonical path for an epoch's committed dataset file. func (s *LocalStore) Path(epoch time.Time) string { return filepath.Join(s.Root, epoch.UTC().Format(epochFormat)+s.ext()) } func (s *LocalStore) tempPath(epoch time.Time) string { return s.Path(epoch) + ".downloading" } func (s *LocalStore) manifestPath(epoch time.Time) string { return s.Path(epoch) + ".manifest.json" } // Exists reports whether a committed dataset for epoch is present. func (s *LocalStore) Exists(epoch time.Time) bool { info, err := os.Stat(s.Path(epoch)) return err == nil && !info.IsDir() } // List returns all committed epochs, newest first. func (s *LocalStore) List() ([]time.Time, error) { entries, err := os.ReadDir(s.Root) if err != nil { return nil, fmt.Errorf("read store: %w", err) } var out []time.Time ext := s.ext() for _, e := range entries { if e.IsDir() { continue } name := e.Name() if !strings.HasSuffix(name, ext) { continue } stem := strings.TrimSuffix(name, ext) // skip in-progress files (their stem already has .bin.downloading...) if strings.Contains(stem, ".") { continue } t, err := time.Parse(epochFormat, stem) if err != nil { continue } out = append(out, t.UTC()) } sort.Slice(out, func(i, j int) bool { return out[i].After(out[j]) }) return out, nil } // Remove deletes the committed dataset and any sidecar files for epoch. func (s *LocalStore) Remove(epoch time.Time) error { var errs []error for _, p := range []string{s.Path(epoch), s.tempPath(epoch), s.manifestPath(epoch)} { if err := os.Remove(p); err != nil && !errors.Is(err, os.ErrNotExist) { errs = append(errs, err) } } if len(errs) > 0 { return fmt.Errorf("remove dataset: %v", errs) } return nil } // BeginWrite opens or resumes a TempHandle for epoch. // // If a partial download is already present, its file and manifest are reused // so the new download picks up where the previous one stopped. func (s *LocalStore) BeginWrite(epoch time.Time) (TempHandle, error) { man, err := LoadManifest(s.manifestPath(epoch)) if err != nil { return nil, err } return &localHandle{ store: s, epoch: epoch, manifest: man, }, nil } type localHandle struct { store *LocalStore epoch time.Time manifest *Manifest closed bool } func (h *localHandle) Path() string { return h.store.tempPath(h.epoch) } func (h *localHandle) Manifest() *Manifest { return h.manifest } // Commit promotes the temp file to its final path and removes the manifest. func (h *localHandle) Commit() error { if h.closed { return nil } h.closed = true if err := os.Rename(h.store.tempPath(h.epoch), h.store.Path(h.epoch)); err != nil { return fmt.Errorf("commit rename: %w", err) } if err := os.Remove(h.store.manifestPath(h.epoch)); err != nil && !errors.Is(err, os.ErrNotExist) { return fmt.Errorf("commit remove manifest: %w", err) } return nil } // Abort removes the in-progress file and manifest. func (h *localHandle) Abort() error { if h.closed { return nil } h.closed = true var firstErr error for _, p := range []string{h.store.tempPath(h.epoch), h.store.manifestPath(h.epoch)} { if err := os.Remove(p); err != nil && !errors.Is(err, os.ErrNotExist) && firstErr == nil { firstErr = err } } return firstErr }