predictor/internal/datasets/manifest.go
2026-05-18 03:17:17 +09:00

118 lines
2.7 KiB
Go

package datasets
import (
"encoding/json"
"errors"
"fmt"
"os"
"sort"
"sync"
)
// Manifest tracks completed work units for a partial dataset download.
// Units are arbitrary opaque strings; sources choose the format
// (e.g. "step12-A" for "forecast step 12, level set A").
//
// A Manifest is persisted as a JSON object: {"units": ["step0-A", "step0-B", ...]}.
//
// The methods visible in this file all lock mu before reading or changing
// units. Because the struct contains a sync.Mutex by value, a Manifest must
// not be copied once in use; share it as *Manifest.
type Manifest struct {
// path is the location of the JSON file backing this manifest.
path string
// mu guards units and is held while the file at path is written or removed.
mu sync.Mutex
// units is the set of completed unit names; the empty-struct values carry
// no data, only membership matters.
units map[string]struct{}
}
// LoadManifest loads the manifest stored at path.
//
// A file that does not exist, or that exists but has zero length, yields an
// empty manifest bound to path. LoadManifest itself never writes to disk.
//
// Any other read failure (for example a permission error) is returned as an
// error rather than being treated as "empty", so that an unreadable manifest
// cannot silently discard recorded progress. Content that json.Unmarshal
// rejects is likewise returned as an error. Both errors wrap the underlying
// cause and include path.
//
// Entries repeated in the file's "units" array collapse into a single unit.
func LoadManifest(path string) (*Manifest, error) {
	m := &Manifest{path: path, units: make(map[string]struct{})}

	data, err := os.ReadFile(path)
	if errors.Is(err, os.ErrNotExist) {
		// No manifest yet: start from scratch.
		return m, nil
	}
	if err != nil {
		return nil, fmt.Errorf("read manifest %s: %w", path, err)
	}
	if len(data) == 0 {
		// A zero-length file is not valid JSON, but it is accepted as an
		// empty manifest instead of being reported as corrupt.
		return m, nil
	}

	var doc struct {
		Units []string `json:"units"`
	}
	if err := json.Unmarshal(data, &doc); err != nil {
		return nil, fmt.Errorf("parse manifest %s: %w", path, err)
	}
	for _, u := range doc.Units {
		m.units[u] = struct{}{}
	}
	return m, nil
}
// Has reports whether unit is present in the set of completed units.
// Only the in-memory set is consulted; the manifest file is not read.
func (m *Manifest) Has(unit string) bool {
	m.mu.Lock()
	_, done := m.units[unit]
	m.mu.Unlock()
	return done
}
// Mark records unit as completed and persists the manifest to disk.
//
// Marking a unit that is already recorded is a no-op: nil is returned and
// the file is not rewritten.
//
// If persisting fails, the unit is removed from the in-memory set again
// before the error is returned. Without that rollback Has would report the
// unit as completed although it never reached the disk, and a retried Mark
// would hit the "already recorded" fast path and return nil without writing
// anything.
func (m *Manifest) Mark(unit string) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	if _, ok := m.units[unit]; ok {
		return nil
	}

	m.units[unit] = struct{}{}
	if err := m.persistLocked(); err != nil {
		// Keep memory and disk in agreement so the caller can retry.
		delete(m.units, unit)
		return err
	}
	return nil
}
// Units returns every completed unit as a newly allocated slice sorted in
// increasing string order. The slice is never nil, and the caller may modify
// it without affecting the manifest.
func (m *Manifest) Units() []string {
	m.mu.Lock()
	names := make([]string, len(m.units))
	i := 0
	for name := range m.units {
		names[i] = name
		i++
	}
	m.mu.Unlock()

	// The snapshot is private to this call, so sorting needs no lock.
	sort.Strings(names)
	return names
}
// Reset forgets every recorded unit and deletes the manifest file.
//
// The in-memory set is emptied first and stays empty even when the file
// cannot be removed. A file that is already absent is not an error; any
// other removal failure is returned wrapped together with the path.
func (m *Manifest) Reset() error {
	m.mu.Lock()
	defer m.mu.Unlock()

	m.units = map[string]struct{}{}

	err := os.Remove(m.path)
	if err == nil || errors.Is(err, os.ErrNotExist) {
		return nil
	}
	return fmt.Errorf("remove manifest %s: %w", m.path, err)
}
// persistLocked replaces the manifest file with the current set of units,
// encoded as {"units": [...]} with the units in sorted order.
//
// The data is written to a sibling temp file (m.path + ".new"), flushed to
// stable storage, and only then renamed over m.path. Syncing before the
// rename keeps a crash from leaving an empty or truncated file under the
// final name. If any step fails the temp file is removed (best effort) and
// the previous manifest file is left as it was.
//
// The caller must hold m.mu.
func (m *Manifest) persistLocked() error {
	units := make([]string, 0, len(m.units))
	for u := range m.units {
		units = append(units, u)
	}
	// Map iteration order is random; sort for a deterministic file.
	sort.Strings(units)

	data, err := json.Marshal(struct {
		Units []string `json:"units"`
	}{Units: units})
	if err != nil {
		return fmt.Errorf("encode manifest: %w", err)
	}

	tmp := m.path + ".new"
	if err := writeManifestFile(tmp, data, 0o644); err != nil {
		// Best effort: do not leave a partial temp file behind.
		_ = os.Remove(tmp)
		return fmt.Errorf("write manifest temp: %w", err)
	}
	if err := os.Rename(tmp, m.path); err != nil {
		// Best effort: the rename error is the one worth reporting.
		_ = os.Remove(tmp)
		return fmt.Errorf("rename manifest: %w", err)
	}
	return nil
}

// writeManifestFile creates or truncates name, writes data to it and syncs
// the file to stable storage before closing it.
func writeManifestFile(name string, data []byte, perm os.FileMode) error {
	f, err := os.OpenFile(name, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, perm)
	if err != nil {
		return err
	}
	if _, err := f.Write(data); err != nil {
		_ = f.Close() // the write error takes precedence
		return err
	}
	if err := f.Sync(); err != nil {
		_ = f.Close() // the sync error takes precedence
		return err
	}
	return f.Close()
}