package datasets import ( "encoding/json" "errors" "fmt" "os" "sort" "sync" ) // Manifest tracks completed work units for a partial dataset download. // Units are arbitrary opaque strings; sources choose the format // (e.g. "step12-A" for "forecast step 12, level set A"). // // A Manifest is persisted as a JSON object: {"units": ["step0-A", "step0-B", ...]}. type Manifest struct { path string mu sync.Mutex units map[string]struct{} } // LoadManifest opens or creates the manifest at path. Missing or unreadable // files are treated as empty; a corrupt file returns an error. func LoadManifest(path string) (*Manifest, error) { m := &Manifest{path: path, units: make(map[string]struct{})} data, err := os.ReadFile(path) if errors.Is(err, os.ErrNotExist) { return m, nil } if err != nil { return nil, fmt.Errorf("read manifest %s: %w", path, err) } if len(data) == 0 { return m, nil } var doc struct { Units []string `json:"units"` } if err := json.Unmarshal(data, &doc); err != nil { return nil, fmt.Errorf("parse manifest %s: %w", path, err) } for _, u := range doc.Units { m.units[u] = struct{}{} } return m, nil } // Has reports whether unit has been recorded as completed. func (m *Manifest) Has(unit string) bool { m.mu.Lock() defer m.mu.Unlock() _, ok := m.units[unit] return ok } // Mark records unit as completed and persists the manifest to disk. func (m *Manifest) Mark(unit string) error { m.mu.Lock() defer m.mu.Unlock() if _, ok := m.units[unit]; ok { return nil } m.units[unit] = struct{}{} return m.persistLocked() } // Units returns the completed units in sorted order. func (m *Manifest) Units() []string { m.mu.Lock() defer m.mu.Unlock() out := make([]string, 0, len(m.units)) for u := range m.units { out = append(out, u) } sort.Strings(out) return out } // Reset clears all recorded units and removes the manifest file. func (m *Manifest) Reset() error { m.mu.Lock() defer m.mu.Unlock() m.units = make(map[string]struct{}) if err := os.Remove(m.path); err != nil && !errors.Is(err, os.ErrNotExist) { return fmt.Errorf("remove manifest %s: %w", m.path, err) } return nil } // persistLocked writes the manifest to disk via temp+rename. // The caller must hold m.mu. func (m *Manifest) persistLocked() error { units := make([]string, 0, len(m.units)) for u := range m.units { units = append(units, u) } sort.Strings(units) data, err := json.Marshal(struct { Units []string `json:"units"` }{Units: units}) if err != nil { return err } tmp := m.path + ".new" if err := os.WriteFile(tmp, data, 0o644); err != nil { return fmt.Errorf("write manifest temp: %w", err) } if err := os.Rename(tmp, m.path); err != nil { os.Remove(tmp) return fmt.Errorf("rename manifest: %w", err) } return nil }