package datasets import ( "context" "errors" "fmt" "sync" "sync/atomic" "time" "github.com/google/uuid" "go.uber.org/zap" "predictor-refactored/internal/weather" ) // JobStatus is the lifecycle state of a download job. type JobStatus string const ( JobPending JobStatus = "pending" JobRunning JobStatus = "running" JobComplete JobStatus = "complete" JobFailed JobStatus = "failed" JobCancelled JobStatus = "cancelled" ) // JobInfo is the externally-visible snapshot of a download job. type JobInfo struct { ID string Source string Epoch time.Time Status JobStatus StartedAt time.Time EndedAt *time.Time Err string Total int Done int Bytes int64 } // jobEntry is the Manager's mutable record for one job. type jobEntry struct { id string source string epoch time.Time startedAt time.Time cancel context.CancelFunc mu sync.Mutex status JobStatus endedAt time.Time errStr string total atomic.Int64 done atomic.Int64 bytes atomic.Int64 } func (e *jobEntry) snapshot() JobInfo { e.mu.Lock() info := JobInfo{ ID: e.id, Source: e.source, Epoch: e.epoch, StartedAt: e.startedAt, Status: e.status, Err: e.errStr, } if !e.endedAt.IsZero() { ts := e.endedAt info.EndedAt = &ts } e.mu.Unlock() info.Total = int(e.total.Load()) info.Done = int(e.done.Load()) info.Bytes = e.bytes.Load() return info } // jobProgress is the ProgressSink wired into a jobEntry. type jobProgress struct{ e *jobEntry } func (p jobProgress) SetTotal(n int) { p.e.total.Store(int64(n)) } func (p jobProgress) StepComplete() { p.e.done.Add(1) } func (p jobProgress) Bytes(n int64) { p.e.bytes.Add(n) } // Manager coordinates dataset downloads and exposes the active WindField. type Manager struct { src Source store Storage throttle Throttle log *zap.Logger activeMu sync.RWMutex active weather.WindField jobsMu sync.RWMutex jobs map[string]*jobEntry // inFlight maps an epoch's RFC3339 representation to its jobID, enforcing // single-flight per epoch. inFlight sync.Map } // New returns a Manager wiring source, store, and an optional throttle. // A nil log uses zap.NewNop(). func New(src Source, store Storage, throttle Throttle, log *zap.Logger) *Manager { if log == nil { log = zap.NewNop() } if src.ID() != store.SourceID() { log.Warn("source/store ID mismatch", zap.String("src", src.ID()), zap.String("store", store.SourceID())) } return &Manager{ src: src, store: store, throttle: throttle, log: log, jobs: make(map[string]*jobEntry), } } // Source returns the underlying source ID. func (m *Manager) Source() string { return m.src.ID() } // Active returns the currently-loaded WindField, or nil. func (m *Manager) Active() weather.WindField { m.activeMu.RLock() defer m.activeMu.RUnlock() return m.active } // Ready reports whether a dataset is currently loaded. func (m *Manager) Ready() bool { return m.Active() != nil } // ListEpochs returns all stored dataset epochs, newest first. func (m *Manager) ListEpochs() ([]time.Time, error) { return m.store.List() } // ListJobs returns snapshots of every job recorded since startup. func (m *Manager) ListJobs() []JobInfo { m.jobsMu.RLock() defer m.jobsMu.RUnlock() out := make([]JobInfo, 0, len(m.jobs)) for _, e := range m.jobs { out = append(out, e.snapshot()) } return out } // GetJob returns the snapshot for a job, or false if id is unknown. func (m *Manager) GetJob(id string) (JobInfo, bool) { m.jobsMu.RLock() e, ok := m.jobs[id] m.jobsMu.RUnlock() if !ok { return JobInfo{}, false } return e.snapshot(), true } // CancelJob cancels a running job. Returns false if id is unknown or the // job is already terminal. func (m *Manager) CancelJob(id string) bool { m.jobsMu.RLock() e, ok := m.jobs[id] m.jobsMu.RUnlock() if !ok { return false } e.mu.Lock() terminal := e.status == JobComplete || e.status == JobFailed || e.status == JobCancelled e.mu.Unlock() if terminal { return false } e.cancel() return true } // RemoveEpoch deletes a stored dataset. If epoch is currently active, the // active field is cleared. func (m *Manager) RemoveEpoch(epoch time.Time) error { epoch = epoch.UTC() if active := m.Active(); active != nil && active.Epoch().Equal(epoch) { m.activeMu.Lock() m.active = nil m.activeMu.Unlock() } return m.store.Remove(epoch) } // Download starts (or resumes) a download job for epoch in the background. // Returns the JobID. If a job for the same epoch is already running, its // existing JobID is returned. // // If the dataset is already present on disk, a synthetic completed JobInfo // is recorded and its JobID returned. func (m *Manager) Download(epoch time.Time) string { epoch = epoch.UTC() key := epoch.Format(time.RFC3339) if existing, ok := m.inFlight.Load(key); ok { return existing.(string) } jobID := uuid.New().String() if other, loaded := m.inFlight.LoadOrStore(key, jobID); loaded { return other.(string) } ctx, cancel := context.WithCancel(context.Background()) now := time.Now().UTC() e := &jobEntry{ id: jobID, source: m.src.ID(), epoch: epoch, startedAt: now, status: JobPending, cancel: cancel, } m.jobsMu.Lock() m.jobs[jobID] = e m.jobsMu.Unlock() if m.store.Exists(epoch) { // Skip the download but still record the job for traceability. go m.completeShortCircuit(ctx, e) return jobID } go m.runDownload(ctx, e) return jobID } // LoadEpoch swaps the active WindField to epoch's stored dataset. func (m *Manager) LoadEpoch(ctx context.Context, epoch time.Time) error { epoch = epoch.UTC() if !m.store.Exists(epoch) { return fmt.Errorf("epoch %s not present on disk", epoch.Format(time.RFC3339)) } field, err := m.src.Open(ctx, epoch, m.store) if err != nil { return fmt.Errorf("open epoch: %w", err) } m.swapActive(field) m.log.Info("loaded dataset", zap.Time("epoch", epoch), zap.String("source", m.src.ID())) return nil } // Refresh ensures the most recent upstream dataset is downloaded and active. // // If the freshest stored dataset is newer than retentionTTL old, no upstream // check is performed. Otherwise the source's LatestEpoch is consulted; if it // is newer than the active dataset, a download is started and on completion // the new dataset becomes active. // // Returns the JobID started, or empty string when nothing was scheduled. func (m *Manager) Refresh(ctx context.Context, freshnessTTL time.Duration) (string, error) { if active := m.Active(); active != nil && time.Since(active.Epoch()) < freshnessTTL { return "", nil } // Try loading the freshest existing dataset before going to the network. if epochs, err := m.store.List(); err == nil { for _, e := range epochs { if time.Since(e) > freshnessTTL { continue } if active := m.Active(); active != nil && active.Epoch().Equal(e) { return "", nil } if err := m.LoadEpoch(ctx, e); err == nil { return "", nil } } } latest, err := m.src.LatestEpoch(ctx) if err != nil { return "", fmt.Errorf("latest epoch: %w", err) } if active := m.Active(); active != nil && !latest.After(active.Epoch()) { return "", nil } jobID := m.Download(latest) // Spawn a watcher that loads the dataset on successful completion. go func() { for { info, ok := m.GetJob(jobID) if !ok { return } switch info.Status { case JobComplete: if err := m.LoadEpoch(context.Background(), latest); err != nil { m.log.Error("load after download", zap.Error(err)) } return case JobFailed, JobCancelled: return } time.Sleep(2 * time.Second) } }() return jobID, nil } // runDownload executes one Source.Download invocation and records its outcome. func (m *Manager) runDownload(ctx context.Context, e *jobEntry) { defer m.inFlight.Delete(e.epoch.Format(time.RFC3339)) e.mu.Lock() e.status = JobRunning e.mu.Unlock() m.log.Info("download started", zap.String("job", e.id), zap.Time("epoch", e.epoch)) err := m.src.Download(ctx, e.epoch, m.store, jobProgress{e: e}, m.throttle) now := time.Now().UTC() e.mu.Lock() e.endedAt = now switch { case errors.Is(err, context.Canceled): e.status = JobCancelled case err != nil: e.status = JobFailed e.errStr = err.Error() default: e.status = JobComplete } finalStatus := e.status e.mu.Unlock() m.log.Info("download finished", zap.String("job", e.id), zap.String("status", string(finalStatus)), zap.NamedError("err", err)) } // completeShortCircuit records a job as complete without performing any work. func (m *Manager) completeShortCircuit(ctx context.Context, e *jobEntry) { _ = ctx defer m.inFlight.Delete(e.epoch.Format(time.RFC3339)) now := time.Now().UTC() e.mu.Lock() e.status = JobComplete e.endedAt = now e.mu.Unlock() } // swapActive replaces the active field and closes the previous one if it // implements io.Closer. func (m *Manager) swapActive(f weather.WindField) { m.activeMu.Lock() old := m.active m.active = f m.activeMu.Unlock() if c, ok := old.(interface{ Close() error }); ok && c != nil { if err := c.Close(); err != nil { m.log.Warn("close old dataset", zap.Error(err)) } } } // Close releases all resources, cancelling any in-flight jobs. func (m *Manager) Close() error { m.jobsMu.Lock() for _, e := range m.jobs { e.cancel() } m.jobsMu.Unlock() m.activeMu.Lock() active := m.active m.active = nil m.activeMu.Unlock() if c, ok := active.(interface{ Close() error }); ok && c != nil { return c.Close() } return nil }