383 lines
9.4 KiB
Go
383 lines
9.4 KiB
Go
package datasets
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
"go.uber.org/zap"
|
|
|
|
"predictor-refactored/internal/weather"
|
|
)
|
|
|
|
// JobStatus names the lifecycle state of a download job. Its underlying type
// is string, so values can be compared against and converted to plain strings.
type JobStatus string

// Lifecycle states a job can be in. Within this file a job is created as
// JobPending, moves to JobRunning when its download starts, and ends in one
// of JobComplete, JobFailed or JobCancelled.
const (
	JobPending   = JobStatus("pending")
	JobRunning   = JobStatus("running")
	JobComplete  = JobStatus("complete")
	JobFailed    = JobStatus("failed")
	JobCancelled = JobStatus("cancelled")
)
|
|
|
|
// JobInfo is the externally-visible snapshot of a download job.
|
|
type JobInfo struct {
|
|
ID string
|
|
Source string
|
|
Epoch time.Time
|
|
Status JobStatus
|
|
StartedAt time.Time
|
|
EndedAt *time.Time
|
|
Err string
|
|
Total int
|
|
Done int
|
|
Bytes int64
|
|
}
|
|
|
|
// jobEntry is the Manager's mutable record for one job.
|
|
type jobEntry struct {
|
|
id string
|
|
source string
|
|
epoch time.Time
|
|
startedAt time.Time
|
|
cancel context.CancelFunc
|
|
|
|
mu sync.Mutex
|
|
status JobStatus
|
|
endedAt time.Time
|
|
errStr string
|
|
|
|
total atomic.Int64
|
|
done atomic.Int64
|
|
bytes atomic.Int64
|
|
}
|
|
|
|
func (e *jobEntry) snapshot() JobInfo {
|
|
e.mu.Lock()
|
|
info := JobInfo{
|
|
ID: e.id, Source: e.source, Epoch: e.epoch,
|
|
StartedAt: e.startedAt, Status: e.status, Err: e.errStr,
|
|
}
|
|
if !e.endedAt.IsZero() {
|
|
ts := e.endedAt
|
|
info.EndedAt = &ts
|
|
}
|
|
e.mu.Unlock()
|
|
info.Total = int(e.total.Load())
|
|
info.Done = int(e.done.Load())
|
|
info.Bytes = e.bytes.Load()
|
|
return info
|
|
}
|
|
|
|
// jobProgress is the ProgressSink wired into a jobEntry.
|
|
type jobProgress struct{ e *jobEntry }
|
|
|
|
func (p jobProgress) SetTotal(n int) { p.e.total.Store(int64(n)) }
|
|
func (p jobProgress) StepComplete() { p.e.done.Add(1) }
|
|
func (p jobProgress) Bytes(n int64) { p.e.bytes.Add(n) }
|
|
|
|
// Manager coordinates dataset downloads and exposes the active WindField.
|
|
type Manager struct {
|
|
src Source
|
|
store Storage
|
|
throttle Throttle
|
|
log *zap.Logger
|
|
|
|
activeMu sync.RWMutex
|
|
active weather.WindField
|
|
|
|
jobsMu sync.RWMutex
|
|
jobs map[string]*jobEntry
|
|
|
|
// inFlight maps an epoch's RFC3339 representation to its jobID, enforcing
|
|
// single-flight per epoch.
|
|
inFlight sync.Map
|
|
}
|
|
|
|
// New returns a Manager wiring source, store, and an optional throttle.
|
|
// A nil log uses zap.NewNop().
|
|
func New(src Source, store Storage, throttle Throttle, log *zap.Logger) *Manager {
|
|
if log == nil {
|
|
log = zap.NewNop()
|
|
}
|
|
if src.ID() != store.SourceID() {
|
|
log.Warn("source/store ID mismatch",
|
|
zap.String("src", src.ID()),
|
|
zap.String("store", store.SourceID()))
|
|
}
|
|
return &Manager{
|
|
src: src, store: store, throttle: throttle, log: log,
|
|
jobs: make(map[string]*jobEntry),
|
|
}
|
|
}
|
|
|
|
// Source returns the underlying source ID.
|
|
func (m *Manager) Source() string { return m.src.ID() }
|
|
|
|
// Active returns the currently-loaded WindField, or nil.
|
|
func (m *Manager) Active() weather.WindField {
|
|
m.activeMu.RLock()
|
|
defer m.activeMu.RUnlock()
|
|
return m.active
|
|
}
|
|
|
|
// Ready reports whether a dataset is currently loaded.
|
|
func (m *Manager) Ready() bool { return m.Active() != nil }
|
|
|
|
// ListEpochs returns all stored dataset epochs, newest first.
|
|
func (m *Manager) ListEpochs() ([]time.Time, error) { return m.store.List() }
|
|
|
|
// ListJobs returns snapshots of every job recorded since startup.
|
|
func (m *Manager) ListJobs() []JobInfo {
|
|
m.jobsMu.RLock()
|
|
defer m.jobsMu.RUnlock()
|
|
out := make([]JobInfo, 0, len(m.jobs))
|
|
for _, e := range m.jobs {
|
|
out = append(out, e.snapshot())
|
|
}
|
|
return out
|
|
}
|
|
|
|
// GetJob returns the snapshot for a job, or false if id is unknown.
|
|
func (m *Manager) GetJob(id string) (JobInfo, bool) {
|
|
m.jobsMu.RLock()
|
|
e, ok := m.jobs[id]
|
|
m.jobsMu.RUnlock()
|
|
if !ok {
|
|
return JobInfo{}, false
|
|
}
|
|
return e.snapshot(), true
|
|
}
|
|
|
|
// CancelJob cancels a running job. Returns false if id is unknown or the
|
|
// job is already terminal.
|
|
func (m *Manager) CancelJob(id string) bool {
|
|
m.jobsMu.RLock()
|
|
e, ok := m.jobs[id]
|
|
m.jobsMu.RUnlock()
|
|
if !ok {
|
|
return false
|
|
}
|
|
e.mu.Lock()
|
|
terminal := e.status == JobComplete || e.status == JobFailed || e.status == JobCancelled
|
|
e.mu.Unlock()
|
|
if terminal {
|
|
return false
|
|
}
|
|
e.cancel()
|
|
return true
|
|
}
|
|
|
|
// RemoveEpoch deletes a stored dataset. If epoch is currently active, the
|
|
// active field is cleared.
|
|
func (m *Manager) RemoveEpoch(epoch time.Time) error {
|
|
epoch = epoch.UTC()
|
|
if active := m.Active(); active != nil && active.Epoch().Equal(epoch) {
|
|
m.activeMu.Lock()
|
|
m.active = nil
|
|
m.activeMu.Unlock()
|
|
}
|
|
return m.store.Remove(epoch)
|
|
}
|
|
|
|
// Download starts (or resumes) a download job for epoch in the background.
|
|
// Returns the JobID. If a job for the same epoch is already running, its
|
|
// existing JobID is returned.
|
|
//
|
|
// If the dataset is already present on disk, a synthetic completed JobInfo
|
|
// is recorded and its JobID returned.
|
|
func (m *Manager) Download(epoch time.Time) string {
|
|
epoch = epoch.UTC()
|
|
key := epoch.Format(time.RFC3339)
|
|
|
|
if existing, ok := m.inFlight.Load(key); ok {
|
|
return existing.(string)
|
|
}
|
|
|
|
jobID := uuid.New().String()
|
|
if other, loaded := m.inFlight.LoadOrStore(key, jobID); loaded {
|
|
return other.(string)
|
|
}
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
now := time.Now().UTC()
|
|
e := &jobEntry{
|
|
id: jobID,
|
|
source: m.src.ID(),
|
|
epoch: epoch,
|
|
startedAt: now,
|
|
status: JobPending,
|
|
cancel: cancel,
|
|
}
|
|
m.jobsMu.Lock()
|
|
m.jobs[jobID] = e
|
|
m.jobsMu.Unlock()
|
|
|
|
if m.store.Exists(epoch) {
|
|
// Skip the download but still record the job for traceability.
|
|
go m.completeShortCircuit(ctx, e)
|
|
return jobID
|
|
}
|
|
go m.runDownload(ctx, e)
|
|
return jobID
|
|
}
|
|
|
|
// LoadEpoch swaps the active WindField to epoch's stored dataset.
|
|
func (m *Manager) LoadEpoch(ctx context.Context, epoch time.Time) error {
|
|
epoch = epoch.UTC()
|
|
if !m.store.Exists(epoch) {
|
|
return fmt.Errorf("epoch %s not present on disk", epoch.Format(time.RFC3339))
|
|
}
|
|
field, err := m.src.Open(ctx, epoch, m.store)
|
|
if err != nil {
|
|
return fmt.Errorf("open epoch: %w", err)
|
|
}
|
|
m.swapActive(field)
|
|
m.log.Info("loaded dataset",
|
|
zap.Time("epoch", epoch),
|
|
zap.String("source", m.src.ID()))
|
|
return nil
|
|
}
|
|
|
|
// Refresh ensures the most recent upstream dataset is downloaded and active.
|
|
//
|
|
// If the freshest stored dataset is newer than retentionTTL old, no upstream
|
|
// check is performed. Otherwise the source's LatestEpoch is consulted; if it
|
|
// is newer than the active dataset, a download is started and on completion
|
|
// the new dataset becomes active.
|
|
//
|
|
// Returns the JobID started, or empty string when nothing was scheduled.
|
|
func (m *Manager) Refresh(ctx context.Context, freshnessTTL time.Duration) (string, error) {
|
|
if active := m.Active(); active != nil && time.Since(active.Epoch()) < freshnessTTL {
|
|
return "", nil
|
|
}
|
|
|
|
// Try loading the freshest existing dataset before going to the network.
|
|
if epochs, err := m.store.List(); err == nil {
|
|
for _, e := range epochs {
|
|
if time.Since(e) > freshnessTTL {
|
|
continue
|
|
}
|
|
if active := m.Active(); active != nil && active.Epoch().Equal(e) {
|
|
return "", nil
|
|
}
|
|
if err := m.LoadEpoch(ctx, e); err == nil {
|
|
return "", nil
|
|
}
|
|
}
|
|
}
|
|
|
|
latest, err := m.src.LatestEpoch(ctx)
|
|
if err != nil {
|
|
return "", fmt.Errorf("latest epoch: %w", err)
|
|
}
|
|
if active := m.Active(); active != nil && !latest.After(active.Epoch()) {
|
|
return "", nil
|
|
}
|
|
|
|
jobID := m.Download(latest)
|
|
|
|
// Spawn a watcher that loads the dataset on successful completion.
|
|
go func() {
|
|
for {
|
|
info, ok := m.GetJob(jobID)
|
|
if !ok {
|
|
return
|
|
}
|
|
switch info.Status {
|
|
case JobComplete:
|
|
if err := m.LoadEpoch(context.Background(), latest); err != nil {
|
|
m.log.Error("load after download", zap.Error(err))
|
|
}
|
|
return
|
|
case JobFailed, JobCancelled:
|
|
return
|
|
}
|
|
time.Sleep(2 * time.Second)
|
|
}
|
|
}()
|
|
return jobID, nil
|
|
}
|
|
|
|
// runDownload executes one Source.Download invocation and records its outcome.
|
|
func (m *Manager) runDownload(ctx context.Context, e *jobEntry) {
|
|
defer m.inFlight.Delete(e.epoch.Format(time.RFC3339))
|
|
|
|
e.mu.Lock()
|
|
e.status = JobRunning
|
|
e.mu.Unlock()
|
|
|
|
m.log.Info("download started",
|
|
zap.String("job", e.id),
|
|
zap.Time("epoch", e.epoch))
|
|
|
|
err := m.src.Download(ctx, e.epoch, m.store, jobProgress{e: e}, m.throttle)
|
|
now := time.Now().UTC()
|
|
|
|
e.mu.Lock()
|
|
e.endedAt = now
|
|
switch {
|
|
case errors.Is(err, context.Canceled):
|
|
e.status = JobCancelled
|
|
case err != nil:
|
|
e.status = JobFailed
|
|
e.errStr = err.Error()
|
|
default:
|
|
e.status = JobComplete
|
|
}
|
|
finalStatus := e.status
|
|
e.mu.Unlock()
|
|
|
|
m.log.Info("download finished",
|
|
zap.String("job", e.id),
|
|
zap.String("status", string(finalStatus)),
|
|
zap.NamedError("err", err))
|
|
}
|
|
|
|
// completeShortCircuit records a job as complete without performing any work.
|
|
func (m *Manager) completeShortCircuit(ctx context.Context, e *jobEntry) {
|
|
_ = ctx
|
|
defer m.inFlight.Delete(e.epoch.Format(time.RFC3339))
|
|
now := time.Now().UTC()
|
|
e.mu.Lock()
|
|
e.status = JobComplete
|
|
e.endedAt = now
|
|
e.mu.Unlock()
|
|
}
|
|
|
|
// swapActive replaces the active field and closes the previous one if it
|
|
// implements io.Closer.
|
|
func (m *Manager) swapActive(f weather.WindField) {
|
|
m.activeMu.Lock()
|
|
old := m.active
|
|
m.active = f
|
|
m.activeMu.Unlock()
|
|
if c, ok := old.(interface{ Close() error }); ok && c != nil {
|
|
if err := c.Close(); err != nil {
|
|
m.log.Warn("close old dataset", zap.Error(err))
|
|
}
|
|
}
|
|
}
|
|
|
|
// Close releases all resources, cancelling any in-flight jobs.
|
|
func (m *Manager) Close() error {
|
|
m.jobsMu.Lock()
|
|
for _, e := range m.jobs {
|
|
e.cancel()
|
|
}
|
|
m.jobsMu.Unlock()
|
|
|
|
m.activeMu.Lock()
|
|
active := m.active
|
|
m.active = nil
|
|
m.activeMu.Unlock()
|
|
if c, ok := active.(interface{ Close() error }); ok && c != nil {
|
|
return c.Close()
|
|
}
|
|
return nil
|
|
}
|