engine refactor
This commit is contained in:
parent
9e663db9dc
commit
81b8e763bd
37 changed files with 3532 additions and 1639 deletions
|
|
@ -27,23 +27,22 @@ const (
|
|||
|
||||
// JobInfo is the externally-visible snapshot of a download job.
|
||||
type JobInfo struct {
|
||||
ID string
|
||||
Source string
|
||||
Epoch time.Time
|
||||
Status JobStatus
|
||||
StartedAt time.Time
|
||||
EndedAt *time.Time
|
||||
Err string
|
||||
Total int
|
||||
Done int
|
||||
Bytes int64
|
||||
ID string
|
||||
Source string
|
||||
Dataset DatasetID
|
||||
Status JobStatus
|
||||
StartedAt time.Time
|
||||
EndedAt *time.Time
|
||||
Err string
|
||||
Total int
|
||||
Done int
|
||||
Bytes int64
|
||||
}
|
||||
|
||||
// jobEntry is the Manager's mutable record for one job.
|
||||
type jobEntry struct {
|
||||
id string
|
||||
source string
|
||||
epoch time.Time
|
||||
dataset DatasetID
|
||||
startedAt time.Time
|
||||
cancel context.CancelFunc
|
||||
|
||||
|
|
@ -60,7 +59,7 @@ type jobEntry struct {
|
|||
func (e *jobEntry) snapshot() JobInfo {
|
||||
e.mu.Lock()
|
||||
info := JobInfo{
|
||||
ID: e.id, Source: e.source, Epoch: e.epoch,
|
||||
ID: e.id, Source: e.source, Dataset: e.dataset,
|
||||
StartedAt: e.startedAt, Status: e.status, Err: e.errStr,
|
||||
}
|
||||
if !e.endedAt.IsZero() {
|
||||
|
|
@ -74,14 +73,20 @@ func (e *jobEntry) snapshot() JobInfo {
|
|||
return info
|
||||
}
|
||||
|
||||
// jobProgress is the ProgressSink wired into a jobEntry.
|
||||
type jobProgress struct{ e *jobEntry }
|
||||
|
||||
func (p jobProgress) SetTotal(n int) { p.e.total.Store(int64(n)) }
|
||||
func (p jobProgress) StepComplete() { p.e.done.Add(1) }
|
||||
func (p jobProgress) Bytes(n int64) { p.e.bytes.Add(n) }
|
||||
|
||||
// Manager coordinates dataset downloads and exposes the active WindField.
|
||||
// loadedDataset bundles a loaded WindField with its identity and coverage.
|
||||
type loadedDataset struct {
|
||||
ID DatasetID
|
||||
Field weather.WindField
|
||||
Coverage Coverage
|
||||
}
|
||||
|
||||
// Manager coordinates dataset downloads and exposes the active WindFields.
|
||||
type Manager struct {
|
||||
src Source
|
||||
store Storage
|
||||
|
|
@ -89,18 +94,15 @@ type Manager struct {
|
|||
log *zap.Logger
|
||||
|
||||
activeMu sync.RWMutex
|
||||
active weather.WindField
|
||||
active []loadedDataset
|
||||
|
||||
jobsMu sync.RWMutex
|
||||
jobs map[string]*jobEntry
|
||||
|
||||
// inFlight maps an epoch's RFC3339 representation to its jobID, enforcing
|
||||
// single-flight per epoch.
|
||||
inFlight sync.Map
|
||||
inFlight sync.Map // key: dataset filename, value: jobID
|
||||
}
|
||||
|
||||
// New returns a Manager wiring source, store, and an optional throttle.
|
||||
// A nil log uses zap.NewNop().
|
||||
// New wires a Manager.
|
||||
func New(src Source, store Storage, throttle Throttle, log *zap.Logger) *Manager {
|
||||
if log == nil {
|
||||
log = zap.NewNop()
|
||||
|
|
@ -119,18 +121,65 @@ func New(src Source, store Storage, throttle Throttle, log *zap.Logger) *Manager
|
|||
// Source returns the underlying source ID.
|
||||
func (m *Manager) Source() string { return m.src.ID() }
|
||||
|
||||
// Active returns the currently-loaded WindField, or nil.
|
||||
// Active returns the Field of the first loaded dataset whose Subset is
|
||||
// global. If no global dataset is loaded it falls back to the first loaded
|
||||
// dataset, and returns nil only when nothing is loaded; callers that need
|
||||
// a field matching a specific time and location should use SelectFor.
|
||||
func (m *Manager) Active() weather.WindField {
|
||||
m.activeMu.RLock()
|
||||
defer m.activeMu.RUnlock()
|
||||
return m.active
|
||||
for _, d := range m.active {
|
||||
if d.ID.Subset.IsGlobal() {
|
||||
return d.Field
|
||||
}
|
||||
}
|
||||
if len(m.active) > 0 {
|
||||
return m.active[0].Field
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Ready reports whether a dataset is currently loaded.
|
||||
// Ready reports whether at least one dataset is loaded.
|
||||
func (m *Manager) Ready() bool { return m.Active() != nil }
|
||||
|
||||
// ListEpochs returns all stored dataset epochs, newest first.
|
||||
func (m *Manager) ListEpochs() ([]time.Time, error) { return m.store.List() }
|
||||
// SelectFor returns a loaded WindField whose coverage contains (t, lat, lng).
|
||||
// Returns nil when no loaded dataset covers the query.
|
||||
func (m *Manager) SelectFor(t time.Time, lat, lng float64) weather.WindField {
|
||||
m.activeMu.RLock()
|
||||
defer m.activeMu.RUnlock()
|
||||
for _, d := range m.active {
|
||||
if d.Coverage.Covers(t, lat, lng) {
|
||||
return d.Field
|
||||
}
|
||||
}
|
||||
// Fallback: any global dataset is permissive about region.
|
||||
for _, d := range m.active {
|
||||
if d.ID.Subset.IsGlobal() {
|
||||
return d.Field
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// LoadedDatasets returns snapshots of every currently-loaded dataset.
|
||||
func (m *Manager) LoadedDatasets() []LoadedDatasetInfo {
|
||||
m.activeMu.RLock()
|
||||
defer m.activeMu.RUnlock()
|
||||
out := make([]LoadedDatasetInfo, 0, len(m.active))
|
||||
for _, d := range m.active {
|
||||
out = append(out, LoadedDatasetInfo{ID: d.ID, Coverage: d.Coverage})
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// LoadedDatasetInfo is a serializable snapshot of one active dataset.
|
||||
type LoadedDatasetInfo struct {
|
||||
ID DatasetID
|
||||
Coverage Coverage
|
||||
}
|
||||
|
||||
// ListEpochs returns all stored datasets, newest first.
|
||||
func (m *Manager) ListEpochs() ([]DatasetID, error) { return m.store.List() }
|
||||
|
||||
// ListJobs returns snapshots of every job recorded since startup.
|
||||
func (m *Manager) ListJobs() []JobInfo {
|
||||
|
|
@ -143,7 +192,7 @@ func (m *Manager) ListJobs() []JobInfo {
|
|||
return out
|
||||
}
|
||||
|
||||
// GetJob returns the snapshot for a job, or false if id is unknown.
|
||||
// GetJob returns the snapshot for a job.
|
||||
func (m *Manager) GetJob(id string) (JobInfo, bool) {
|
||||
m.jobsMu.RLock()
|
||||
e, ok := m.jobs[id]
|
||||
|
|
@ -154,8 +203,7 @@ func (m *Manager) GetJob(id string) (JobInfo, bool) {
|
|||
return e.snapshot(), true
|
||||
}
|
||||
|
||||
// CancelJob cancels a running job. Returns false if id is unknown or the
|
||||
// job is already terminal.
|
||||
// CancelJob cancels a running job.
|
||||
func (m *Manager) CancelJob(id string) bool {
|
||||
m.jobsMu.RLock()
|
||||
e, ok := m.jobs[id]
|
||||
|
|
@ -173,28 +221,31 @@ func (m *Manager) CancelJob(id string) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
// RemoveEpoch deletes a stored dataset. If epoch is currently active, the
|
||||
// active field is cleared.
|
||||
func (m *Manager) RemoveEpoch(epoch time.Time) error {
|
||||
epoch = epoch.UTC()
|
||||
if active := m.Active(); active != nil && active.Epoch().Equal(epoch) {
|
||||
m.activeMu.Lock()
|
||||
m.active = nil
|
||||
m.activeMu.Unlock()
|
||||
// Remove deletes a stored dataset. If the dataset is currently loaded,
|
||||
// it is unloaded first.
|
||||
func (m *Manager) Remove(id DatasetID) error {
|
||||
m.activeMu.Lock()
|
||||
out := m.active[:0]
|
||||
var removed *loadedDataset
|
||||
for i := range m.active {
|
||||
d := m.active[i]
|
||||
if d.ID.Equals(id) {
|
||||
removed = &d
|
||||
continue
|
||||
}
|
||||
out = append(out, d)
|
||||
}
|
||||
return m.store.Remove(epoch)
|
||||
m.active = out
|
||||
m.activeMu.Unlock()
|
||||
if removed != nil {
|
||||
closeField(removed.Field, m.log)
|
||||
}
|
||||
return m.store.Remove(id)
|
||||
}
|
||||
|
||||
// Download starts (or resumes) a download job for epoch in the background.
|
||||
// Returns the JobID. If a job for the same epoch is already running, its
|
||||
// existing JobID is returned.
|
||||
//
|
||||
// If the dataset is already present on disk, a synthetic completed JobInfo
|
||||
// is recorded and its JobID returned.
|
||||
func (m *Manager) Download(epoch time.Time) string {
|
||||
epoch = epoch.UTC()
|
||||
key := epoch.Format(time.RFC3339)
|
||||
|
||||
// Download starts (or resumes) a download job for id in the background.
|
||||
func (m *Manager) Download(id DatasetID) string {
|
||||
key := id.Filename()
|
||||
if existing, ok := m.inFlight.Load(key); ok {
|
||||
return existing.(string)
|
||||
}
|
||||
|
|
@ -209,7 +260,7 @@ func (m *Manager) Download(epoch time.Time) string {
|
|||
e := &jobEntry{
|
||||
id: jobID,
|
||||
source: m.src.ID(),
|
||||
epoch: epoch,
|
||||
dataset: id,
|
||||
startedAt: now,
|
||||
status: JobPending,
|
||||
cancel: cancel,
|
||||
|
|
@ -218,8 +269,7 @@ func (m *Manager) Download(epoch time.Time) string {
|
|||
m.jobs[jobID] = e
|
||||
m.jobsMu.Unlock()
|
||||
|
||||
if m.store.Exists(epoch) {
|
||||
// Skip the download but still record the job for traceability.
|
||||
if m.store.Exists(id) {
|
||||
go m.completeShortCircuit(ctx, e)
|
||||
return jobID
|
||||
}
|
||||
|
|
@ -227,46 +277,54 @@ func (m *Manager) Download(epoch time.Time) string {
|
|||
return jobID
|
||||
}
|
||||
|
||||
// LoadEpoch swaps the active WindField to epoch's stored dataset.
|
||||
func (m *Manager) LoadEpoch(ctx context.Context, epoch time.Time) error {
|
||||
epoch = epoch.UTC()
|
||||
if !m.store.Exists(epoch) {
|
||||
return fmt.Errorf("epoch %s not present on disk", epoch.Format(time.RFC3339))
|
||||
// Load swaps in id's stored dataset, making it available to predictions.
|
||||
func (m *Manager) Load(ctx context.Context, id DatasetID) error {
|
||||
if !m.store.Exists(id) {
|
||||
return fmt.Errorf("dataset %s not present on disk", id.Filename())
|
||||
}
|
||||
field, err := m.src.Open(ctx, epoch, m.store)
|
||||
field, err := m.src.Open(ctx, id, m.store)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open epoch: %w", err)
|
||||
return fmt.Errorf("open dataset: %w", err)
|
||||
}
|
||||
m.swapActive(field)
|
||||
cov := m.src.Coverage(id)
|
||||
m.activeMu.Lock()
|
||||
// Replace any previously-loaded dataset with the same ID.
|
||||
for i := range m.active {
|
||||
if m.active[i].ID.Equals(id) {
|
||||
closeField(m.active[i].Field, m.log)
|
||||
m.active[i] = loadedDataset{ID: id, Field: field, Coverage: cov}
|
||||
m.activeMu.Unlock()
|
||||
return nil
|
||||
}
|
||||
}
|
||||
m.active = append(m.active, loadedDataset{ID: id, Field: field, Coverage: cov})
|
||||
m.activeMu.Unlock()
|
||||
m.log.Info("loaded dataset",
|
||||
zap.Time("epoch", epoch),
|
||||
zap.String("filename", id.Filename()),
|
||||
zap.String("source", m.src.ID()))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Refresh ensures the most recent upstream dataset is downloaded and active.
|
||||
//
|
||||
// If the freshest stored dataset is newer than retentionTTL old, no upstream
|
||||
// check is performed. Otherwise the source's LatestEpoch is consulted; if it
|
||||
// is newer than the active dataset, a download is started and on completion
|
||||
// the new dataset becomes active.
|
||||
// Refresh ensures the freshest global dataset is downloaded and active.
|
||||
//
|
||||
// Returns the JobID started, or empty string when nothing was scheduled.
|
||||
func (m *Manager) Refresh(ctx context.Context, freshnessTTL time.Duration) (string, error) {
|
||||
if active := m.Active(); active != nil && time.Since(active.Epoch()) < freshnessTTL {
|
||||
if a := m.activeGlobal(); a != nil && time.Since(a.ID.Epoch) < freshnessTTL {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
// Try loading the freshest existing dataset before going to the network.
|
||||
if epochs, err := m.store.List(); err == nil {
|
||||
for _, e := range epochs {
|
||||
if time.Since(e) > freshnessTTL {
|
||||
if datasets, err := m.store.List(); err == nil {
|
||||
for _, id := range datasets {
|
||||
if !id.Subset.IsGlobal() {
|
||||
continue
|
||||
}
|
||||
if active := m.Active(); active != nil && active.Epoch().Equal(e) {
|
||||
if time.Since(id.Epoch) > freshnessTTL {
|
||||
continue
|
||||
}
|
||||
if a := m.activeGlobal(); a != nil && a.ID.Equals(id) {
|
||||
return "", nil
|
||||
}
|
||||
if err := m.LoadEpoch(ctx, e); err == nil {
|
||||
if err := m.Load(ctx, id); err == nil {
|
||||
return "", nil
|
||||
}
|
||||
}
|
||||
|
|
@ -276,37 +334,50 @@ func (m *Manager) Refresh(ctx context.Context, freshnessTTL time.Duration) (stri
|
|||
if err != nil {
|
||||
return "", fmt.Errorf("latest epoch: %w", err)
|
||||
}
|
||||
if active := m.Active(); active != nil && !latest.After(active.Epoch()) {
|
||||
id := DatasetID{Epoch: latest}
|
||||
if a := m.activeGlobal(); a != nil && !latest.After(a.ID.Epoch) {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
jobID := m.Download(latest)
|
||||
|
||||
// Spawn a watcher that loads the dataset on successful completion.
|
||||
go func() {
|
||||
for {
|
||||
info, ok := m.GetJob(jobID)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
switch info.Status {
|
||||
case JobComplete:
|
||||
if err := m.LoadEpoch(context.Background(), latest); err != nil {
|
||||
m.log.Error("load after download", zap.Error(err))
|
||||
}
|
||||
return
|
||||
case JobFailed, JobCancelled:
|
||||
return
|
||||
}
|
||||
time.Sleep(2 * time.Second)
|
||||
}
|
||||
}()
|
||||
jobID := m.Download(id)
|
||||
go m.loadAfterCompletion(jobID, id)
|
||||
return jobID, nil
|
||||
}
|
||||
|
||||
// runDownload executes one Source.Download invocation and records its outcome.
|
||||
// activeGlobal returns the currently-loaded global dataset, if any.
|
||||
func (m *Manager) activeGlobal() *loadedDataset {
|
||||
m.activeMu.RLock()
|
||||
defer m.activeMu.RUnlock()
|
||||
for i := range m.active {
|
||||
if m.active[i].ID.Subset.IsGlobal() {
|
||||
d := m.active[i]
|
||||
return &d
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) loadAfterCompletion(jobID string, id DatasetID) {
|
||||
for {
|
||||
info, ok := m.GetJob(jobID)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
switch info.Status {
|
||||
case JobComplete:
|
||||
if err := m.Load(context.Background(), id); err != nil {
|
||||
m.log.Error("load after download", zap.Error(err))
|
||||
}
|
||||
return
|
||||
case JobFailed, JobCancelled:
|
||||
return
|
||||
}
|
||||
time.Sleep(2 * time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *Manager) runDownload(ctx context.Context, e *jobEntry) {
|
||||
defer m.inFlight.Delete(e.epoch.Format(time.RFC3339))
|
||||
defer m.inFlight.Delete(e.dataset.Filename())
|
||||
|
||||
e.mu.Lock()
|
||||
e.status = JobRunning
|
||||
|
|
@ -314,9 +385,9 @@ func (m *Manager) runDownload(ctx context.Context, e *jobEntry) {
|
|||
|
||||
m.log.Info("download started",
|
||||
zap.String("job", e.id),
|
||||
zap.Time("epoch", e.epoch))
|
||||
zap.String("dataset", e.dataset.Filename()))
|
||||
|
||||
err := m.src.Download(ctx, e.epoch, m.store, jobProgress{e: e}, m.throttle)
|
||||
err := m.src.Download(ctx, e.dataset, m.store, jobProgress{e: e}, m.throttle)
|
||||
now := time.Now().UTC()
|
||||
|
||||
e.mu.Lock()
|
||||
|
|
@ -339,10 +410,9 @@ func (m *Manager) runDownload(ctx context.Context, e *jobEntry) {
|
|||
zap.NamedError("err", err))
|
||||
}
|
||||
|
||||
// completeShortCircuit records a job as complete without performing any work.
|
||||
func (m *Manager) completeShortCircuit(ctx context.Context, e *jobEntry) {
|
||||
_ = ctx
|
||||
defer m.inFlight.Delete(e.epoch.Format(time.RFC3339))
|
||||
defer m.inFlight.Delete(e.dataset.Filename())
|
||||
now := time.Now().UTC()
|
||||
e.mu.Lock()
|
||||
e.status = JobComplete
|
||||
|
|
@ -350,20 +420,6 @@ func (m *Manager) completeShortCircuit(ctx context.Context, e *jobEntry) {
|
|||
e.mu.Unlock()
|
||||
}
|
||||
|
||||
// swapActive replaces the active field and closes the previous one if it
|
||||
// implements io.Closer.
|
||||
func (m *Manager) swapActive(f weather.WindField) {
|
||||
m.activeMu.Lock()
|
||||
old := m.active
|
||||
m.active = f
|
||||
m.activeMu.Unlock()
|
||||
if c, ok := old.(interface{ Close() error }); ok && c != nil {
|
||||
if err := c.Close(); err != nil {
|
||||
m.log.Warn("close old dataset", zap.Error(err))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Close releases all resources, cancelling any in-flight jobs.
|
||||
func (m *Manager) Close() error {
|
||||
m.jobsMu.Lock()
|
||||
|
|
@ -373,11 +429,18 @@ func (m *Manager) Close() error {
|
|||
m.jobsMu.Unlock()
|
||||
|
||||
m.activeMu.Lock()
|
||||
active := m.active
|
||||
for _, d := range m.active {
|
||||
closeField(d.Field, m.log)
|
||||
}
|
||||
m.active = nil
|
||||
m.activeMu.Unlock()
|
||||
if c, ok := active.(interface{ Close() error }); ok && c != nil {
|
||||
return c.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func closeField(f weather.WindField, log *zap.Logger) {
|
||||
if c, ok := f.(interface{ Close() error }); ok && c != nil {
|
||||
if err := c.Close(); err != nil && log != nil {
|
||||
log.Warn("close dataset", zap.Error(err))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue