feat: polish & windviz & deploy
This commit is contained in:
parent
81b8e763bd
commit
465ad00f7b
78 changed files with 20622 additions and 2154 deletions
|
|
@ -1,11 +1,15 @@
|
|||
// Package async implements the asynchronous prediction endpoints
|
||||
// (/api/v1/predictions{,/{id}}) and the worker pool that executes them.
|
||||
// Package async runs profile-driven predictions on a bounded worker pool and
|
||||
// retains their results in memory for a configurable TTL. It is the engine
|
||||
// behind the asynchronous prediction endpoints; the HTTP surface itself is
|
||||
// the ogen-generated server in the parent package.
|
||||
//
|
||||
// Each enqueued request is assigned a job ID; the result is held in
|
||||
// memory for a configurable TTL after completion.
|
||||
// The package is decoupled from the request/response wire types: a RunFunc is
|
||||
// injected at construction, so this file imports only the generated API types
|
||||
// it stores and returns.
|
||||
package async
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
|
@ -13,12 +17,13 @@ import (
|
|||
"github.com/google/uuid"
|
||||
"go.uber.org/zap"
|
||||
|
||||
"predictor-refactored/internal/api/v2"
|
||||
"predictor-refactored/internal/datasets"
|
||||
"predictor-refactored/internal/elevation"
|
||||
"predictor-refactored/internal/metrics"
|
||||
apirest "predictor-refactored/pkg/rest"
|
||||
)
|
||||
|
||||
// RunFunc executes one prediction synchronously and returns its response or
// an error. It is injected into the Manager at construction and called with
// the request pointer that was handed to Enqueue. A non-nil error marks the
// job failed with the error's text; a panic raised by the function is
// recovered by the Manager and recorded as a failure as well.
|
||||
type RunFunc func(req *apirest.PredictionV2Request) (*apirest.PredictionV2Response, error)
|
||||
|
||||
// Status is the lifecycle state of a prediction job.
|
||||
type Status string
|
||||
|
||||
|
|
@ -30,20 +35,20 @@ const (
|
|||
StatusCancelled Status = "cancelled"
|
||||
)
|
||||
|
||||
// JobInfo is the externally-visible snapshot of one prediction job.
|
||||
// JobInfo is a snapshot of one prediction job.
|
||||
type JobInfo struct {
|
||||
ID string `json:"id"`
|
||||
Status Status `json:"status"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
StartedAt *time.Time `json:"started_at,omitempty"`
|
||||
CompletedAt *time.Time `json:"completed_at,omitempty"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Result *v2.PredictionResponse `json:"result,omitempty"`
|
||||
ID string
|
||||
Status Status
|
||||
CreatedAt time.Time
|
||||
StartedAt *time.Time
|
||||
CompletedAt *time.Time
|
||||
Error string
|
||||
Result *apirest.PredictionV2Response
|
||||
}
|
||||
|
||||
type job struct {
|
||||
id string
|
||||
req v2.PredictionRequest
|
||||
req *apirest.PredictionV2Request
|
||||
createdAt time.Time
|
||||
|
||||
mu sync.Mutex
|
||||
|
|
@ -51,19 +56,15 @@ type job struct {
|
|||
startedAt time.Time
|
||||
completedAt time.Time
|
||||
errStr string
|
||||
result *v2.PredictionResponse
|
||||
cancel chan struct{}
|
||||
result *apirest.PredictionV2Response
|
||||
}
|
||||
|
||||
func (j *job) snapshot() JobInfo {
|
||||
j.mu.Lock()
|
||||
defer j.mu.Unlock()
|
||||
info := JobInfo{
|
||||
ID: j.id,
|
||||
Status: j.status,
|
||||
CreatedAt: j.createdAt,
|
||||
Error: j.errStr,
|
||||
Result: j.result,
|
||||
ID: j.id, Status: j.status, CreatedAt: j.createdAt,
|
||||
Error: j.errStr, Result: j.result,
|
||||
}
|
||||
if !j.startedAt.IsZero() {
|
||||
t := j.startedAt
|
||||
|
|
@ -76,16 +77,14 @@ func (j *job) snapshot() JobInfo {
|
|||
return info
|
||||
}
|
||||
|
||||
// Manager runs a fixed pool of workers to execute prediction jobs and
|
||||
// retains their results for the configured TTL.
|
||||
// Manager runs a fixed pool of workers and retains job results for a TTL.
|
||||
type Manager struct {
|
||||
mgr *datasets.Manager
|
||||
elev *elevation.Dataset
|
||||
run RunFunc
|
||||
metrics metrics.Sink
|
||||
log *zap.Logger
|
||||
|
||||
queue chan *job
|
||||
ttl time.Duration
|
||||
queue chan *job
|
||||
ttl time.Duration
|
||||
|
||||
jobsMu sync.RWMutex
|
||||
jobs map[string]*job
|
||||
|
|
@ -97,16 +96,14 @@ type Manager struct {
|
|||
|
||||
// Config controls Manager construction.
|
||||
type Config struct {
|
||||
// Workers is the maximum concurrent prediction executions.
|
||||
Workers int
|
||||
// QueueSize bounds the number of jobs waiting to start.
|
||||
QueueSize int
|
||||
// ResultTTL is how long completed/failed jobs are retained in memory.
|
||||
ResultTTL time.Duration
|
||||
Workers int // max concurrent executions
|
||||
QueueSize int // pending-queue bound
|
||||
ResultTTL time.Duration // retention of terminal jobs
|
||||
}
|
||||
|
||||
// New constructs a Manager with the given config and starts the workers.
|
||||
func New(cfg Config, mgr *datasets.Manager, elev *elevation.Dataset, sink metrics.Sink, log *zap.Logger) *Manager {
|
||||
// New constructs a Manager and starts its workers. run executes one
|
||||
// prediction; sink and log may be nil.
|
||||
func New(cfg Config, run RunFunc, sink metrics.Sink, log *zap.Logger) *Manager {
|
||||
if cfg.Workers <= 0 {
|
||||
cfg.Workers = 4
|
||||
}
|
||||
|
|
@ -123,7 +120,7 @@ func New(cfg Config, mgr *datasets.Manager, elev *elevation.Dataset, sink metric
|
|||
log = zap.NewNop()
|
||||
}
|
||||
m := &Manager{
|
||||
mgr: mgr, elev: elev, metrics: sink, log: log,
|
||||
run: run, metrics: sink, log: log,
|
||||
queue: make(chan *job, cfg.QueueSize),
|
||||
jobs: make(map[string]*job),
|
||||
ttl: cfg.ResultTTL,
|
||||
|
|
@ -138,15 +135,14 @@ func New(cfg Config, mgr *datasets.Manager, elev *elevation.Dataset, sink metric
|
|||
return m
|
||||
}
|
||||
|
||||
// Enqueue creates a new job from req and returns its snapshot.
|
||||
// Returns false when the queue is full.
|
||||
func (m *Manager) Enqueue(req v2.PredictionRequest) (JobInfo, bool) {
|
||||
// Enqueue creates a job from req and returns its snapshot. The bool is false
|
||||
// when the queue is full (the returned job is marked failed).
|
||||
func (m *Manager) Enqueue(req *apirest.PredictionV2Request) (JobInfo, bool) {
|
||||
j := &job{
|
||||
id: uuid.New().String(),
|
||||
req: req,
|
||||
createdAt: time.Now().UTC(),
|
||||
status: StatusPending,
|
||||
cancel: make(chan struct{}),
|
||||
}
|
||||
m.jobsMu.Lock()
|
||||
m.jobs[j.id] = j
|
||||
|
|
@ -156,7 +152,6 @@ func (m *Manager) Enqueue(req v2.PredictionRequest) (JobInfo, bool) {
|
|||
case m.queue <- j:
|
||||
return j.snapshot(), true
|
||||
default:
|
||||
// Queue full — mark the job failed and return it.
|
||||
j.mu.Lock()
|
||||
j.status = StatusFailed
|
||||
j.errStr = "prediction queue full"
|
||||
|
|
@ -177,8 +172,11 @@ func (m *Manager) Get(id string) (JobInfo, bool) {
|
|||
return j.snapshot(), true
|
||||
}
|
||||
|
||||
// Cancel marks a not-yet-started job as cancelled. Returns false when the
|
||||
// job is unknown or already terminal.
|
||||
// Cancel marks a still-queued job cancelled. Returns false when the job is
|
||||
// unknown or already running/terminal — a running prediction cannot be
|
||||
// interrupted (the worker would otherwise overwrite the cancelled status with
|
||||
// its result), so callers get an honest "too late" rather than a 204 that the
|
||||
// worker silently undoes.
|
||||
func (m *Manager) Cancel(id string) bool {
|
||||
m.jobsMu.RLock()
|
||||
j, ok := m.jobs[id]
|
||||
|
|
@ -187,22 +185,19 @@ func (m *Manager) Cancel(id string) bool {
|
|||
return false
|
||||
}
|
||||
j.mu.Lock()
|
||||
terminal := j.status == StatusComplete || j.status == StatusFailed || j.status == StatusCancelled
|
||||
if terminal {
|
||||
j.mu.Unlock()
|
||||
defer j.mu.Unlock()
|
||||
if j.status != StatusPending {
|
||||
return false
|
||||
}
|
||||
j.status = StatusCancelled
|
||||
j.completedAt = time.Now().UTC()
|
||||
j.mu.Unlock()
|
||||
close(j.cancel)
|
||||
return true
|
||||
}
|
||||
|
||||
// Inflight returns the count of running jobs.
|
||||
// Inflight returns the number of running jobs.
|
||||
func (m *Manager) Inflight() int64 {
	// Read via Load to pair with the Add(±1) calls made around each run;
	// presumably an atomic counter — confirm against the Manager struct.
	running := m.inflight.Load()
	return running
}
|
||||
|
||||
// Close shuts down workers and the evictor.
|
||||
// Close stops the workers and the evictor.
|
||||
func (m *Manager) Close() {
|
||||
close(m.closed)
|
||||
close(m.queue)
|
||||
|
|
@ -212,41 +207,49 @@ func (m *Manager) Close() {
|
|||
func (m *Manager) worker() {
|
||||
defer m.wg.Done()
|
||||
for j := range m.queue {
|
||||
// Check cancellation before starting.
|
||||
j.mu.Lock()
|
||||
cancelled := j.status == StatusCancelled
|
||||
if !cancelled {
|
||||
j.status = StatusRunning
|
||||
j.startedAt = time.Now().UTC()
|
||||
}
|
||||
j.mu.Unlock()
|
||||
if cancelled {
|
||||
continue
|
||||
}
|
||||
m.inflight.Add(1)
|
||||
j.mu.Lock()
|
||||
j.status = StatusRunning
|
||||
j.startedAt = time.Now().UTC()
|
||||
j.mu.Unlock()
|
||||
|
||||
resp, err := v2.Run(m.mgr, m.elev, j.req)
|
||||
|
||||
j.mu.Lock()
|
||||
j.completedAt = time.Now().UTC()
|
||||
if err != nil {
|
||||
j.status = StatusFailed
|
||||
j.errStr = err.Error()
|
||||
} else {
|
||||
j.status = StatusComplete
|
||||
j.result = resp
|
||||
}
|
||||
j.mu.Unlock()
|
||||
m.inflight.Add(-1)
|
||||
|
||||
if err == nil {
|
||||
m.metrics.Prediction("async", j.completedAt.Sub(j.startedAt), nil)
|
||||
} else {
|
||||
m.metrics.Prediction("async", j.completedAt.Sub(j.startedAt), err)
|
||||
}
|
||||
m.execute(j)
|
||||
}
|
||||
}
|
||||
|
||||
// execute runs one job, recovering from a panic in the injected RunFunc so a
|
||||
// single bad prediction can't leak the inflight counter or kill the worker.
|
||||
func (m *Manager) execute(j *job) {
|
||||
m.inflight.Add(1)
|
||||
defer m.inflight.Add(-1)
|
||||
|
||||
resp, err := func() (resp *apirest.PredictionV2Response, err error) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
err = fmt.Errorf("prediction panicked: %v", r)
|
||||
}
|
||||
}()
|
||||
return m.run(j.req)
|
||||
}()
|
||||
|
||||
j.mu.Lock()
|
||||
j.completedAt = time.Now().UTC()
|
||||
if err != nil {
|
||||
j.status = StatusFailed
|
||||
j.errStr = err.Error()
|
||||
} else {
|
||||
j.status = StatusComplete
|
||||
j.result = resp
|
||||
}
|
||||
dur := j.completedAt.Sub(j.startedAt)
|
||||
j.mu.Unlock()
|
||||
m.metrics.Prediction("async", dur, err)
|
||||
}
|
||||
|
||||
func (m *Manager) evictor() {
|
||||
defer m.wg.Done()
|
||||
ticker := time.NewTicker(m.ttl / 4)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue