forked from gsn/predictor
feat: downloader
parent b9c1a98895
commit 42e7924be9
37 changed files with 2422 additions and 94 deletions
@@ -3,10 +3,12 @@ package grib
 
 import (
 	"context"
+	"encoding/binary"
 	"fmt"
+	"math"
 	"net/http"
 	"os"
 	"path/filepath"
 	"strings"
 	"sync/atomic"
 	"time"
@@ -21,6 +23,12 @@ type RedisIface interface {
 	Get(key string) ([]byte, error)
 }
 
+type Service interface {
+	Update(ctx context.Context) error
+	Extract(ctx context.Context, lat, lon, alt float64, ts time.Time) ([2]float64, error)
+	Close() error
+}
+
 type ServiceConfig struct {
 	Dir string
 	TTL time.Duration
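For orientation, a minimal sketch of how a caller might use the new Service interface. The import path and config values here are assumptions, not part of this commit; ServiceConfig also carries the CacheTTL, Redis, Parallel, and Client fields referenced later in this diff.

package main

import (
	"context"
	"log"
	"time"

	"example.com/gsn/predictor/grib" // hypothetical import path
)

func main() {
	ctx := context.Background()

	// TTL defaults to 24h inside New when left zero. A real setup would
	// also supply Redis (a RedisIface), CacheTTL, Parallel, and Client.
	svc, err := grib.New(grib.ServiceConfig{Dir: "/var/lib/grib", TTL: 6 * time.Hour})
	if err != nil {
		log.Fatal(err)
	}
	defer svc.Close()

	// Download/assemble the latest run, then sample the wind field.
	if err := svc.Update(ctx); err != nil {
		log.Fatal(err)
	}
	uv, err := svc.Extract(ctx, 55.75, 37.62, 10000, time.Now().UTC().Add(3*time.Hour))
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("u=%.2f m/s, v=%.2f m/s", uv[0], uv[1])
}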
@@ -36,7 +44,7 @@ type service struct {
 	data atomic.Pointer[dataset]
 }
 
-func New(cfg ServiceConfig) (*service, error) {
+func New(cfg ServiceConfig) (Service, error) {
 	if cfg.TTL == 0 {
 		cfg.TTL = 24 * time.Hour
 	}
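Returning the Service interface instead of *service gives callers a seam for substituting test doubles. A hypothetical stub that satisfies the interface (not part of this commit):

package grib

import (
	"context"
	"time"
)

// fakeService is a hypothetical test stub.
type fakeService struct{}

func (fakeService) Update(ctx context.Context) error { return nil }

func (fakeService) Extract(ctx context.Context, lat, lon, alt float64, ts time.Time) ([2]float64, error) {
	return [2]float64{1.5, -0.5}, nil // canned wind vector
}

func (fakeService) Close() error { return nil }

// Compile-time check that the stub implements Service.
var _ Service = fakeService{}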
@@ -44,29 +52,134 @@ func New(cfg ServiceConfig) (*service, error) {
 		return nil, err
 	}
 	s := &service{cfg: cfg, cache: memCache{ttl: cfg.CacheTTL}}
+
+	// Try to load an existing dataset on startup.
+	if err := s.loadExistingDataset(); err != nil {
+		// Log error but don't fail startup - dataset will be loaded on first Update().
+		// This allows the service to start even if no data is available yet.
+	}
+
 	return s, nil
 }
 
+// loadExistingDataset tries to load the most recent available dataset.
+func (s *service) loadExistingDataset() error {
+	// Find the most recent cube file.
+	pattern := filepath.Join(s.cfg.Dir, "*.cube")
+	matches, err := filepath.Glob(pattern)
+	if err != nil {
+		return err
+	}
+
+	if len(matches) == 0 {
+		return errcodes.ErrNoCubeFilesFound
+	}
+
+	// Pick the newest file by modification time.
+	var latestFile string
+	var latestTime time.Time
+
+	for _, match := range matches {
+		info, err := os.Stat(match)
+		if err != nil {
+			continue
+		}
+
+		if info.ModTime().After(latestTime) {
+			latestTime = info.ModTime()
+			latestFile = match
+		}
+	}
+
+	if latestFile == "" {
+		return errcodes.ErrNoValidCubeFilesFound
+	}
+
+	// Check that the file is fresh enough.
+	if time.Since(latestTime) > s.cfg.TTL {
+		return errcodes.Wrap(errcodes.ErrLatestCubeFileIsTooOld, "latest cube file is too old")
+	}
+
+	// Load the dataset.
+	c, err := openCube(latestFile)
+	if err != nil {
+		return err
+	}
+
+	// Extract the run time from the filename.
+	base := filepath.Base(latestFile)
+	runStr := strings.TrimSuffix(base, ".cube")
+	run, err := time.Parse("20060102_15", runStr)
+	if err != nil {
+		c.Close()
+		return err
+	}
+
+	ds := &dataset{cube: c, runUTC: run.Unix()}
+	s.data.Store(ds)
+
+	return nil
+}
+
 // Update() downloads missing GRIBs, assembles the cube into a single mmap file.
 func (s *service) Update(ctx context.Context) error {
+	// Check if we already have fresh data.
+	if d := s.data.Load(); d != nil {
+		runTime := time.Unix(d.runUTC, 0)
+		if time.Since(runTime) < s.cfg.TTL {
+			// Data is still fresh; no need to update.
+			return nil
+		}
+	}
+
 	unlock, err := s.cfg.Redis.Lock(ctx, "grib-dl", 45*time.Minute)
 	if err != nil {
 		return err
 	}
 	defer unlock(ctx)
 
-	dl := downloader.Downloader{Dir: s.cfg.Dir, Parallel: s.cfg.Parallel, Client: s.cfg.Client}
+	// Check again after acquiring the lock (double-checked locking pattern).
+	if d := s.data.Load(); d != nil {
+		runTime := time.Unix(d.runUTC, 0)
+		if time.Since(runTime) < s.cfg.TTL {
+			// Another instance already updated the data.
+			return nil
+		}
+	}
+
+	dl := Downloader{Dir: s.cfg.Dir, Parallel: s.cfg.Parallel, Client: s.cfg.Client}
 	run := nearestRun(time.Now().UTC().Add(-4 * time.Hour))
 
+	// Check if we already have this run.
+	cubePath := filepath.Join(s.cfg.Dir, run.Format("20060102_15")) + ".cube"
+	if _, err := os.Stat(cubePath); err == nil {
+		// The file exists; check whether it is fresh.
+		info, err := os.Stat(cubePath)
+		if err == nil && time.Since(info.ModTime()) < s.cfg.TTL {
+			// The file is fresh; just load it.
+			c, err := openCube(cubePath)
+			if err != nil {
+				return err
+			}
+			ds := &dataset{cube: c, runUTC: run.Unix()}
+			s.data.Store(ds)
+			s.cache = memCache{ttl: s.cfg.CacheTTL}
+			return nil
+		}
+	}
+
+	// Download new data.
 	if err := dl.Run(ctx, run); err != nil {
 		return err
 	}
 
-	cubePath := filepath.Join(s.cfg.Dir, run.Format("20060102_15")) + ".cube"
+	// Assemble the cube if it doesn't exist yet.
 	if _, err := os.Stat(cubePath); err != nil {
 		if err := assembleCube(s.cfg.Dir, run, cubePath); err != nil {
 			return err
 		}
 	}
 
 	c, err := openCube(cubePath)
 	if err != nil {
 		return err
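Update follows a double-checked pattern around the distributed Redis lock: a cheap freshness check before taking the lock, then the same check again once the lock is held, so only one instance performs the download while late arrivals return immediately. The shape of the pattern, with placeholder names rather than the package's actual API:

package sketch

import "context"

// refresh sketches double-checked locking around a distributed lock.
// isFresh is the cheap local check, lock acquires the shared lock and
// returns its unlock function, and doWork is the expensive download.
func refresh(
	ctx context.Context,
	isFresh func() bool,
	lock func(context.Context) (func(context.Context), error),
	doWork func(context.Context) error,
) error {
	if isFresh() { // fast path: no lock traffic at all
		return nil
	}

	unlock, err := lock(ctx)
	if err != nil {
		return err
	}
	defer unlock(ctx)

	// Re-check under the lock: another instance may have finished
	// the work while we were waiting to acquire it.
	if isFresh() {
		return nil
	}
	return doWork(ctx)
}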
@@ -101,26 +214,52 @@ func assembleCube(dir string, run time.Time, cubePath string) error {
 
 	for ti, step := range steps {
 		fn := filepath.Join(dir, fileName(run, step))
-		gf, err := griblib.Read(fn)
+		file, err := os.Open(fn)
 		if err != nil {
 			return err
 		}
-		for _, m := range gf.Messages {
-			if m.ParameterShortName != "u" && m.ParameterShortName != "v" {
+
+		messages, err := griblib.ReadMessages(file)
+		file.Close() // Close immediately after reading
+		if err != nil {
+			return err
+		}
+
+		for _, m := range messages {
+			// Check if this is a wind component (u or v).
+			// ParameterCategory 2 = momentum, ParameterNumber 2 = u-wind, 3 = v-wind.
+			if m.Section4.ProductDefinitionTemplateNumber != 0 {
 				continue
 			}
-			if m.TypeOfFirstFixedSurface != 100 {
+
+			product := m.Section4.ProductDefinitionTemplate
+			if product.ParameterCategory != 2 {
 				continue
 			}
-			pIdx, ok := pIndex[int(m.PressureLevel)]
+
+			var varIdx int
+			switch product.ParameterNumber {
+			case 2: // u-wind
+				varIdx = 0
+			case 3: // v-wind
+				varIdx = 1
+			default:
+				continue
+			}
+
+			// Check if this is a pressure level (type 100).
+			if product.FirstSurface.Type != 100 {
+				continue
+			}
+
+			// Get the pressure level in hPa.
+			pressure := float64(product.FirstSurface.Value) / 100.0
+			pIdx, ok := pIndex[int(math.Round(pressure))]
 			if !ok {
 				continue
 			}
-			varIdx := 0
-			if m.ParameterShortName == "v" {
-				varIdx = 1
-			}
-			vals := m.Values
+
+			vals := m.Data()
 			// GRIB library returns scan north->south, west->east already in row-major order
 			raw := make([]byte, len(vals)*4)
 			for i, v := range vals {
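The hunk ends inside the packing loop, but the len(vals)*4 buffer implies each sample is stored as a 4-byte (float32) value. A sketch of what the loop body presumably does; the little-endian choice is an assumption, consistent with the encoding Extract uses below:

package sketch

import (
	"encoding/binary"
	"math"
)

// packFloat32LE packs float64 samples into 4 bytes each, matching the
// raw := make([]byte, len(vals)*4) buffer in the hunk above. The actual
// loop body is cut off by the hunk boundary, so this is an assumption.
func packFloat32LE(vals []float64) []byte {
	raw := make([]byte, len(vals)*4)
	for i, v := range vals {
		binary.LittleEndian.PutUint32(raw[i*4:], math.Float32bits(float32(v)))
	}
	return raw
}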
@@ -142,13 +281,56 @@ func (s *service) Extract(ctx context.Context, lat, lon, alt float64, ts time.Time) ([2]float64, error) {
 	if ts.Before(time.Unix(d.runUTC, 0)) || ts.After(time.Unix(d.runUTC, 0).Add(48*time.Hour)) {
 		return zero, errcodes.ErrOutOfBounds
 	}
 
+	// Try memory cache first
 	key := encodeKey(lat, lon, alt, ts)
 	if v, ok := s.cache.get(key); ok {
 		return [2]float64(v), nil
 	}
+
+	// Try Redis cache
+	redisKey := fmt.Sprintf("grib:extract:%d", key)
+	if cached, err := s.cfg.Redis.Get(redisKey); err == nil {
+		var result [2]float64
+		if len(cached) == 16 {
+			result[0] = math.Float64frombits(binary.LittleEndian.Uint64(cached[:8]))
+			result[1] = math.Float64frombits(binary.LittleEndian.Uint64(cached[8:]))
+			s.cache.set(key, vec(result))
+			return result, nil
+		} else {
+			// Cache data is corrupted (wrong length)
+			return zero, errcodes.ErrRedisCacheCorrupted
+		}
+	} else {
+		// Check if it's a cache miss (expected error)
+		if errcode, ok := errcodes.AsErr(err); ok && errcode == errcodes.ErrRedisCacheMiss {
+			// Cache miss is expected, continue with calculation
+		} else {
+			// Unexpected error, return it
+			return zero, err
+		}
+	}
+
+	// Calculate result
 	td := ts.Sub(time.Unix(d.runUTC, 0)).Hours()
 	u, v := d.uv(lat, lon, alt, td)
 	out := [2]float64{u, v}
+
+	// Cache in memory
+	s.cache.set(key, vec(out))
+
+	// Cache in Redis
+	encoded := make([]byte, 16)
+	binary.LittleEndian.PutUint64(encoded[:8], math.Float64bits(out[0]))
+	binary.LittleEndian.PutUint64(encoded[8:], math.Float64bits(out[1]))
+	s.cfg.Redis.Set(redisKey, encoded, s.cfg.CacheTTL)
+
 	return out, nil
 }
+
+func (s *service) Close() error {
+	if d := s.data.Load(); d != nil {
+		return d.Close()
+	}
+	return nil
+}
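The Redis value format introduced in Extract is fixed-width: two IEEE-754 float64 values, little-endian, 16 bytes total, which is exactly what the len(cached) == 16 guard verifies. A standalone round trip of that encoding:

package main

import (
	"encoding/binary"
	"fmt"
	"math"
)

// encodeUV and decodeUV mirror the 16-byte value stored under the
// "grib:extract:%d" Redis keys in this diff.
func encodeUV(uv [2]float64) []byte {
	b := make([]byte, 16)
	binary.LittleEndian.PutUint64(b[:8], math.Float64bits(uv[0]))
	binary.LittleEndian.PutUint64(b[8:], math.Float64bits(uv[1]))
	return b
}

func decodeUV(b []byte) ([2]float64, bool) {
	if len(b) != 16 { // same corruption guard as Extract
		return [2]float64{}, false
	}
	return [2]float64{
		math.Float64frombits(binary.LittleEndian.Uint64(b[:8])),
		math.Float64frombits(binary.LittleEndian.Uint64(b[8:])),
	}, true
}

func main() {
	uv, ok := decodeUV(encodeUV([2]float64{3.2, -1.7}))
	fmt.Println(uv, ok) // [3.2 -1.7] true
}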