feat: downloader

This commit is contained in:
Anatoly Antonov 2025-06-23 04:19:26 +03:00
parent b9c1a98895
commit 42e7924be9
37 changed files with 2422 additions and 94 deletions

View file

@ -3,10 +3,12 @@ package grib
import (
"context"
"encoding/binary"
"fmt"
"math"
"net/http"
"os"
"path/filepath"
"strings"
"sync/atomic"
"time"
@ -21,6 +23,12 @@ type RedisIface interface {
Get(key string) ([]byte, error)
}
// Service is the public contract of the GRIB wind service: it keeps a
// wind-data cube up to date and answers point lookups against it.
type Service interface {
	// Update refreshes the dataset: it downloads missing GRIB files for the
	// latest model run, assembles them into a cube file and atomically swaps
	// the new dataset in. Safe to call periodically; it is a no-op while the
	// current data is still fresh.
	Update(ctx context.Context) error
	// Extract returns the [u, v] wind components at the given latitude,
	// longitude (degrees), altitude and timestamp. Results are served from
	// an in-memory cache and Redis when possible.
	Extract(ctx context.Context, lat, lon, alt float64, ts time.Time) ([2]float64, error)
	// Close releases the currently loaded dataset, if any.
	Close() error
}
type ServiceConfig struct {
Dir string
TTL time.Duration
@ -36,7 +44,7 @@ type service struct {
data atomic.Pointer[dataset]
}
func New(cfg ServiceConfig) (*service, error) {
func New(cfg ServiceConfig) (Service, error) {
if cfg.TTL == 0 {
cfg.TTL = 24 * time.Hour
}
@ -44,29 +52,134 @@ func New(cfg ServiceConfig) (*service, error) {
return nil, err
}
s := &service{cfg: cfg, cache: memCache{ttl: cfg.CacheTTL}}
// Try to load existing dataset on startup
if err := s.loadExistingDataset(); err != nil {
// Log error but don't fail startup - dataset will be loaded on first Update()
// This allows the service to start even if no data is available yet
}
return s, nil
}
// loadExistingDataset tries to load the most recent available dataset
// loadExistingDataset loads the most recent cube file from cfg.Dir into
// s.data so the service can serve lookups immediately after startup.
//
// It globs "*.cube" in the data directory, picks the file with the newest
// modification time, rejects it when older than cfg.TTL, derives the model
// run time from the file name (layout "20060102_15" + ".cube") and opens
// the cube. Returns an errcodes sentinel when no usable file exists.
func (s *service) loadExistingDataset() error {
	matches, err := filepath.Glob(filepath.Join(s.cfg.Dir, "*.cube"))
	if err != nil {
		return err
	}
	if len(matches) == 0 {
		return errcodes.ErrNoCubeFilesFound
	}
	// Linear scan for the file with the newest modification time.
	// Files that fail to stat are skipped rather than failing the scan.
	var latestFile string
	var latestTime time.Time
	for _, match := range matches {
		info, err := os.Stat(match)
		if err != nil {
			continue
		}
		if info.ModTime().After(latestTime) {
			latestTime = info.ModTime()
			latestFile = match
		}
	}
	if latestFile == "" {
		return errcodes.ErrNoValidCubeFilesFound
	}
	// Stale data is worse than no data: the caller falls back to Update().
	if time.Since(latestTime) > s.cfg.TTL {
		return errcodes.Wrap(errcodes.ErrLatestCubeFileIsTooOld, "latest cube file is too old")
	}
	// Parse the run time from the file name BEFORE opening the cube, so a
	// malformed name does not cost a pointless open/close cycle. The caller
	// sees the same parse error either way.
	runStr := strings.TrimSuffix(filepath.Base(latestFile), ".cube")
	run, err := time.Parse("20060102_15", runStr)
	if err != nil {
		return err
	}
	c, err := openCube(latestFile)
	if err != nil {
		return err
	}
	s.data.Store(&dataset{cube: c, runUTC: run.Unix()})
	return nil
}
// Update() downloads missing GRIBs, assembles cube into a single mmapfile.
func (s *service) Update(ctx context.Context) error {
// Check if we already have fresh data
if d := s.data.Load(); d != nil {
runTime := time.Unix(d.runUTC, 0)
if time.Since(runTime) < s.cfg.TTL {
// Data is still fresh, no need to update
return nil
}
}
unlock, err := s.cfg.Redis.Lock(ctx, "grib-dl", 45*time.Minute)
if err != nil {
return err
}
defer unlock(ctx)
dl := downloader.Downloader{Dir: s.cfg.Dir, Parallel: s.cfg.Parallel, Client: s.cfg.Client}
// Check again after acquiring lock (double-checked locking pattern)
if d := s.data.Load(); d != nil {
runTime := time.Unix(d.runUTC, 0)
if time.Since(runTime) < s.cfg.TTL {
// Another instance already updated the data
return nil
}
}
dl := Downloader{Dir: s.cfg.Dir, Parallel: s.cfg.Parallel, Client: s.cfg.Client}
run := nearestRun(time.Now().UTC().Add(-4 * time.Hour))
// Check if we already have this run
cubePath := filepath.Join(s.cfg.Dir, run.Format("20060102_15")) + ".cube"
if _, err := os.Stat(cubePath); err == nil {
// File exists, check if it's fresh
info, err := os.Stat(cubePath)
if err == nil && time.Since(info.ModTime()) < s.cfg.TTL {
// File is fresh, just load it
c, err := openCube(cubePath)
if err != nil {
return err
}
ds := &dataset{cube: c, runUTC: run.Unix()}
s.data.Store(ds)
s.cache = memCache{ttl: s.cfg.CacheTTL}
return nil
}
}
// Download new data
if err := dl.Run(ctx, run); err != nil {
return err
}
cubePath := filepath.Join(s.cfg.Dir, run.Format("20060102_15")) + ".cube"
// Assemble cube if it doesn't exist
if _, err := os.Stat(cubePath); err != nil {
if err := assembleCube(s.cfg.Dir, run, cubePath); err != nil {
return err
}
}
c, err := openCube(cubePath)
if err != nil {
return err
@ -101,26 +214,52 @@ func assembleCube(dir string, run time.Time, cubePath string) error {
for ti, step := range steps {
fn := filepath.Join(dir, fileName(run, step))
gf, err := griblib.Read(fn)
file, err := os.Open(fn)
if err != nil {
return err
}
for _, m := range gf.Messages {
if m.ParameterShortName != "u" && m.ParameterShortName != "v" {
messages, err := griblib.ReadMessages(file)
file.Close() // Close immediately after reading
if err != nil {
return err
}
for _, m := range messages {
// Check if this is a wind component (u or v)
// ParameterCategory 2 = momentum, ParameterNumber 2 = u-wind, 3 = v-wind
if m.Section4.ProductDefinitionTemplateNumber != 0 {
continue
}
if m.TypeOfFirstFixedSurface != 100 {
product := m.Section4.ProductDefinitionTemplate
if product.ParameterCategory != 2 {
continue
}
pIdx, ok := pIndex[int(m.PressureLevel)]
var varIdx int
switch product.ParameterNumber {
case 2: // u-wind
varIdx = 0
case 3: // v-wind
varIdx = 1
default:
continue
}
// Check if this is a pressure level (type 100)
if product.FirstSurface.Type != 100 {
continue
}
// Get pressure level in hPa
pressure := float64(product.FirstSurface.Value) / 100.0
pIdx, ok := pIndex[int(math.Round(pressure))]
if !ok {
continue
}
varIdx := 0
if m.ParameterShortName == "v" {
varIdx = 1
}
vals := m.Values
vals := m.Data()
// GRIB library returns scan north->south, west->east already in row-major order
raw := make([]byte, len(vals)*4)
for i, v := range vals {
@ -142,13 +281,56 @@ func (s *service) Extract(ctx context.Context, lat, lon, alt float64, ts time.Ti
if ts.Before(time.Unix(d.runUTC, 0)) || ts.After(time.Unix(d.runUTC, 0).Add(48*time.Hour)) {
return zero, errcodes.ErrOutOfBounds
}
// Try memory cache first
key := encodeKey(lat, lon, alt, ts)
if v, ok := s.cache.get(key); ok {
return [2]float64(v), nil
}
// Try Redis cache
redisKey := fmt.Sprintf("grib:extract:%d", key)
if cached, err := s.cfg.Redis.Get(redisKey); err == nil {
var result [2]float64
if len(cached) == 16 {
result[0] = math.Float64frombits(binary.LittleEndian.Uint64(cached[:8]))
result[1] = math.Float64frombits(binary.LittleEndian.Uint64(cached[8:]))
s.cache.set(key, vec(result))
return result, nil
} else {
// Cache data is corrupted (wrong length)
return zero, errcodes.ErrRedisCacheCorrupted
}
} else {
// Check if it's a cache miss (expected error)
if errcode, ok := errcodes.AsErr(err); ok && errcode == errcodes.ErrRedisCacheMiss {
// Cache miss is expected, continue with calculation
} else {
// Unexpected error, return it
return zero, err
}
}
// Calculate result
td := ts.Sub(time.Unix(d.runUTC, 0)).Hours()
u, v := d.uv(lat, lon, alt, td)
out := [2]float64{u, v}
// Cache in memory
s.cache.set(key, vec(out))
// Cache in Redis
encoded := make([]byte, 16)
binary.LittleEndian.PutUint64(encoded[:8], math.Float64bits(out[0]))
binary.LittleEndian.PutUint64(encoded[8:], math.Float64bits(out[1]))
s.cfg.Redis.Set(redisKey, encoded, s.cfg.CacheTTL)
return out, nil
}
// Close releases the currently loaded dataset. It is a no-op when no
// dataset has been loaded yet.
func (s *service) Close() error {
	d := s.data.Load()
	if d == nil {
		return nil
	}
	return d.Close()
}