updated downloader

This commit is contained in:
straitz 2026-03-22 16:29:53 +09:00
parent ca95e06ab7
commit 8e9f117799
30 changed files with 1209 additions and 698 deletions


@@ -4,7 +4,6 @@ import (
"context"
"encoding/binary"
"math"
"net/http"
"os"
"path/filepath"
"strings"
@@ -41,15 +40,12 @@ func New(cfg *Config) (Service, error) {
// Try to load existing dataset on startup
if err := s.loadExistingDataset(); err != nil {
// Log error but don't fail startup - dataset will be loaded on first Update()
// This allows the service to start even if no data is available yet
}
return s, nil
}
// loadExistingDataset tries to load the most recent available dataset
func (s *service) loadExistingDataset() error {
// Find the most recent cube file
pattern := filepath.Join(s.cfg.Dir, "*.cube")
matches, err := filepath.Glob(pattern)
if err != nil {
@@ -60,7 +56,6 @@ func (s *service) loadExistingDataset() error {
return errcodes.ErrNoCubeFilesFound
}
// Sort by modification time (newest first)
var latestFile string
var latestTime time.Time
@@ -69,7 +64,6 @@ func (s *service) loadExistingDataset() error {
if err != nil {
continue
}
if info.ModTime().After(latestTime) {
latestTime = info.ModTime()
latestFile = match
@@ -80,18 +74,16 @@ func (s *service) loadExistingDataset() error {
return errcodes.ErrNoValidCubeFilesFound
}
// Check if the file is fresh enough
if time.Since(latestTime) > s.cfg.TTL {
return errcodes.Wrap(errcodes.ErrLatestCubeFileIsTooOld, "latest cube file is too old")
}
// Load the dataset
dc := &s.cfg.Dataset
c, err := openCube(latestFile, dc)
if err != nil {
return err
}
// Extract run time from filename
base := filepath.Base(latestFile)
runStr := strings.TrimSuffix(base, ".cube")
run, err := time.Parse("20060102_15", runStr)
@@ -100,94 +92,70 @@ func (s *service) loadExistingDataset() error {
return err
}
s.data.Store(&dataset{cube: c, ds: dc, runUTC: run.Unix()})
return nil
}
// Update() downloads missing GRIBs, assembles cube into a single mmapfile.
func (s *service) Update(ctx context.Context) error {
// Check if we already have fresh data
if d := s.data.Load(); d != nil {
runTime := time.Unix(d.runUTC, 0)
if time.Since(runTime) < s.cfg.TTL {
// Data is still fresh, no need to update
return nil
}
}
// Check again after acquiring lock (double-checked locking pattern)
if d := s.data.Load(); d != nil {
runTime := time.Unix(d.runUTC, 0)
if time.Since(runTime) < s.cfg.TTL {
// Another instance already updated the data
return nil
}
}
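// Target a run a few hours in the past so its files are likely to be fully published
// (descriptive assumption about intent; the offset itself comes from the code below).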
dc := &s.cfg.Dataset
run := nearestRun(time.Now().UTC().Add(-6 * time.Hour))
// Check if we already have this run
cubePath := filepath.Join(s.cfg.Dir, run.Format("20060102_15")) + ".cube"
if _, err := os.Stat(cubePath); err == nil {
// File exists, check if it's fresh
info, err := os.Stat(cubePath)
if err == nil && time.Since(info.ModTime()) < s.cfg.TTL {
// File is fresh, just load it
c, err := openCube(cubePath, dc)
if err != nil {
return err
}
s.data.Store(&dataset{cube: c, ds: dc, runUTC: run.Unix()})
s.cache = memCache{ttl: s.cfg.CacheTTL}
return nil
}
}
// Download missing GRIB files for this run
downloadCtx, cancel := context.WithTimeout(ctx, 60*time.Minute)
defer cancel()
dl := NewPartialDownloader(s.cfg.Dir, s.cfg.Parallel, dc)
if err := dl.Run(downloadCtx, run); err != nil {
return err
}
// Assemble cube if it doesn't exist
if _, err := os.Stat(cubePath); err != nil {
if err := assembleCube(s.cfg.Dir, run, cubePath, dc); err != nil {
return err
}
}
c, err := openCube(cubePath, dc)
if err != nil {
return err
}
s.data.Store(&dataset{cube: c, ds: dc, runUTC: run.Unix()})
s.cache = memCache{ttl: s.cfg.CacheTTL}
return nil
}
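// assembleCube creates a cube file of dc.CubeSize() bytes, mmaps it, and fills it with
// little-endian float32 grids decoded from the downloaded GRIB files for the given run.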
func assembleCube(dir string, run time.Time, cubePath string, dc *DatasetConfig) error {
sizePerVar := dc.SizePerVar()
total := dc.CubeSize()
gridBytes := int64(dc.GridSize()) * 4
f, err := os.Create(cubePath)
if err != nil {
return err
@@ -203,27 +171,23 @@ func assembleCube(dir string, run time.Time, cubePath string) error {
defer f.Close()
pIndex := make(map[int]int)
for i, p := range dc.Levels {
pIndex[int(math.Round(p))] = i
}
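// processFile decodes one GRIB file and copies each matching message (gh, u, v on a
// configured pressure level) into the mmapped cube at time index ti.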
processFile := func(fn string, ti int) error {
file, err := os.Open(fn)
if err != nil {
return err
}
messages, err := griblib.ReadMessages(file)
file.Close()
if err != nil {
return err
}
for _, m := range messages {
// Check if this is a wind component (u or v) or geopotential height
// ParameterCategory 2 = momentum, ParameterNumber 2 = u-wind, 3 = v-wind
// ParameterCategory 3 = mass, ParameterNumber 5 = geopotential height
if m.Section4.ProductDefinitionTemplateNumber != 0 {
continue
}
@@ -231,7 +195,6 @@ func assembleCube(dir string, run time.Time, cubePath string) error {
product := m.Section4.ProductDefinitionTemplate
var varIdx int
// Match tawhiri variable order: ['gh', 'u', 'v'] (indices 0, 1, 2)
if product.ParameterCategory == 2 {
switch product.ParameterNumber {
case 2: // u-wind
@@ -242,18 +205,15 @@ func assembleCube(dir string, run time.Time, cubePath string) error {
continue
}
} else if product.ParameterCategory == 3 && product.ParameterNumber == 5 {
varIdx = 0 // geopotential height
} else {
continue
}
// Check if this is a pressure level (type 100)
if product.FirstSurface.Type != 100 {
continue
}
// Get pressure level in hPa
pressure := float64(product.FirstSurface.Value) / 100.0
pIdx, ok := pIndex[int(math.Round(pressure))]
if !ok {
@@ -261,14 +221,27 @@ func assembleCube(dir string, run time.Time, cubePath string) error {
}
vals := m.Data()
// GRIB library returns scan north->south, west->east already in row-major order
raw := make([]byte, len(vals)*4)
for i, v := range vals {
binary.LittleEndian.PutUint32(raw[i*4:], math.Float32bits(float32(v)))
}
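// Cube layout: [variable][time step][pressure level][lat/lon grid], with each grid slice
// stored as gridBytes of little-endian float32 values.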
base := int64(varIdx)*sizePerVar + (int64(ti)*int64(dc.NP)+int64(pIdx))*gridBytes
copy(mm[base:base+int64(len(raw))], raw)
}
return nil
}
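// Each forecast step is read from two source files (the primary file and its "B" companion);
// both are decoded into the same time index of the cube.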
steps := dc.Steps()
for ti, step := range steps {
fn := filepath.Join(dir, dc.FileName(run, step))
if err := processFile(fn, ti); err != nil {
return err
}
fnB := filepath.Join(dir, dc.FileNameB(run, step))
if err := processFile(fnB, ti); err != nil {
return err
}
}
return mm.Flush()
}
@@ -279,24 +252,21 @@ func (s *service) Extract(ctx context.Context, lat, lon, alt float64, ts time.Ti
if d == nil {
return zero, errcodes.ErrNoDataset
}
maxDur := time.Duration(s.cfg.Dataset.MaxHour) * time.Hour
if ts.Before(time.Unix(d.runUTC, 0)) || ts.After(time.Unix(d.runUTC, 0).Add(maxDur)) {
return zero, errcodes.ErrOutOfBounds
}
// Try memory cache first
key := encodeKey(lat, lon, alt, ts)
if v, ok := s.cache.get(key); ok {
return [2]float64(v), nil
}
// Calculate result
td := ts.Sub(time.Unix(d.runUTC, 0)).Hours()
u, v := d.uv(lat, lon, alt, td)
out := [2]float64{u, v}
// Cache in memory
s.cache.set(key, vec(out))
return out, nil
}