updated downloader

commit 8e9f117799 (parent ca95e06ab7)
30 changed files with 1209 additions and 698 deletions
internal/pkg/grib/assemble_test.go Normal file (+25)

@@ -0,0 +1,25 @@
package grib

import (
	"testing"
	"time"
)

func TestAssembleCubeFromExisting(t *testing.T) {
	dir := "C:/tmp/grib"
	run := time.Date(2026, 1, 16, 6, 0, 0, 0, time.UTC)
	cubePath := dir + "/" + run.Format("20060102_15") + ".cube"
	dc := DefaultDatasetConfig()

	t.Logf("Assembling cube from existing GRIB files...")
	t.Logf("Directory: %s", dir)
	t.Logf("Run: %s", run.Format("2006-01-02 15:04 MST"))
	t.Logf("Output: %s", cubePath)

	// assembleCube takes the dataset config in this commit (see below);
	// the three-argument call shown in the original diff would not compile.
	err := assembleCube(dir, run, cubePath, &dc)
	if err != nil {
		t.Fatalf("Failed to assemble cube: %v", err)
	}

	t.Logf("✓ Cube assembled successfully!")
	t.Logf("Cube file: %s", cubePath)
}
@@ -1,23 +1,130 @@
package grib

import (
	"fmt"
	"time"

	"git.intra.yksa.space/gsn/predictor/internal/pkg/errcodes"
	env "github.com/caarlos0/env/v11"
)

// DatasetConfig describes the parameters of a GFS dataset: grid, time steps,
// pressure levels, and download URLs.
type DatasetConfig struct {
	// Grid
	Resolution float64 // grid step in degrees (0.25 or 0.5)
	NLat       int     // latitude points (721 for 0.25°, 361 for 0.5°)
	NLon       int     // longitude points (1440 for 0.25°, 720 for 0.5°)

	// Time
	NT       int // number of time steps (97 for 0-96 h at a 1 h step)
	MaxHour  int // last forecast hour (96)
	TimeStep int // interval between steps, hours (1 or 3)

	// Vertical
	NP     int       // number of pressure levels
	Levels []float64 // pressure levels in hPa, descending (1000 … 1)

	// Variables in the cube (order matters: indices 0, 1, 2, …)
	NVar      int      // number of variables
	Variables []string // GRIB names for .idx filtering (HGT, UGRD, VGRD)

	// Download URLs (fmt templates: date, hour, hour, step)
	URLMask  string // primary pgrb2
	URLMaskB string // secondary pgrb2b

	// File names
	FileSuffix string // resolution token in file names ("0p25", "0p50")
}

// SizePerVar returns the size of one variable in the cube, in bytes.
func (dc *DatasetConfig) SizePerVar() int64 {
	return int64(dc.NT) * int64(dc.NP) * int64(dc.NLat) * int64(dc.NLon) * 4
}

// CubeSize returns the full size of the cube, in bytes.
func (dc *DatasetConfig) CubeSize() int64 {
	return dc.SizePerVar() * int64(dc.NVar)
}

// GridSize returns NLat * NLon.
func (dc *DatasetConfig) GridSize() int {
	return dc.NLat * dc.NLon
}

// InvResolution returns 1/Resolution, the multiplier that converts coordinates to indices.
func (dc *DatasetConfig) InvResolution() float64 {
	return 1.0 / dc.Resolution
}

// Steps returns the list of forecast hours [0, TimeStep, 2*TimeStep, …, MaxHour].
func (dc *DatasetConfig) Steps() []int {
	out := make([]int, 0, dc.NT)
	for h := 0; h <= dc.MaxHour; h += dc.TimeStep {
		out = append(out, h)
	}
	return out
}

// FileName returns the name of the primary GRIB file (pgrb2).
func (dc *DatasetConfig) FileName(run time.Time, step int) string {
	return fmt.Sprintf("gfs.t%02dz.pgrb2.%s.f%03d", run.Hour(), dc.FileSuffix, step)
}

// FileNameB returns the name of the secondary GRIB file (pgrb2b).
func (dc *DatasetConfig) FileNameB(run time.Time, step int) string {
	return fmt.Sprintf("gfs.t%02dz.pgrb2b.%s.f%03d", run.Hour(), dc.FileSuffix, step)
}

// GribURL returns the URL of the primary GRIB file.
func (dc *DatasetConfig) GribURL(run time.Time, step int) string {
	return fmt.Sprintf(dc.URLMask, run.Format("20060102"), run.Hour(), run.Hour(), step)
}

// GribURLB returns the URL of the secondary GRIB file.
func (dc *DatasetConfig) GribURLB(run time.Time, step int) string {
	return fmt.Sprintf(dc.URLMaskB, run.Format("20060102"), run.Hour(), run.Hour(), step)
}

// DefaultDatasetConfig returns the GFS 0.25° / 1-hour / 47-level config.
func DefaultDatasetConfig() DatasetConfig {
	return DatasetConfig{
		Resolution: 0.25,
		NLat:       721,
		NLon:       1440,

		NT:       97,
		MaxHour:  96,
		TimeStep: 1,

		NP: 47,
		Levels: []float64{
			1000, 975, 950, 925, 900, 875, 850, 825, 800, 775,
			750, 725, 700, 675, 650, 625, 600, 575, 550, 525,
			500, 475, 450, 425, 400, 375, 350, 325, 300, 275,
			250, 225, 200, 175, 150, 125, 100, 70, 50, 30,
			20, 10, 7, 5, 3, 2, 1,
		},

		NVar:      3,
		Variables: []string{"HGT", "UGRD", "VGRD"},

		URLMask:  "https://noaa-gfs-bdp-pds.s3.amazonaws.com/gfs.%s/%02d/atmos/gfs.t%02dz.pgrb2.0p25.f%03d",
		URLMaskB: "https://noaa-gfs-bdp-pds.s3.amazonaws.com/gfs.%s/%02d/atmos/gfs.t%02dz.pgrb2b.0p25.f%03d",

		FileSuffix: "0p25",
	}
}

// ---------------------------------------------------------------------------

type Config struct {
-	Dir        string        `env:"DIR" envDefault:"C:/tmp/grib"`
-	TTL        time.Duration `env:"TTL" envDefault:"48h"`
-	CacheTTL   time.Duration `env:"CACHE_TTL" envDefault:"1h"`
-	Parallel   int           `env:"PARALLEL" envDefault:"8"`
-	DatasetURL string        `env:"DATASET_URL" envDefault:"https://nomads.ncep.noaa.gov/pub/data/nccf/com/gfs/prod"`
-	// S3 configuration
-	UseS3     bool          `env:"USE_S3" envDefault:"true"`
-	S3Bucket  string        `env:"S3_BUCKET" envDefault:"noaa-gfs-bdp-pds"`
-	S3Region  string        `env:"S3_REGION" envDefault:"us-east-1"`
-	S3Timeout time.Duration `env:"S3_TIMEOUT" envDefault:"300s"`
+	Dir      string        `env:"DIR" envDefault:"C:/tmp/grib"`
+	TTL      time.Duration `env:"TTL" envDefault:"48h"`
+	CacheTTL time.Duration `env:"CACHE_TTL" envDefault:"1h"`
+	Parallel int           `env:"PARALLEL" envDefault:"8"`
+
+	Dataset DatasetConfig
}

func NewConfig() (*Config, error) {

@@ -27,6 +134,6 @@ func NewConfig() (*Config, error) {
	}); err != nil {
		return nil, errcodes.Wrap(err, "failed to parse GRIB config")
	}

+	cfg.Dataset = DefaultDatasetConfig()
	return cfg, nil
}
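Editor's note (not part of the diff): SizePerVar and CubeSize make the memory cost of the default config easy to check by hand. A minimal sketch in the style of the test above; the test name is illustrative:

	package grib

	import "testing"

	// 97 time steps x 47 levels x 721 x 1440 grid points x 4 bytes
	// is about 17.6 GiB per variable, and x3 variables about 52.9 GiB
	// for the whole mmap'd cube.
	func TestDefaultCubeSize(t *testing.T) {
		dc := DefaultDatasetConfig()
		if got := dc.SizePerVar(); got != 18_933_344_640 {
			t.Fatalf("SizePerVar = %d", got)
		}
		if got := dc.CubeSize(); got != 3*18_933_344_640 {
			t.Fatalf("CubeSize = %d", got)
		}
	}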
internal/pkg/grib/create_dataset.go Normal file (empty, +0)
@@ -15,7 +15,7 @@ type cube struct {
	file *os.File
}

-func openCube(path string) (*cube, error) {
+func openCube(path string, dc *DatasetConfig) (*cube, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err

@@ -27,14 +27,15 @@ func openCube(path string) (*cube, error) {
		return nil, err
	}

-	const (
-		nT   = 33 // 0-96 hours with step 3 hours (33 time steps)
-		nP   = 47 // 47 pressure levels matching tawhiri
-		nLat = 361
-		nLon = 720
-	)
-
-	return &cube{mm: mm, t: nT, p: nP, lat: nLat, lon: nLon, bytesPerVar: int64(nT * nP * nLat * nLon * 4), file: f}, nil
+	return &cube{
+		mm:          mm,
+		t:           dc.NT,
+		p:           dc.NP,
+		lat:         dc.NLat,
+		lon:         dc.NLon,
+		bytesPerVar: dc.SizePerVar(),
+		file:        f,
+	}, nil
}

func (c *cube) val(varIdx, ti, pi, y, x int) float32 {
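Editor's note (not part of the diff): the cube is a flat array of float32s ordered variable, then time, then pressure level, then latitude row, then longitude. The body of val is truncated above; a sketch of the byte-offset arithmetic it presumably performs, derived from bytesPerVar here and the base computation in assembleCube below (the north-to-south, row-major grid order follows the assembler's own comment; the helper name is not from the commit):

	// offsetOf maps (variable, time, pressure, lat, lon) indices to a
	// byte offset in the mmap'd cube. Illustrative helper only.
	func offsetOf(dc *DatasetConfig, varIdx, ti, pi, y, x int) int64 {
		cell := int64(y)*int64(dc.NLon) + int64(x) // row-major grid, row 0 = 90°N
		plane := (int64(ti)*int64(dc.NP) + int64(pi)) * int64(dc.GridSize())
		return int64(varIdx)*dc.SizePerVar() + (plane+cell)*4 // 4 bytes per float32
	}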
@@ -2,6 +2,7 @@ package grib

type dataset struct {
	cube   *cube
+	ds     *DatasetConfig
	runUTC int64 // unix seconds
}
Deleted file: the whole-file HTTP Downloader.

@@ -1,91 +0,0 @@
package grib

import (
	"context"
	"fmt"
	"io"
	"net/http"
	"os"
	"path/filepath"
	"time"

	"git.intra.yksa.space/gsn/predictor/internal/pkg/errcodes"
	"golang.org/x/sync/errgroup"
)

type Downloader struct {
	Dir        string
	Parallel   int
	Client     *http.Client
	DatasetURL string
}

func (d *Downloader) fileURL(run string, hour int, step int) string {
	return fmt.Sprintf("%s/gfs.%s/%02d/atmos/gfs.t%02dz.pgrb2.0p50.f%03d", d.DatasetURL, run, hour, hour, step)
}

func (d *Downloader) fetch(ctx context.Context, url, dst string) (err error) {
	// Check if final file already exists
	if _, err := os.Stat(dst); err == nil {
		return nil
	}

	tmp := dst + ".part"

	// Remove old .part file if it exists (fixes race condition)
	os.Remove(tmp)

	f, err := os.Create(tmp)
	if err != nil {
		return err
	}

	// Cleanup .part file on any error (using named return value)
	defer func() {
		f.Close()
		if err != nil {
			os.Remove(tmp)
		}
	}()

	req, _ := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	resp, err := d.Client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return errcodes.Wrap(errcodes.ErrDownload, "bad status: "+resp.Status)
	}

	if _, err := io.Copy(f, resp.Body); err != nil {
		return err
	}

	// Close file before rename
	if err := f.Close(); err != nil {
		return err
	}

	// If rename fails, err will be set and defer will cleanup .part file
	return os.Rename(tmp, dst)
}

func (d *Downloader) Run(ctx context.Context, run time.Time) error {
	runStr := run.Format("20060102")
	hour := run.Hour()
	g, ctx := errgroup.WithContext(ctx)
	sem := make(chan struct{}, d.Parallel)
	for _, step := range steps {
		step := step
		sem <- struct{}{}
		g.Go(func() error {
			defer func() { <-sem }()
			url := d.fileURL(runStr, hour, step)
			dst := filepath.Join(d.Dir, fileName(run, step))
			return d.fetch(ctx, url, dst)
		})
	}
	return g.Wait()
}
@@ -4,30 +4,100 @@ import "math"

func lerp(a, b, t float64) float64 { return a + t*(b-a) }

-// Interpolate 16-point (time, p, lat, lon)
+// ghInterp returns interpolated geopotential height at given time/pressure/lat/lon
+func (d *dataset) ghInterp(ti, pi int, y0, y1, x0, x1 int, wy, wx float64) float64 {
+	g00 := d.cube.val(0, ti, pi, y0, x0)
+	g10 := d.cube.val(0, ti, pi, y0, x1)
+	g01 := d.cube.val(0, ti, pi, y1, x0)
+	g11 := d.cube.val(0, ti, pi, y1, x1)
+	return (1-wy)*((1-wx)*float64(g00)+wx*float64(g10)) + wy*((1-wx)*float64(g01)+wx*float64(g11))
+}
+
+// searchAltLevel uses geopotential height to find pressure level bracket for target altitude.
+func (d *dataset) searchAltLevel(alt float64, ti, y0, y1, x0, x1 int, wy, wx float64) (int, float64) {
+	levels := d.ds.Levels
+	nLevels := len(levels)
+
+	lo, hi := 0, nLevels-1
+	for lo < hi-1 {
+		mid := (lo + hi) / 2
+		ghMid := d.ghInterp(ti, mid, y0, y1, x0, x1, wy, wx)
+		if ghMid < alt {
+			lo = mid
+		} else {
+			hi = mid
+		}
+	}
+
+	ghLo := d.ghInterp(ti, lo, y0, y1, x0, x1, wy, wx)
+	ghHi := d.ghInterp(ti, hi, y0, y1, x0, x1, wy, wx)
+
+	wp := 0.0
+	if ghHi != ghLo {
+		wp = (alt - ghLo) / (ghHi - ghLo)
+	}
+	if wp < 0 {
+		wp = 0
+	}
+	if wp > 1 {
+		wp = 1
+	}
+
+	return lo, wp
+}
+
+// uv interpolates wind across 4 dimensions (time, pressure, lat, lon).
func (d *dataset) uv(lat, lon, alt float64, tHours float64) (float64, float64) {
	if lon < 0 {
		lon += 360
	}
-	iy := (lat + 90) * 2
+	inv := d.ds.InvResolution()
+
+	// GRIB scan north→south: index 0 = 90°N
+	iy := (90 - lat) * inv
	y0 := int(math.Floor(iy))
+	if y0 < 0 {
+		y0 = 0
+	}
+	if y0 >= d.cube.lat-1 {
+		y0 = d.cube.lat - 2
+	}
	y1 := y0 + 1
	wy := iy - float64(y0)
-	ix := lon * 2
+
+	ix := lon * inv
	x0 := int(math.Floor(ix)) % d.cube.lon
	x1 := (x0 + 1) % d.cube.lon
	wx := ix - float64(x0)
-	// For 3-hourly data (step = 3 hours)
-	// Convert tHours to 3-hour index (e.g., 1.5 hours -> index 0.5, interpolate between 0 and 1)
-	it0 := int(math.Floor(tHours / 3.0))
-	wt := (tHours - float64(it0*3)) / 3.0 // Interpolation weight within 3-hour window
+
+	// Time: divide tHours by the step to get the cube index
+	tIdx := tHours / float64(d.ds.TimeStep)
+	it0 := int(math.Floor(tIdx))
+	if it0 < 0 {
+		it0 = 0
+	}
+	if it0 >= d.cube.t-1 {
+		it0 = d.cube.t - 2
+	}
+	wt := tIdx - float64(it0)
+
+	// ISA: altitude → pressure → level index
+	levels := d.ds.Levels
	p := pressureFromAlt(alt)
	ip0 := 0
-	for ip0+1 < len(pressureLevels) && pressureLevels[ip0+1] > p {
+	for ip0+1 < len(levels) && levels[ip0+1] > p {
		ip0++
	}
	ip1 := ip0 + 1
-	wp := (pressureLevels[ip0] - p) / (pressureLevels[ip0] - pressureLevels[ip1])
+	if ip1 >= len(levels) {
+		ip1 = len(levels) - 1
+	}
+	wp := 0.0
+	if levels[ip0] != levels[ip1] {
+		wp = (levels[ip0] - p) / (levels[ip0] - levels[ip1])
+	}

	fetch := func(ti, pi int) (float64, float64) {
		u00 := d.cube.val(1, ti, pi, y0, x0)
		u10 := d.cube.val(1, ti, pi, y0, x1)

@@ -41,6 +111,7 @@ func (d *dataset) uv(lat, lon, alt float64, tHours float64) (float64, float64) {
		vxy := (1-wy)*((1-wx)*float64(v00)+wx*float64(v10)) + wy*((1-wx)*float64(v01)+wx*float64(v11))
		return uxy, vxy
	}

	u0p0, v0p0 := fetch(it0, ip0)
	u0p1, v0p1 := fetch(it0, ip1)
	u1p0, v1p0 := fetch(it0+1, ip0)
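Editor's note (not part of the diff): a worked sketch of the index/weight math above, assuming the default 0.25° grid (InvResolution() == 4) and TimeStep == 1; the hunk is truncated before the final blend, which is presumably the usual nested lerp over pressure and then time:

	// interpWeightsExample is an editor's illustration, not from the commit.
	func interpWeightsExample() (y0, x0, it0 int, wy, wx, wt float64) {
		lat, lon, tHours := 55.75, 37.62, 2.5
		inv := 4.0                  // InvResolution() for 0.25°
		iy := (90 - lat) * inv      // 137.0
		ix := lon * inv             // 150.48
		tIdx := tHours / 1.0        // 2.5
		y0 = int(math.Floor(iy))    // 137
		x0 = int(math.Floor(ix))    // 150
		it0 = int(math.Floor(tIdx)) // 2
		// wy = 0.0, wx = 0.48, wt = 0.5; u is then presumably
		// lerp(lerp(u0p0, u0p1, wp), lerp(u1p0, u1p1, wp), wt).
		return y0, x0, it0, iy - float64(y0), ix - float64(x0), tIdx - float64(it0)
	}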
@@ -4,7 +4,6 @@ import (
	"context"
	"encoding/binary"
	"math"
-	"net/http"
	"os"
	"path/filepath"
	"strings"

@@ -41,15 +40,12 @@ func New(cfg *Config) (Service, error) {
	// Try to load existing dataset on startup
	if err := s.loadExistingDataset(); err != nil {
		// Log error but don't fail startup - dataset will be loaded on first Update()
		// This allows the service to start even if no data is available yet
	}

	return s, nil
}

// loadExistingDataset tries to load the most recent available dataset
func (s *service) loadExistingDataset() error {
	// Find the most recent cube file
	pattern := filepath.Join(s.cfg.Dir, "*.cube")
	matches, err := filepath.Glob(pattern)
	if err != nil {

@@ -60,7 +56,6 @@ func (s *service) loadExistingDataset() error {
		return errcodes.ErrNoCubeFilesFound
	}

	// Sort by modification time (newest first)
	var latestFile string
	var latestTime time.Time

@@ -69,7 +64,6 @@ func (s *service) loadExistingDataset() error {
		if err != nil {
			continue
		}

		if info.ModTime().After(latestTime) {
			latestTime = info.ModTime()
			latestFile = match

@@ -80,18 +74,16 @@ func (s *service) loadExistingDataset() error {
		return errcodes.ErrNoValidCubeFilesFound
	}

	// Check if the file is fresh enough
	if time.Since(latestTime) > s.cfg.TTL {
		return errcodes.Wrap(errcodes.ErrLatestCubeFileIsTooOld, "latest cube file is too old")
	}

	// Load the dataset
-	c, err := openCube(latestFile)
+	dc := &s.cfg.Dataset
+	c, err := openCube(latestFile, dc)
	if err != nil {
		return err
	}

	// Extract run time from filename
	base := filepath.Base(latestFile)
	runStr := strings.TrimSuffix(base, ".cube")
	run, err := time.Parse("20060102_15", runStr)

@@ -100,94 +92,70 @@ func (s *service) loadExistingDataset() error {
		return err
	}

-	ds := &dataset{cube: c, runUTC: run.Unix()}
-	s.data.Store(ds)
-
+	s.data.Store(&dataset{cube: c, ds: dc, runUTC: run.Unix()})
	return nil
}

// Update() downloads missing GRIBs, assembles cube into a single mmap-file.
func (s *service) Update(ctx context.Context) error {
	// Check if we already have fresh data
	if d := s.data.Load(); d != nil {
		runTime := time.Unix(d.runUTC, 0)
		if time.Since(runTime) < s.cfg.TTL {
			// Data is still fresh, no need to update
			return nil
		}
	}

	// Check again after acquiring lock (double-checked locking pattern)
	if d := s.data.Load(); d != nil {
		runTime := time.Unix(d.runUTC, 0)
		if time.Since(runTime) < s.cfg.TTL {
			// Another instance already updated the data
			return nil
		}
	}

-	run := nearestRun(time.Now().UTC().Add(-24 * time.Hour))
+	dc := &s.cfg.Dataset
+	run := nearestRun(time.Now().UTC().Add(-6 * time.Hour))

	// Check if we already have this run
	cubePath := filepath.Join(s.cfg.Dir, run.Format("20060102_15")) + ".cube"
	if _, err := os.Stat(cubePath); err == nil {
		// File exists, check if it's fresh
		info, err := os.Stat(cubePath)
		if err == nil && time.Since(info.ModTime()) < s.cfg.TTL {
			// File is fresh, just load it
-			c, err := openCube(cubePath)
+			c, err := openCube(cubePath, dc)
			if err != nil {
				return err
			}
-			ds := &dataset{cube: c, runUTC: run.Unix()}
-			s.data.Store(ds)
+			s.data.Store(&dataset{cube: c, ds: dc, runUTC: run.Unix()})
			s.cache = memCache{ttl: s.cfg.CacheTTL}
			return nil
		}
	}

-	// Download new data using S3 or HTTP
-	var downloadErr error
-	if s.cfg.UseS3 {
-		s3dl, err := NewS3Downloader(s.cfg.Dir, s.cfg.Parallel, s.cfg.S3Bucket, s.cfg.S3Region)
-		if err != nil {
-			return errcodes.Wrap(err, "failed to create S3 downloader")
-		}
-		downloadErr = s3dl.Run(ctx, run)
-	} else {
-		dl := Downloader{
-			Dir:        s.cfg.Dir,
-			Parallel:   s.cfg.Parallel,
-			Client:     http.DefaultClient,
-			DatasetURL: s.cfg.DatasetURL,
-		}
-		downloadErr = dl.Run(ctx, run)
-	}
+	downloadCtx, cancel := context.WithTimeout(ctx, 60*time.Minute)
+	defer cancel()
+
+	dl := NewPartialDownloader(s.cfg.Dir, s.cfg.Parallel, dc)
+	if err := dl.Run(downloadCtx, run); err != nil {
+		return err
+	}

-	if downloadErr != nil {
-		return downloadErr
-	}
-
	// Assemble cube if it doesn't exist
	if _, err := os.Stat(cubePath); err != nil {
-		if err := assembleCube(s.cfg.Dir, run, cubePath); err != nil {
+		if err := assembleCube(s.cfg.Dir, run, cubePath, dc); err != nil {
			return err
		}
	}

-	c, err := openCube(cubePath)
+	c, err := openCube(cubePath, dc)
	if err != nil {
		return err
	}
-	ds := &dataset{cube: c, runUTC: run.Unix()}
-	s.data.Store(ds)
+	s.data.Store(&dataset{cube: c, ds: dc, runUTC: run.Unix()})
	s.cache = memCache{ttl: s.cfg.CacheTTL}
	return nil
}

-func assembleCube(dir string, run time.Time, cubePath string) error {
-	const sizePerVar = 33 * 47 * 361 * 720 * 4 // 33 time steps (0-96 hours, 3-hour intervals), 47 pressure levels
-	total := int64(sizePerVar * 3) // 3 variables: gh, u, v
+func assembleCube(dir string, run time.Time, cubePath string, dc *DatasetConfig) error {
+	sizePerVar := dc.SizePerVar()
+	total := dc.CubeSize()
+	gridBytes := int64(dc.GridSize()) * 4

	f, err := os.Create(cubePath)
	if err != nil {
		return err

@@ -203,27 +171,23 @@ func assembleCube(dir string, run time.Time, cubePath string, dc *DatasetConfig) error {
	defer f.Close()

	pIndex := make(map[int]int)
-	for i, p := range pressureLevels {
+	for i, p := range dc.Levels {
		pIndex[int(math.Round(p))] = i
	}

-	for ti, step := range steps {
-		fn := filepath.Join(dir, fileName(run, step))
+	processFile := func(fn string, ti int) error {
		file, err := os.Open(fn)
		if err != nil {
			return err
		}

		messages, err := griblib.ReadMessages(file)
-		file.Close() // Close immediately after reading
+		file.Close()
		if err != nil {
			return err
		}

		for _, m := range messages {
-			// Check if this is a wind component (u or v) or geopotential height
-			// ParameterCategory 2 = momentum, ParameterNumber 2 = u-wind, 3 = v-wind
-			// ParameterCategory 3 = mass, ParameterNumber 5 = geopotential height
			if m.Section4.ProductDefinitionTemplateNumber != 0 {
				continue
			}

@@ -231,7 +195,6 @@ func assembleCube(dir string, run time.Time, cubePath string, dc *DatasetConfig) error {
			product := m.Section4.ProductDefinitionTemplate

			var varIdx int
-			// Match tawhiri variable order: ['gh', 'u', 'v'] (indices 0, 1, 2)
			if product.ParameterCategory == 2 {
				switch product.ParameterNumber {
				case 2: // u-wind

@@ -242,18 +205,15 @@ func assembleCube(dir string, run time.Time, cubePath string, dc *DatasetConfig) error {
					continue
				}
			} else if product.ParameterCategory == 3 && product.ParameterNumber == 5 {
-				// geopotential height
-				varIdx = 0
+				varIdx = 0 // geopotential height
			} else {
				continue
			}

-			// Check if this is a pressure level (type 100)
			if product.FirstSurface.Type != 100 {
				continue
			}

-			// Get pressure level in hPa
			pressure := float64(product.FirstSurface.Value) / 100.0
			pIdx, ok := pIndex[int(math.Round(pressure))]
			if !ok {

@@ -261,14 +221,27 @@ func assembleCube(dir string, run time.Time, cubePath string, dc *DatasetConfig) error {
			}

			vals := m.Data()
			// GRIB library returns scan north->south, west->east already in row-major order
			raw := make([]byte, len(vals)*4)
			for i, v := range vals {
				binary.LittleEndian.PutUint32(raw[i*4:], math.Float32bits(float32(v)))
			}
-			base := int64(varIdx*sizePerVar + (ti*47+pIdx)*361*720*4)
+			base := int64(varIdx)*sizePerVar + (int64(ti)*int64(dc.NP)+int64(pIdx))*gridBytes
			copy(mm[base:base+int64(len(raw))], raw)
		}
+		return nil
+	}
+
+	steps := dc.Steps()
+	for ti, step := range steps {
+		fn := filepath.Join(dir, dc.FileName(run, step))
+		if err := processFile(fn, ti); err != nil {
+			return err
+		}
+
+		fnB := filepath.Join(dir, dc.FileNameB(run, step))
+		if err := processFile(fnB, ti); err != nil {
+			return err
+		}
+	}
	return mm.Flush()
}

@@ -279,24 +252,21 @@ func (s *service) Extract(ctx context.Context, lat, lon, alt float64, ts time.Time)
	if d == nil {
		return zero, errcodes.ErrNoDataset
	}
-	if ts.Before(time.Unix(d.runUTC, 0)) || ts.After(time.Unix(d.runUTC, 0).Add(96*time.Hour)) {
+	maxDur := time.Duration(s.cfg.Dataset.MaxHour) * time.Hour
+	if ts.Before(time.Unix(d.runUTC, 0)) || ts.After(time.Unix(d.runUTC, 0).Add(maxDur)) {
		return zero, errcodes.ErrOutOfBounds
	}

	// Try memory cache first
	key := encodeKey(lat, lon, alt, ts)
	if v, ok := s.cache.get(key); ok {
		return [2]float64(v), nil
	}

	// Calculate result
	td := ts.Sub(time.Unix(d.runUTC, 0)).Hours()
	u, v := d.uv(lat, lon, alt, td)
	out := [2]float64{u, v}

	// Cache in memory
	s.cache.set(key, vec(out))

	return out, nil
}
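Editor's note (not part of the diff): loadExistingDataset recovers the model run purely from the cube filename, which uses the Go reference-time layout "20060102_15". A round-trip sketch (the test name is illustrative):

	package grib

	import (
		"strings"
		"testing"
		"time"
	)

	func TestRunFilenameRoundTrip(t *testing.T) {
		run := time.Date(2026, 1, 16, 6, 0, 0, 0, time.UTC)
		name := run.Format("20060102_15") + ".cube" // "20260116_06.cube"
		back, err := time.Parse("20060102_15", strings.TrimSuffix(name, ".cube"))
		if err != nil || !back.Equal(run) {
			t.Fatalf("round-trip failed: %v %v", back, err)
		}
	}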
internal/pkg/grib/partial_downloader.go Normal file (+350)
@@ -0,0 +1,350 @@
package grib

import (
	"bufio"
	"context"
	"fmt"
	"io"
	"net/http"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"git.intra.yksa.space/gsn/predictor/internal/pkg/errcodes"
	"git.intra.yksa.space/gsn/predictor/internal/pkg/log"
	"go.uber.org/zap"
	"golang.org/x/sync/errgroup"
)

// PartialDownloader downloads only the required fields from GRIB files,
// using HTTP Range requests and .idx index files.
type PartialDownloader struct {
	Dir       string
	Parallel  int
	Client    *http.Client
	Variables []string
	ds        *DatasetConfig
}

// NewPartialDownloader creates a new partial downloader.
func NewPartialDownloader(dir string, parallel int, dc *DatasetConfig) *PartialDownloader {
	return &PartialDownloader{
		Dir:      dir,
		Parallel: parallel,
		Client: &http.Client{
			Timeout: 60 * time.Second,
		},
		Variables: dc.Variables,
		ds:        dc,
	}
}

// idxEntry represents one record of an .idx file.
type idxEntry struct {
	Index     int
	ByteStart int64
	Date      string
	Variable  string
	Level     string
	Forecast  string
}

type ProgressWriter struct {
	Total      int64
	Downloaded int64
	OnProgress func(percent float64)
}

func (pw *ProgressWriter) Write(p []byte) (int, error) {
	n := len(p)
	pw.Downloaded += int64(n)
	if pw.Total > 0 && pw.OnProgress != nil {
		percent := float64(pw.Downloaded) / float64(pw.Total) * 100
		pw.OnProgress(percent)
	}
	return n, nil
}

// parseIdx parses an .idx file and returns its records.
func (d *PartialDownloader) parseIdx(body []byte) []idxEntry {
	var entries []idxEntry
	lines := strings.Split(string(body), "\n")

	for _, line := range lines {
		if line == "" {
			continue
		}
		parts := strings.Split(line, ":")
		if len(parts) < 7 {
			continue
		}

		byteStart, _ := strconv.ParseInt(parts[1], 10, 64)
		entries = append(entries, idxEntry{
			Index:     len(entries),
			ByteStart: byteStart,
			Date:      parts[2],
			Variable:  parts[3],
			Level:     parts[4],
			Forecast:  parts[5],
		})
	}
	return entries
}

// filterEntries keeps only the records for the needed variables and pressure levels.
func (d *PartialDownloader) filterEntries(entries []idxEntry) []idxEntry {
	var filtered []idxEntry

	for _, e := range entries {
		isNeededVar := false
		for _, v := range d.Variables {
			if v == e.Variable {
				isNeededVar = true
				break
			}
		}

		isPressureLevel := strings.HasSuffix(e.Level, " mb")

		if isNeededVar && isPressureLevel {
			filtered = append(filtered, e)
		}
	}

	return filtered
}

// doWithRetry performs a request with retries and exponential backoff.
func (d *PartialDownloader) doWithRetry(ctx context.Context, req *http.Request) (*http.Response, error) {
	var resp *http.Response
	var err error

	backoff := 1 * time.Second
	maxRetries := 3

	for i := 0; i < maxRetries; i++ {
		resp, err = d.Client.Do(req)
		if err == nil && resp.StatusCode < 500 {
			return resp, nil
		}

		if resp != nil {
			resp.Body.Close()
		}

		log.Ctx(ctx).Warn("retry download", zap.Int("attempt", i+1), zap.Error(err))

		select {
		case <-time.After(backoff):
			backoff *= 2
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}
	return nil, err
}

// downloadRange downloads a byte range from a URL.
func (d *PartialDownloader) downloadRange(ctx context.Context, url string, start, end int64, out io.Writer) error {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return err
	}

	rangeHeader := fmt.Sprintf("bytes=%d-", start)
	if end > 0 {
		rangeHeader = fmt.Sprintf("bytes=%d-%d", start, end)
	}
	req.Header.Set("Range", rangeHeader)

	resp, err := d.Client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusPartialContent && resp.StatusCode != http.StatusOK {
		return errcodes.Wrap(errcodes.ErrDownload, "bad status: "+resp.Status)
	}

	_, err = io.Copy(out, resp.Body)
	return err
}

func (d *PartialDownloader) downloadFieldsFromURL(ctx context.Context, url string, dst string, step int) (err error) {
	idxURL := url + ".idx"
	tmp := dst + ".part"

	if info, err := os.Stat(dst); err == nil && info.Size() > 0 {
		return nil
	}

	reqIdx, _ := http.NewRequestWithContext(ctx, http.MethodGet, idxURL, nil)
	respIdx, err := d.doWithRetry(ctx, reqIdx)
	if err != nil {
		return errcodes.Wrap(err, "failed to get idx")
	}
	defer respIdx.Body.Close()

	idxBody, _ := io.ReadAll(respIdx.Body)
	entries := d.parseIdx(idxBody)
	filtered := d.filterEntries(entries)
	if len(filtered) == 0 {
		return nil
	}

	var totalBytes int64
	type chunk struct{ start, end int64 }
	chunks := make([]chunk, 0, len(filtered))

	for _, entry := range filtered {
		var endByte int64 = -1
		for j, e := range entries {
			if e.ByteStart == entry.ByteStart && j+1 < len(entries) {
				endByte = entries[j+1].ByteStart - 1
				break
			}
		}
		chunks = append(chunks, chunk{entry.ByteStart, endByte})
		if endByte > 0 {
			totalBytes += (endByte - entry.ByteStart + 1)
		}
	}

	f, err := os.OpenFile(tmp, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
	if err != nil {
		return err
	}

	var downloaded int64

	err = func() error {
		defer f.Close()
		bufWriter := bufio.NewWriterSize(f, 1024*1024)

		for i, c := range chunks {
			countingWriter := &proxyWriter{
				Writer: bufWriter,
				OnWrite: func(n int) {
					downloaded += int64(n)
					if totalBytes > 0 && i%20 == 0 {
						pct := float64(downloaded) / float64(totalBytes) * 100
						log.Ctx(ctx).Debug("download progress",
							zap.Int("step", step),
							zap.String("pct", fmt.Sprintf("%.1f%%", pct)))
					}
				},
			}

			if err := d.downloadRange(ctx, url, c.start, c.end, countingWriter); err != nil {
				return err
			}
		}
		return bufWriter.Flush()
	}()

	if err != nil {
		f.Close()
		os.Remove(tmp)
		return err
	}

	return d.safeRename(tmp, dst)
}

type proxyWriter struct {
	io.Writer
	OnWrite func(int)
}

func (p *proxyWriter) Write(data []byte) (int, error) {
	n, err := p.Writer.Write(data)
	if n > 0 && p.OnWrite != nil {
		p.OnWrite(n)
	}
	return n, err
}

func (d *PartialDownloader) safeRename(src, dst string) error {
	var lastErr error
	for i := 0; i < 5; i++ {
		if err := os.Rename(src, dst); err == nil {
			return nil
		} else {
			lastErr = err
		}
		time.Sleep(150 * time.Millisecond)
	}
	return fmt.Errorf("rename failed: %w", lastErr)
}

// Run starts downloading all required files (pgrb2 + pgrb2b).
func (d *PartialDownloader) Run(ctx context.Context, run time.Time) error {
	log.Ctx(ctx).Info("starting partial download",
		zap.Time("run", run),
		zap.Strings("variables", d.Variables))

	g, ctx := errgroup.WithContext(ctx)
	sem := make(chan struct{}, d.Parallel)
	steps := d.ds.Steps()

	for _, step := range steps {
		step := step

		// Download primary pgrb2
		sem <- struct{}{}
		g.Go(func() error {
			defer func() { <-sem }()
			url := d.ds.GribURL(run, step)
			dst := filepath.Join(d.Dir, d.ds.FileName(run, step))
			return d.downloadFieldsFromURL(ctx, url, dst, step)
		})

		// Download secondary pgrb2b
		sem <- struct{}{}
		g.Go(func() error {
			defer func() { <-sem }()
			url := d.ds.GribURLB(run, step)
			dst := filepath.Join(d.Dir, d.ds.FileNameB(run, step))
			return d.downloadFieldsFromURL(ctx, url, dst, step)
		})
	}

	return g.Wait()
}

// GetLatestModelRun finds the most recent available GFS forecast run.
func GetLatestModelRun(ctx context.Context, dc *DatasetConfig) (time.Time, error) {
	now := time.Now().UTC()
	hour := now.Hour() - (now.Hour() % 6)
	current := time.Date(now.Year(), now.Month(), now.Day(), hour, 0, 0, 0, time.UTC)

	client := &http.Client{Timeout: 10 * time.Second}

	for i := 0; i < 8; i++ {
		url := dc.GribURL(current, dc.MaxHour)

		req, err := http.NewRequestWithContext(ctx, http.MethodHead, url, nil)
		if err != nil {
			current = current.Add(-6 * time.Hour)
			continue
		}

		resp, err := client.Do(req)
		if err == nil && resp.StatusCode == http.StatusOK {
			resp.Body.Close()
			log.Ctx(ctx).Info("found latest model run", zap.Time("run", current))
			return current, nil
		}
		if resp != nil {
			resp.Body.Close()
		}

		current = current.Add(-6 * time.Hour)
	}

	return time.Time{}, errcodes.Wrap(errcodes.ErrDownload, "no recent GFS forecast found")
}
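Editor's note (not part of the diff): GRIB .idx sidecars are plain text, one record per message, with colon-separated fields: message index, byte offset, date, variable, level, forecast. parseIdx splits those fields, filterEntries keeps pressure-level records (level ends in " mb") for the configured variables, and each record's byte range ends where the next record begins. A sketch with an invented sample line (the numbers are illustrative, not real data):

	package grib

	import (
		"fmt"
		"strings"
	)

	func ExampleIdxLine() {
		line := "597:306598503:d=2026011606:UGRD:500 mb:6 hour fcst:"
		parts := strings.Split(line, ":")
		fmt.Println(parts[1]) // "306598503" -> ByteStart
		fmt.Println(parts[3]) // "UGRD"      -> Variable, matched against dc.Variables
		fmt.Println(parts[4]) // "500 mb"    -> Level, kept because it ends in " mb"
		// The Range request for this record covers
		// bytes=306598503-(next record's ByteStart - 1).
	}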
@@ -2,15 +2,6 @@ package grib

import "math"

-// 47 pressure levels matching tawhiri configuration
-var pressureLevels = []float64{
-	1000, 975, 950, 925, 900, 875, 850, 825, 800, 775,
-	750, 725, 700, 675, 650, 625, 600, 575, 550, 525,
-	500, 475, 450, 425, 400, 375, 350, 325, 300, 275,
-	250, 225, 200, 175, 150, 125, 100, 70, 50, 30,
-	20, 10, 7, 5, 3, 2, 1,
-}
-
func pressureFromAlt(alt float64) float64 { // ICAO ISA
	return 1013.25 * math.Pow(1-alt/44307.69396, 5.255877)
}
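Editor's note (not part of the diff): a quick sanity check of the ICAO ISA formula above against standard-atmosphere landmarks:

	// pressureFromAlt(0)     ≈ 1013.25 hPa (sea level)
	// pressureFromAlt(5574)  ≈ 500 hPa
	// pressureFromAlt(11000) ≈ 226 hPa (tropopause)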
Deleted file: the S3 downloader.

@@ -1,265 +0,0 @@
package grib

import (
	"context"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"time"

	"git.intra.yksa.space/gsn/predictor/internal/pkg/errcodes"
	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/service/s3"
	"golang.org/x/sync/errgroup"
)

// S3Downloader downloads GRIB files from AWS S3
type S3Downloader struct {
	Dir      string
	Parallel int
	Bucket   string
	Region   string
	Client   *s3.Client
}

// NewS3Downloader creates a new S3 downloader with anonymous access
func NewS3Downloader(dir string, parallel int, bucket, region string) (*S3Downloader, error) {
	// Create AWS config with anonymous credentials for public bucket
	cfg, err := config.LoadDefaultConfig(context.Background(),
		config.WithRegion(region),
		config.WithCredentialsProvider(aws.AnonymousCredentials{}),
	)
	if err != nil {
		return nil, errcodes.Wrap(err, "failed to load AWS config")
	}

	client := s3.NewFromConfig(cfg)

	return &S3Downloader{
		Dir:      dir,
		Parallel: parallel,
		Bucket:   bucket,
		Region:   region,
		Client:   client,
	}, nil
}

// s3Key generates the S3 key for a GRIB file
// Path format: gfs.YYYYMMDD/HH/atmos/gfs.tHHz.pgrb2.0p50.fFFF
func (d *S3Downloader) s3Key(run string, hour int, step int) string {
	return fmt.Sprintf("gfs.%s/%02d/atmos/gfs.t%02dz.pgrb2.0p50.f%03d", run, hour, hour, step)
}

// CheckFileExists checks if a file exists in S3 using HeadObject
func (d *S3Downloader) CheckFileExists(ctx context.Context, key string) (bool, int64, error) {
	input := &s3.HeadObjectInput{
		Bucket: aws.String(d.Bucket),
		Key:    aws.String(key),
	}

	result, err := d.Client.HeadObject(ctx, input)
	if err != nil {
		// Check if error is NotFound
		// AWS SDK v2 doesn't export specific error types, check error string
		if isNotFoundError(err) {
			return false, 0, nil
		}
		return false, 0, errcodes.Wrap(err, "failed to check file existence")
	}

	size := int64(0)
	if result.ContentLength != nil {
		size = *result.ContentLength
	}

	return true, size, nil
}

// isNotFoundError checks if error is a NotFound error
func isNotFoundError(err error) bool {
	if err == nil {
		return false
	}
	// AWS SDK v2 error handling
	errStr := err.Error()
	return contains(errStr, "NotFound") || contains(errStr, "404") || contains(errStr, "NoSuchKey")
}

func contains(s, substr string) bool {
	return len(s) >= len(substr) && (s == substr || len(s) > len(substr) && findSubstring(s, substr))
}

func findSubstring(s, substr string) bool {
	for i := 0; i <= len(s)-len(substr); i++ {
		if s[i:i+len(substr)] == substr {
			return true
		}
	}
	return false
}

// ListAvailableFiles lists all available files for a given run
func (d *S3Downloader) ListAvailableFiles(ctx context.Context, run string, hour int) ([]string, error) {
	prefix := fmt.Sprintf("gfs.%s/%02d/atmos/", run, hour)

	input := &s3.ListObjectsV2Input{
		Bucket: aws.String(d.Bucket),
		Prefix: aws.String(prefix),
	}

	var files []string
	paginator := s3.NewListObjectsV2Paginator(d.Client, input)

	for paginator.HasMorePages() {
		page, err := paginator.NextPage(ctx)
		if err != nil {
			return nil, errcodes.Wrap(err, "failed to list S3 objects")
		}

		for _, obj := range page.Contents {
			if obj.Key != nil {
				files = append(files, *obj.Key)
			}
		}
	}

	return files, nil
}

// fetchFromS3 downloads a file from S3 to local disk with retry logic
func (d *S3Downloader) fetchFromS3(ctx context.Context, key, dst string) (err error) {
	// Check if final file already exists
	if _, err := os.Stat(dst); err == nil {
		return nil
	}

	const maxRetries = 3
	var lastErr error

	for attempt := 0; attempt < maxRetries; attempt++ {
		if attempt > 0 {
			// Exponential backoff: 2s, 4s, 8s
			waitTime := time.Duration(1<<uint(attempt)) * time.Second
			time.Sleep(waitTime)
		}

		lastErr = d.fetchFromS3Once(ctx, key, dst)
		if lastErr == nil {
			return nil
		}
	}

	return errcodes.Wrap(lastErr, fmt.Sprintf("failed after %d retries", maxRetries))
}

// fetchFromS3Once performs a single download attempt
func (d *S3Downloader) fetchFromS3Once(ctx context.Context, key, dst string) (err error) {
	tmp := dst + ".part"

	// Remove old .part file if it exists
	os.Remove(tmp)

	f, err := os.Create(tmp)
	if err != nil {
		return err
	}

	fileClosed := false
	// Cleanup .part file on any error (using named return value)
	defer func() {
		if !fileClosed {
			f.Close()
		}
		if err != nil {
			os.Remove(tmp)
		}
	}()

	// Check if file exists in S3
	exists, size, checkErr := d.CheckFileExists(ctx, key)
	if checkErr != nil {
		return errcodes.Wrap(checkErr, "failed to check S3 file existence")
	}
	if !exists {
		return errcodes.Wrap(errcodes.ErrDownload, fmt.Sprintf("file not found in S3: %s", key))
	}

	// Download from S3
	input := &s3.GetObjectInput{
		Bucket: aws.String(d.Bucket),
		Key:    aws.String(key),
	}

	result, err := d.Client.GetObject(ctx, input)
	if err != nil {
		return errcodes.Wrap(err, "failed to get S3 object")
	}
	defer result.Body.Close()

	// Copy to local file
	written, err := io.Copy(f, result.Body)
	if err != nil {
		return errcodes.Wrap(err, fmt.Sprintf("failed to write S3 object to file %s", dst))
	}

	// Verify size if available
	if size > 0 && written != size {
		return errcodes.Wrap(errcodes.ErrDownload, fmt.Sprintf("size mismatch: got %d bytes, expected %d", written, size))
	}

	// Close file before rename
	if err := f.Close(); err != nil {
		return err
	}
	fileClosed = true

	// If rename fails, err will be set and defer will cleanup .part file
	return os.Rename(tmp, dst)
}

// Run downloads all required GRIB files for a forecast run
func (d *S3Downloader) Run(ctx context.Context, run time.Time) error {
	runStr := run.Format("20060102")
	hour := run.Hour()

	// First, list available files to verify they exist
	availableFiles, err := d.ListAvailableFiles(ctx, runStr, hour)
	if err != nil {
		return errcodes.Wrap(err, "failed to list available files")
	}

	if len(availableFiles) == 0 {
		return errcodes.Wrap(errcodes.ErrDownload, fmt.Sprintf("no files found for run %s/%02d", runStr, hour))
	}

	// Build a map of available files for quick lookup
	availableMap := make(map[string]bool)
	for _, file := range availableFiles {
		availableMap[file] = true
	}

	g, ctx := errgroup.WithContext(ctx)
	sem := make(chan struct{}, d.Parallel)

	for _, step := range steps {
		step := step
		key := d.s3Key(runStr, hour, step)

		// Check if file is available in S3
		if !availableMap[key] {
			// Log warning but don't fail - some forecast hours might not be available yet
			continue
		}

		sem <- struct{}{}
		g.Go(func() error {
			defer func() { <-sem }()
			dst := filepath.Join(d.Dir, fileName(run, step))
			return d.fetchFromS3(ctx, key, dst)
		})
	}

	return g.Wait()
}
@@ -6,25 +6,11 @@ import (
	"time"
)

-// Generate steps from 0 to 96 with step 3 hours (33 steps total)
-// GFS provides 3-hourly data for 0-120 hours, we use first 96 hours (0, 3, 6, ..., 96)
-var steps = func() []int {
-	result := make([]int, 0, 33)
-	for i := 0; i <= 96; i += 3 {
-		result = append(result, i)
-	}
-	return result
-}()
-
func nearestRun(t time.Time) time.Time {
	h := t.UTC().Hour() - t.UTC().Hour()%6
	return time.Date(t.Year(), t.Month(), t.Day(), h, 0, 0, 0, time.UTC)
}

-func fileName(run time.Time, step int) string {
-	return fmt.Sprintf("gfs.t%02dz.pgrb2.0p50.f%03d", run.Hour(), step)
-}
-
func encodeKey(a ...any) uint64 {
	h := fnv.New64a()
	for _, v := range a {
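Editor's note (not part of the diff): nearestRun floors to the previous synoptic cycle (00/06/12/18 UTC); Update() above additionally subtracts 6 hours before calling it, which allows for publication lag. For example:

	// nearestRun(time.Date(2026, 1, 16, 14, 37, 0, 0, time.UTC))
	//   = 2026-01-16 12:00 UTC, since 14 - 14%6 = 12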