predictor/internal/datasets/gfs/source.go
2026-05-23 00:55:35 +09:00

138 lines
4.1 KiB
Go

// Package gfs implements datasets.Source for NOAA GFS forecasts.
//
// The package serves multiple GFS variants (0.5° 3-hour, 0.25° 3-hour,
// 0.25° 1-hour); the variant is selected at construction time. The
// download skeleton (HTTP, idx parsing, parallel blit) lives in
// internal/datasets/grib; this package only supplies URL templating and
// the Source-interface plumbing.
package gfs
import (
"context"
"fmt"
"net/http"
"time"
"go.uber.org/zap"
"predictor-refactored/internal/datasets"
"predictor-refactored/internal/datasets/grib"
"predictor-refactored/internal/weather"
wgfs "predictor-refactored/internal/weather/gfs"
)
// Source is the GFS implementation of datasets.Source.
//
// NewSource populates every field. A Source assembled by hand must at least
// set Variant, because the methods dereference it without a nil check.
type Source struct {
// Variant is the GFS variant being served. It supplies the ID reported
// by ID and the GRIB URLs built by url and LatestEpoch.
Variant *wgfs.Variant
// Parallel is forwarded unchanged to grib.Downloader.Parallel.
// NewSource sets it to 8.
Parallel int
// Client is the HTTP client handed to the downloader and used by
// LatestEpoch. When nil, LatestEpoch substitutes a client with a
// 2-minute timeout.
Client *http.Client
// Log is the logger handed to the downloader and used by LatestEpoch.
// When nil, LatestEpoch substitutes a no-op logger.
Log *zap.Logger
}
// NewSource builds a Source for variant using the package defaults:
// Parallel is 8 and Client is an HTTP client with a 2-minute timeout.
// Passing a nil variant selects wgfs.GFS0p50_3h (GFS 0.5° 3-hour, the
// historical Tawhiri default). log is stored as given.
func NewSource(variant *wgfs.Variant, log *zap.Logger) *Source {
	src := &Source{
		Variant:  wgfs.GFS0p50_3h,
		Parallel: 8,
		Client:   &http.Client{Timeout: 2 * time.Minute},
		Log:      log,
	}
	// An explicitly supplied variant replaces the default.
	if variant != nil {
		src.Variant = variant
	}
	return src
}
// ID reports the identifier of the variant this Source serves.
func (s *Source) ID() string {
	return s.Variant.ID
}
// downloader assembles a grib.Downloader that mirrors this Source's
// Variant, Parallel, Client and Log settings and uses s.url to build
// request URLs. A new value is constructed on every call.
func (s *Source) downloader() *grib.Downloader {
	d := new(grib.Downloader)
	d.Variant = s.Variant
	d.URLs = s.url
	d.Parallel = s.Parallel
	d.Client = s.Client
	d.Log = s.Log
	return d
}
// url builds the GFS download URL for one forecast step of a run,
// identified by date, runHour and step. Level set B is served from
// Variant.GribURLB; every other level set uses Variant.GribURL.
//
// The third positional parameter (the member) is ignored: it is unused
// for GFS.
func (s *Source) url(date string, runHour, _, step int, ls wgfs.LevelSet) string {
	switch ls {
	case wgfs.LevelSetB:
		return s.Variant.GribURLB(date, runHour, step)
	default:
		return s.Variant.GribURL(date, runHour, step)
	}
}
// LatestEpoch returns the most recent run NOAA has finished publishing.
//
// Starting from the 6-hourly run slot (00/06/12/18 UTC) at or before the
// current time, it walks back one slot at a time, for at most 8 slots. For
// each slot it sends an HTTP HEAD for the ".idx" file of the step at
// Variant.MaxHour; the first run that answers 200 OK is returned.
//
// A nil Client falls back to a client with a 2-minute timeout and a nil Log
// to a no-op logger. A probe that fails for any other reason is logged at
// debug level and the next older run is tried. If a probe fails because ctx
// was cancelled or its deadline passed, the context error is returned
// (wrapped, so errors.Is still matches) rather than the generic "no recent
// run" error. That error is returned when none of the probed runs is
// available.
func (s *Source) LatestEpoch(ctx context.Context) (time.Time, error) {
	const (
		runInterval = 6 * time.Hour // spacing between consecutive runs
		maxProbes   = 8             // how many runs to look back (two days)
	)

	client := s.Client
	if client == nil {
		client = &http.Client{Timeout: 2 * time.Minute}
	}
	log := s.Log
	if log == nil {
		log = zap.NewNop()
	}

	// Round the current UTC time down to the nearest run slot.
	now := time.Now().UTC()
	newest := time.Date(now.Year(), now.Month(), now.Day(), now.Hour()-now.Hour()%6, 0, 0, 0, time.UTC)

	for i := range maxProbes {
		run := newest.Add(-time.Duration(i) * runInterval)
		url := s.Variant.GribURL(run.Format("20060102"), run.Hour(), s.Variant.MaxHour) + ".idx"

		req, err := http.NewRequestWithContext(ctx, http.MethodHead, url, nil)
		if err != nil {
			log.Debug("building run probe request failed",
				zap.String("url", url),
				zap.Error(err))
			continue
		}

		resp, err := client.Do(req)
		if err != nil {
			// A dead context makes every remaining probe fail the same
			// way; report it instead of pretending no run exists.
			if ctxErr := ctx.Err(); ctxErr != nil {
				return time.Time{}, fmt.Errorf("probing latest %s run: %w", s.Variant.ID, ctxErr)
			}
			log.Debug("run probe failed",
				zap.String("url", url),
				zap.Error(err))
			continue
		}
		// Only the status matters; release the connection right away.
		resp.Body.Close()
		if resp.StatusCode != http.StatusOK {
			continue
		}

		log.Info("latest run discovered",
			zap.String("variant", s.Variant.ID),
			zap.Time("run", run),
			zap.String("verified_url", url))
		return run, nil
	}
	return time.Time{}, fmt.Errorf("no recent %s run found (checked %d runs)", s.Variant.ID, maxProbes)
}
// Coverage reports the geographic and temporal extent of id.
//
// Without a subset the region is the full grid (latitude -90..90,
// longitude 0..360) and the time span runs from id.Epoch to id.Epoch plus
// Variant.MaxHour hours. A Subset.Region replaces the region; a
// Subset.HourRange replaces both time bounds with id.Epoch offset by its
// MinHour and MaxHour.
func (s *Source) Coverage(id datasets.DatasetID) datasets.Coverage {
	// Spatial extent: whole grid unless the subset narrows it.
	region := datasets.Region{MinLat: -90, MaxLat: 90, MinLng: 0, MaxLng: 360}
	if sub := id.Subset.Region; sub != nil {
		region = *sub
	}

	// Temporal extent: the full forecast unless an hour range is given.
	start := id.Epoch
	end := id.Epoch.Add(time.Duration(s.Variant.MaxHour) * time.Hour)
	if hours := id.Subset.HourRange; hours != nil {
		start = id.Epoch.Add(time.Duration(hours.MinHour) * time.Hour)
		end = id.Epoch.Add(time.Duration(hours.MaxHour) * time.Hour)
	}

	return datasets.Coverage{
		Region:    region,
		StartTime: start,
		EndTime:   end,
	}
}
// Open loads a previously stored dataset and exposes it as a WindField.
//
// It fails with a "not found" error when store does not hold id. Otherwise
// the file at store.Path(id) is opened through wgfs.Open with this Source's
// variant and the dataset epoch converted to UTC; an error from wgfs.Open
// is returned unchanged. The context argument is not used.
func (s *Source) Open(_ context.Context, id datasets.DatasetID, store datasets.Storage) (weather.WindField, error) {
	if !store.Exists(id) {
		return nil, fmt.Errorf("dataset %s not found", id.Filename())
	}

	path := store.Path(id)
	epoch := id.Epoch.UTC()
	data, err := wgfs.Open(path, s.Variant, epoch)
	if err != nil {
		return nil, err
	}

	wind := wgfs.NewWind(data)
	return wind, nil
}
// Download fetches the dataset for id by delegating to a freshly built
// grib.Downloader and returns whatever error its Run method reports.
// GFS ignores Subset.Members; the third argument to Run (presumably the
// member index — confirm against grib.Downloader) is always 0.
func (s *Source) Download(ctx context.Context, id datasets.DatasetID, store datasets.Storage, prog datasets.ProgressSink, throttle datasets.Throttle) error {
	dl := s.downloader()
	return dl.Run(ctx, id, 0, store, prog, throttle)
}