This commit is contained in:
Anatoly Antonov 2026-05-18 03:17:17 +09:00
parent 7a8d5d13fa
commit 9e663db9dc
68 changed files with 5647 additions and 2958 deletions

View file

@ -0,0 +1,141 @@
package gfs
import "fmt"
// Dataset cube dimensions, listed in storage order:
// (hour, pressure_level, variable, latitude, longitude).
// This is the same cube layout the reference Tawhiri implementation uses.
const (
	NumHours      = 65  // forecast hours 0, 3, 6, ..., 192
	NumLevels     = 47  // pressure levels
	NumVariables  = 3   // geopotential height, U-wind, V-wind
	NumLatitudes  = 361 // -90.0 to +90.0 inclusive, in 0.5° steps
	NumLongitudes = 720 // 0.0 to 359.5, in 0.5° steps
)

// Forecast-hour axis: hours run from 0 to MaxHour in increments of HourStep.
const (
	HourStep = 3
	MaxHour  = 192
)

// Horizontal grid: spacing and origin of the latitude/longitude axes.
const (
	Resolution = 0.5
	LatStart   = -90.0
	LonStart   = 0.0
)

// Positions along the variable axis of the cube.
const (
	VarHeight = 0
	VarWindU  = 1
	VarWindV  = 2
)

// ElementSize is the size in bytes of one stored cell (a float32).
const ElementSize = 4

// DatasetSize is the canonical file size: every grid cell × element size.
// The product is formed from untyped constants, so it is computed exactly
// before being given the type int64.
const DatasetSize int64 = NumHours * NumLevels * NumVariables *
	NumLatitudes * NumLongitudes * ElementSize
// LevelSet names the GRIB file (primary or secondary) that carries a given
// pressure level.
type LevelSet int

// The two GRIB file sets a pressure level can come from.
const (
	LevelSetA LevelSet = 0 // pgrb2 — primary file
	LevelSetB LevelSet = 1 // pgrb2b — secondary file
)
// Pressures lists the 47 pressure levels (hPa) in dataset index order,
// descending from surface to top of atmosphere: index 0 is 1000 hPa and the
// last index is 1 hPa. The position of a value in this array is the level
// index used when addressing the dataset; init builds the reverse lookup
// (pressureIndex) from it.
var Pressures = [NumLevels]int{
1000, 975, 950, 925, 900, 875, 850, 825, 800, 775,
750, 725, 700, 675, 650, 625, 600, 575, 550, 525,
500, 475, 450, 425, 400, 375, 350, 325, 300, 275,
250, 225, 200, 175, 150, 125, 100, 70, 50, 30,
20, 10, 7, 5, 3, 2, 1,
}
// PressuresPgrb2 lists the levels carried by the primary GRIB file.
// Values are pressures in hPa, written here in ascending order; the order is
// not significant to init, which only records each value as LevelSetA.
var PressuresPgrb2 = []int{
10, 20, 30, 50, 70, 100, 150, 200, 250, 300, 350, 400,
450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 925,
950, 975, 1000,
}
// PressuresPgrb2b lists the levels carried by the secondary GRIB file.
// Values are pressures in hPa, written here in ascending order; init records
// each one as LevelSetB, overriding any LevelSetA entry for the same value.
var PressuresPgrb2b = []int{
1, 2, 3, 5, 7, 125, 175, 225, 275, 325, 375, 425,
475, 525, 575, 625, 675, 725, 775, 825, 875,
}
// Lookup tables populated once by init and read-only afterwards.
var (
	// pressureIndex maps a pressure level (hPa) to its index in Pressures.
	pressureIndex map[int]int
	// pressureLevelSet maps a pressure level (hPa) to the GRIB file set
	// that carries it.
	pressureLevelSet map[int]LevelSet
)

// init fills pressureIndex from Pressures and pressureLevelSet from the two
// per-file level lists. The secondary list is applied last, so it wins if a
// level were ever listed in both.
func init() {
	byLevel := make(map[int]int, NumLevels)
	for idx := 0; idx < len(Pressures); idx++ {
		byLevel[Pressures[idx]] = idx
	}
	pressureIndex = byLevel

	sources := []struct {
		levels []int
		set    LevelSet
	}{
		{PressuresPgrb2, LevelSetA},
		{PressuresPgrb2b, LevelSetB},
	}
	bySet := make(map[int]LevelSet, NumLevels)
	for _, src := range sources {
		for _, hPa := range src.levels {
			bySet[hPa] = src.set
		}
	}
	pressureLevelSet = bySet
}
// PressureIndex looks up the dataset level index of a pressure level given
// in hPa. Levels that are not part of the dataset yield -1.
func PressureIndex(hPa int) int {
	if idx, found := pressureIndex[hPa]; found {
		return idx
	}
	return -1
}
// PressureLevelSet reports which GRIB file set carries the pressure level
// hPa. The boolean is false when the level is unknown; the LevelSet returned
// alongside it is then the zero value and carries no meaning.
func PressureLevelSet(hPa int) (LevelSet, bool) {
	if set, known := pressureLevelSet[hPa]; known {
		return set, true
	}
	return 0, false
}
// HourIndex converts a forecast hour into its index along the dataset's time
// axis. It yields -1 for hours below 0, above MaxHour, or not on the
// HourStep grid.
func HourIndex(hour int) int {
	inRange := hour >= 0 && hour <= MaxHour
	if !inRange || hour%HourStep != 0 {
		return -1
	}
	return hour / HourStep
}
// Hours builds a fresh slice holding every forecast hour in ascending order:
// [0, HourStep, 2*HourStep, ..., MaxHour].
func Hours() []int {
	hours := make([]int, 0, NumHours)
	for step := 0; step*HourStep <= MaxHour; step++ {
		hours = append(hours, step*HourStep)
	}
	return hours
}
// VariableIndex translates a GRIB (category, number) parameter pair into the
// dataset's variable index:
//
//	(3, 5) -> VarHeight
//	(2, 2) -> VarWindU
//	(2, 3) -> VarWindV
//
// Any other pair is a parameter this dataset does not store and yields -1.
func VariableIndex(parameterCategory, parameterNumber int) int {
	switch parameterCategory {
	case 3:
		if parameterNumber == 5 {
			return VarHeight
		}
	case 2:
		switch parameterNumber {
		case 2:
			return VarWindU
		case 3:
			return VarWindV
		}
	}
	return -1
}
// S3BaseURL is the root URL of the NOAA GFS data on the public S3 mirror.
// It has no trailing slash; GribURL and GribURLB append the object path to it.
const S3BaseURL = "https://noaa-gfs-bdp-pds.s3.amazonaws.com"
// GribURL builds the S3 URL of a primary (pgrb2) GRIB file.
//
// date is inserted verbatim after "gfs." in the path, runHour is zero-padded
// to two digits (it appears twice: as a directory and in the file name), and
// forecastStep is zero-padded to three digits.
func GribURL(date string, runHour, forecastStep int) string {
	const layout = "%s/gfs.%s/%02d/atmos/gfs.t%02dz.pgrb2.0p50.f%03d"
	return fmt.Sprintf(layout, S3BaseURL, date, runHour, runHour, forecastStep)
}
// GribURLB builds the S3 URL of a secondary (pgrb2b) GRIB file.
//
// The path has the same shape as the one GribURL produces, with "pgrb2b" in
// place of "pgrb2": date is inserted verbatim, runHour is zero-padded to two
// digits, and forecastStep to three.
func GribURLB(date string, runHour, forecastStep int) string {
	const layout = "%s/gfs.%s/%02d/atmos/gfs.t%02dz.pgrb2b.0p50.f%03d"
	return fmt.Sprintf(layout, S3BaseURL, date, runHour, runHour, forecastStep)
}

View file

@ -0,0 +1,150 @@
package gfs
import (
"encoding/binary"
"fmt"
"math"
"os"
"time"
mmap "github.com/edsrzf/mmap-go"
)
// File is an mmap-backed wind dataset file. The layout is a flat C-order
// row-major array of float32 values, shape (hour, level, variable, lat, lng).
// Cells are stored little-endian (see Val and SetVal). A File is obtained
// from Open, Create or OpenWritable and must be released with Close.
type File struct {
// mm is the memory mapping of the dataset file; Close unmaps it and sets
// it to nil.
mm mmap.MMap
// file is the open handle behind mm; Close closes it and sets it to nil.
file *os.File
// writable records how the mapping was made: true for Create and
// OpenWritable (read-write), false for Open (read-only).
writable bool
// Epoch is the forecast run time (UTC) the file represents.
// Only Open sets it; Create and OpenWritable leave it as the zero time.
Epoch time.Time
}
// Open maps an existing dataset file read-only and tags the result with
// epoch as its forecast run time.
//
// The file must be exactly DatasetSize bytes; any other size is rejected
// before mapping. On every failure the file handle is closed and a nil File
// is returned together with the wrapped error.
func Open(path string, epoch time.Time) (*File, error) {
	handle, err := os.Open(path)
	if err != nil {
		return nil, fmt.Errorf("open dataset: %w", err)
	}
	// Release the handle on any early return; ownership passes to the
	// returned File only once the mapping has succeeded.
	handedOver := false
	defer func() {
		if !handedOver {
			handle.Close()
		}
	}()

	stat, err := handle.Stat()
	if err != nil {
		return nil, fmt.Errorf("stat dataset: %w", err)
	}
	if size := stat.Size(); size != DatasetSize {
		return nil, fmt.Errorf("dataset should be %d bytes (was %d)", DatasetSize, size)
	}

	mapping, err := mmap.Map(handle, mmap.RDONLY, 0)
	if err != nil {
		return nil, fmt.Errorf("mmap dataset: %w", err)
	}

	handedOver = true
	return &File{mm: mapping, file: handle, writable: false, Epoch: epoch}, nil
}
// Create makes (or truncates) the file at path, extends it to DatasetSize
// bytes, and maps it read-write.
//
// The returned File is writable and has a zero Epoch. On failure the file
// handle is closed and a nil File is returned with the wrapped error; a file
// that was already created on disk is left in place.
func Create(path string) (*File, error) {
	handle, err := os.Create(path)
	if err != nil {
		return nil, fmt.Errorf("create dataset: %w", err)
	}
	// Release the handle on any early return; ownership passes to the
	// returned File only once the mapping has succeeded.
	handedOver := false
	defer func() {
		if !handedOver {
			handle.Close()
		}
	}()

	if err := handle.Truncate(DatasetSize); err != nil {
		return nil, fmt.Errorf("truncate dataset: %w", err)
	}

	mapping, err := mmap.MapRegion(handle, int(DatasetSize), mmap.RDWR, 0, 0)
	if err != nil {
		return nil, fmt.Errorf("mmap dataset: %w", err)
	}

	handedOver = true
	return &File{mm: mapping, file: handle, writable: true}, nil
}
// OpenWritable maps an existing dataset file read-write, for resuming a
// partial download.
//
// The file must already be exactly DatasetSize bytes. The returned File is
// writable and has a zero Epoch. On failure the file handle is closed and a
// nil File is returned with the wrapped error.
func OpenWritable(path string) (*File, error) {
	handle, err := os.OpenFile(path, os.O_RDWR, 0o644)
	if err != nil {
		return nil, fmt.Errorf("open dataset rw: %w", err)
	}
	// Release the handle on any early return; ownership passes to the
	// returned File only once the mapping has succeeded.
	handedOver := false
	defer func() {
		if !handedOver {
			handle.Close()
		}
	}()

	stat, err := handle.Stat()
	if err != nil {
		return nil, fmt.Errorf("stat dataset: %w", err)
	}
	if size := stat.Size(); size != DatasetSize {
		return nil, fmt.Errorf("dataset should be %d bytes (was %d)", DatasetSize, size)
	}

	mapping, err := mmap.MapRegion(handle, int(DatasetSize), mmap.RDWR, 0, 0)
	if err != nil {
		return nil, fmt.Errorf("mmap dataset: %w", err)
	}

	handedOver = true
	return &File{mm: mapping, file: handle, writable: true}, nil
}
// offset returns the byte offset of the [hour][level][variable][lat][lng] cell.
func offset(hour, level, variable, lat, lng int) int64 {
idx := int64(hour)
idx = idx*int64(NumLevels) + int64(level)
idx = idx*int64(NumVariables) + int64(variable)
idx = idx*int64(NumLatitudes) + int64(lat)
idx = idx*int64(NumLongitudes) + int64(lng)
return idx * int64(ElementSize)
}
// Val returns the [hour][level][variable][lat][lng] cell, decoding the four
// bytes at that cell's offset as a little-endian float32. The indices are
// not validated; an offset outside the mapping panics on the slice bounds.
func (d *File) Val(hour, level, variable, lat, lng int) float32 {
	start := offset(hour, level, variable, lat, lng)
	raw := d.mm[start : start+4]
	bits := binary.LittleEndian.Uint32(raw)
	return math.Float32frombits(bits)
}
// SetVal stores val in the [hour][level][variable][lat][lng] cell as a
// little-endian float32.
//
// It is only valid on files obtained from Create or OpenWritable and panics
// when called on a read-only file. The explicit check exists because a store
// into the read-only mapping made by Open would otherwise fault the whole
// process instead of failing with a diagnosable message. The indices are not
// validated; an offset outside the mapping panics on the slice bounds.
func (d *File) SetVal(hour, level, variable, lat, lng int, val float32) {
	if !d.writable {
		panic("gfs: SetVal called on a read-only dataset")
	}
	off := offset(hour, level, variable, lat, lng)
	binary.LittleEndian.PutUint32(d.mm[off:off+4], math.Float32bits(val))
}
// BlitGribData copies one decoded GRIB grid into the dataset at
// (hourIdx, levelIdx, varIdx), converting each value to float32 and flipping
// the latitude axis from GRIB's north-to-south scan order to our
// south-to-north storage order.
//
// gribData must hold exactly NumLatitudes*NumLongitudes values in row-major
// order, one row per latitude. An error is returned, and nothing is written,
// when the length is wrong or when any index is outside its axis. The index
// check matters because HourIndex, PressureIndex and VariableIndex signal
// "unknown" with -1, which would otherwise address a neighbouring grid or
// fall outside the file.
func (d *File) BlitGribData(hourIdx, levelIdx, varIdx int, gribData []float64) error {
	const expected = NumLatitudes * NumLongitudes
	if len(gribData) != expected {
		return fmt.Errorf("grib data has %d values, expected %d", len(gribData), expected)
	}
	switch {
	case hourIdx < 0 || hourIdx >= NumHours:
		return fmt.Errorf("hour index %d out of range [0, %d)", hourIdx, NumHours)
	case levelIdx < 0 || levelIdx >= NumLevels:
		return fmt.Errorf("level index %d out of range [0, %d)", levelIdx, NumLevels)
	case varIdx < 0 || varIdx >= NumVariables:
		return fmt.Errorf("variable index %d out of range [0, %d)", varIdx, NumVariables)
	}

	for lat := 0; lat < NumLatitudes; lat++ {
		// GRIB row 0 is the northernmost latitude; dataset row 0 is the
		// southernmost, so read the rows in reverse.
		rowStart := (NumLatitudes - 1 - lat) * NumLongitudes
		row := gribData[rowStart : rowStart+NumLongitudes]
		for lng, v := range row {
			d.SetVal(hourIdx, levelIdx, varIdx, lat, lng, float32(v))
		}
	}
	return nil
}
// Flush asks the mapping to write its contents back to disk and returns the
// mapping's error, if any. On a File whose mapping is nil (for example after
// Close) it does nothing and returns nil.
func (d *File) Flush() error {
	if d.mm == nil {
		return nil
	}
	return d.mm.Flush()
}
// Close unmaps the dataset and closes the underlying file.
//
// Both steps are always attempted and both fields are cleared afterwards, so
// the File ends up fully released even when unmapping fails and a repeated
// Close is a no-op that returns nil. If unmapping fails, that error (wrapped
// with "unmap:") is returned; otherwise the result of closing the file is
// returned.
func (d *File) Close() error {
	var unmapErr error
	if d.mm != nil {
		if err := d.mm.Unmap(); err != nil {
			unmapErr = fmt.Errorf("unmap: %w", err)
		}
		d.mm = nil
	}

	var closeErr error
	if d.file != nil {
		closeErr = d.file.Close()
		d.file = nil
	}

	// The unmap failure is reported in preference to a close failure.
	if unmapErr != nil {
		return unmapErr
	}
	return closeErr
}

View file

@ -0,0 +1,109 @@
package gfs
import (
"time"
"predictor-refactored/internal/numerics"
"predictor-refactored/internal/weather"
)
// Wind is a WindField backed by a GFS dataset file. It holds no state of its
// own beyond the file: every sample is read from it, Epoch reports the file's
// Epoch, and Close closes the file.
type Wind struct {
// file is the dataset all samples are read from.
file *File
}
// NewWind wraps file in a Wind. The Wind keeps the pointer it is given; it
// does not copy or reopen the file.
func NewWind(file *File) *Wind {
	w := new(Wind)
	w.file = file
	return w
}
// Epoch reports the forecast run time recorded on the underlying file.
func (w *Wind) Epoch() time.Time {
	return w.file.Epoch
}
// Source identifies the dataset behind this field; it is always
// "noaa-gfs-0p50".
func (w *Wind) Source() string {
	return "noaa-gfs-0p50"
}
// Close closes the underlying dataset file and returns whatever error that
// produces.
func (w *Wind) Close() error {
	return w.file.Close()
}
// Grid axes for the GFS 0.5-degree dataset. Wind calls Locate on each of
// these to bracket a query coordinate before interpolating. The values come
// from the dataset layout constants; the exact meaning of each field is
// defined by numerics.Axis.
var (
// hourAxis describes the forecast-hour axis: NumHours points starting at 0
// with HourStep between them. Wind queries it in hours since the file's
// Epoch.
hourAxis = numerics.Axis{
Left: 0,
Step: float64(HourStep),
N: NumHours,
Name: "hour",
}
// latAxis describes the latitude axis: NumLatitudes points starting at
// LatStart with Resolution between them.
latAxis = numerics.Axis{
Left: LatStart,
Step: Resolution,
N: NumLatitudes,
Name: "lat",
}
// lngAxis describes the longitude axis: NumLongitudes points starting at
// LonStart with Resolution between them. It is the only axis with Wrap
// set — presumably so that Locate treats longitude as periodic; confirm
// against numerics.Axis.
lngAxis = numerics.Axis{
Left: LonStart,
Step: Resolution,
N: NumLongitudes,
Wrap: true,
Name: "lng",
}
)
// Wind samples the field at UNIX time t (seconds), latitude lat, longitude
// lng, and altitude alt.
//
// The query is first bracketed on the hour, latitude and longitude axes; a
// Locate error on any of them is returned as-is with a zero Sample. The
// vertical step matches Tawhiri: bisect over the pressure levels for the
// adjacent pair whose interpolated geopotential heights bracket alt, then
// blend U and V linearly between those two levels. AboveModel is set when
// the weight given to the lower level comes out negative.
func (w *Wind) Wind(t, lat, lng, alt float64) (weather.Sample, error) {
	var none weather.Sample

	// Forecast time is measured in hours since the file's run time.
	elapsedHours := (t - float64(w.file.Epoch.Unix())) / 3600.0
	hourBr, err := hourAxis.Locate(elapsedHours)
	if err != nil {
		return none, err
	}
	latBr, err := latAxis.Locate(lat)
	if err != nil {
		return none, err
	}
	lngBr, err := lngAxis.Locate(lng)
	if err != nil {
		return none, err
	}
	cell := [3]numerics.Bracket{hourBr, latBr, lngBr}

	// sample interpolates one variable on one pressure level at the query's
	// (hour, lat, lng) position.
	sample := func(level, variable int) float64 {
		return numerics.EvalTrilinear(cell, func(i, j, k int) float64 {
			return float64(w.file.Val(i, level, variable, j, k))
		})
	}
	heightAt := func(level int) float64 {
		return sample(level, VarHeight)
	}

	// lo and hi are the two adjacent levels used for vertical interpolation.
	lo := numerics.Bisect(0, NumLevels-2, alt, heightAt)
	hi := lo + 1
	loHeight := heightAt(lo)
	hiHeight := heightAt(hi)

	// loWeight is the share of the lower level in the blend. Equal heights
	// would divide by zero, so the two levels are averaged in that case.
	loWeight := 0.5
	if loHeight != hiHeight {
		loWeight = (hiHeight - alt) / (hiHeight - loHeight)
	}

	blend := func(lower, upper float64) float64 {
		return lower*loWeight + upper*(1-loWeight)
	}
	u := blend(sample(lo, VarWindU), sample(hi, VarWindU))
	v := blend(sample(lo, VarWindV), sample(hi, VarWindV))

	return weather.Sample{
		U:          u,
		V:          v,
		AboveModel: loWeight < 0,
	}, nil
}

37
internal/weather/types.go Normal file
View file

@ -0,0 +1,37 @@
// Package weather defines the abstract interface trajectory engines use
// to sample atmospheric data, and contains source-specific implementations
// in its subpackages.
package weather
import "time"
// Sample is the result of sampling a wind field at one point.
// The zero value is a calm sample (U and V both 0) with AboveModel unset.
type Sample struct {
// U is the eastward wind component in m/s.
U float64
// V is the northward wind component in m/s.
V float64
// AboveModel is set when the query altitude was above the highest
// pressure level represented in the underlying dataset. The returned
// U/V values are linear extrapolations and should be treated as unreliable.
AboveModel bool
}
// WindField provides 3D wind data interpolated at arbitrary points.
//
// Implementations must be safe for concurrent use.
type WindField interface {
// Wind samples the field at (t, lat, lng, alt) and returns the wind
// components there as a Sample.
//
// t is UNIX seconds. lat is in degrees, -90 to +90. lng is in degrees,
// 0 to 360 (callers must normalize). alt is metres above mean sea level.
//
// Returns an error if any coordinate is outside the field's domain.
// An altitude above the dataset's top level is reported through
// Sample.AboveModel.
Wind(t, lat, lng, alt float64) (Sample, error)
// Epoch returns the time the field is anchored to (forecast run time).
Epoch() time.Time
// Source identifies the underlying dataset for logs and metrics.
Source() string
}