forked from gsn/predictor
feat: refactor
This commit is contained in:
parent
82ef1cb3b8
commit
51bbf3c579
44 changed files with 8589 additions and 0 deletions
158
internal/dataset/dataset.go
Normal file
158
internal/dataset/dataset.go
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
package dataset
|
||||
|
||||
import "fmt"
|
||||
|
||||
// Dataset shape constants.
|
||||
// Shape: (65, 47, 3, 361, 720) = (hour, pressure_level, variable, latitude, longitude)
|
||||
// This matches the reference predictor exactly.
|
||||
const (
|
||||
NumHours = 65 // 0, 3, 6, ..., 192
|
||||
NumLevels = 47 // pressure levels
|
||||
NumVariables = 3 // height, wind_u, wind_v
|
||||
NumLatitudes = 361 // -90.0 to +90.0 in 0.5 degree steps
|
||||
NumLongitudes = 720 // 0.0 to 359.5 in 0.5 degree steps
|
||||
|
||||
HourStep = 3 // hours between forecast time steps
|
||||
MaxHour = 192 // maximum forecast hour
|
||||
Resolution = 0.5 // grid resolution in degrees
|
||||
LatStart = -90.0 // first latitude in the dataset
|
||||
LonStart = 0.0 // first longitude in the dataset
|
||||
|
||||
// Variable indices within the dataset.
|
||||
VarHeight = 0
|
||||
VarWindU = 1
|
||||
VarWindV = 2
|
||||
|
||||
ElementSize = 4 // float32 = 4 bytes
|
||||
)
|
||||
|
||||
// DatasetSize is the total size of the dataset file in bytes.
|
||||
// 65 * 47 * 3 * 361 * 720 * 4 = 9,528,667,200
|
||||
const DatasetSize int64 = int64(NumHours) * int64(NumLevels) * int64(NumVariables) *
|
||||
int64(NumLatitudes) * int64(NumLongitudes) * int64(ElementSize)
|
||||
|
||||
// LevelSet identifies which GRIB file set a pressure level belongs to.
|
||||
type LevelSet int
|
||||
|
||||
const (
|
||||
LevelSetA LevelSet = iota // pgrb2 (primary)
|
||||
LevelSetB // pgrb2b (secondary)
|
||||
)
|
||||
|
||||
// Pressures contains the 47 pressure levels in hPa, sorted descending.
|
||||
// Index 0 = 1000 hPa (near surface), Index 46 = 1 hPa (high atmosphere).
|
||||
var Pressures = [NumLevels]int{
|
||||
1000, 975, 950, 925, 900, 875, 850, 825, 800, 775,
|
||||
750, 725, 700, 675, 650, 625, 600, 575, 550, 525,
|
||||
500, 475, 450, 425, 400, 375, 350, 325, 300, 275,
|
||||
250, 225, 200, 175, 150, 125, 100, 70, 50, 30,
|
||||
20, 10, 7, 5, 3, 2, 1,
|
||||
}
|
||||
|
||||
// pressureIndex maps pressure in hPa to its index in the Pressures array.
|
||||
var pressureIndex map[int]int
|
||||
|
||||
// pressureLevelSet maps pressure in hPa to its GRIB file set.
|
||||
var pressureLevelSet map[int]LevelSet
|
||||
|
||||
func init() {
|
||||
pressureIndex = make(map[int]int, NumLevels)
|
||||
for i, p := range Pressures {
|
||||
pressureIndex[p] = i
|
||||
}
|
||||
|
||||
pressureLevelSet = make(map[int]LevelSet, NumLevels)
|
||||
for _, p := range PressuresPgrb2 {
|
||||
pressureLevelSet[p] = LevelSetA
|
||||
}
|
||||
for _, p := range PressuresPgrb2b {
|
||||
pressureLevelSet[p] = LevelSetB
|
||||
}
|
||||
}
|
||||
|
||||
// PressuresPgrb2 contains levels found in the primary pgrb2 file (26 levels).
|
||||
var PressuresPgrb2 = []int{
|
||||
10, 20, 30, 50, 70, 100, 150, 200, 250, 300, 350, 400,
|
||||
450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 925,
|
||||
950, 975, 1000,
|
||||
}
|
||||
|
||||
// PressuresPgrb2b contains levels found in the secondary pgrb2b file (21 levels).
|
||||
var PressuresPgrb2b = []int{
|
||||
1, 2, 3, 5, 7, 125, 175, 225, 275, 325, 375, 425,
|
||||
475, 525, 575, 625, 675, 725, 775, 825, 875,
|
||||
}
|
||||
|
||||
// PressureIndex returns the dataset index for a given pressure level in hPa.
|
||||
// Returns -1 if the level is not found.
|
||||
func PressureIndex(hPa int) int {
|
||||
idx, ok := pressureIndex[hPa]
|
||||
if !ok {
|
||||
return -1
|
||||
}
|
||||
return idx
|
||||
}
|
||||
|
||||
// PressureLevelSet returns which GRIB file set a pressure level belongs to.
|
||||
func PressureLevelSet(hPa int) (LevelSet, bool) {
|
||||
ls, ok := pressureLevelSet[hPa]
|
||||
return ls, ok
|
||||
}
|
||||
|
||||
// HourIndex returns the dataset time index for a forecast hour.
|
||||
// Returns -1 if the hour is invalid (not a multiple of HourStep or out of range).
|
||||
func HourIndex(hour int) int {
|
||||
if hour < 0 || hour > MaxHour || hour%HourStep != 0 {
|
||||
return -1
|
||||
}
|
||||
return hour / HourStep
|
||||
}
|
||||
|
||||
// Hours returns all forecast hours as a slice: [0, 3, 6, ..., 192].
|
||||
func Hours() []int {
|
||||
out := make([]int, 0, NumHours)
|
||||
for h := 0; h <= MaxHour; h += HourStep {
|
||||
out = append(out, h)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// S3 URL configuration for NOAA GFS data.
|
||||
const S3BaseURL = "https://noaa-gfs-bdp-pds.s3.amazonaws.com"
|
||||
|
||||
// GribURL returns the S3 URL for a primary (pgrb2) GRIB file.
|
||||
func GribURL(date string, runHour, forecastStep int) string {
|
||||
return fmt.Sprintf("%s/gfs.%s/%02d/atmos/gfs.t%02dz.pgrb2.0p50.f%03d",
|
||||
S3BaseURL, date, runHour, runHour, forecastStep)
|
||||
}
|
||||
|
||||
// GribURLB returns the S3 URL for a secondary (pgrb2b) GRIB file.
|
||||
func GribURLB(date string, runHour, forecastStep int) string {
|
||||
return fmt.Sprintf("%s/gfs.%s/%02d/atmos/gfs.t%02dz.pgrb2b.0p50.f%03d",
|
||||
S3BaseURL, date, runHour, runHour, forecastStep)
|
||||
}
|
||||
|
||||
// GribFileName returns the local filename for a primary GRIB file.
|
||||
func GribFileName(runHour, forecastStep int) string {
|
||||
return fmt.Sprintf("gfs.t%02dz.pgrb2.0p50.f%03d", runHour, forecastStep)
|
||||
}
|
||||
|
||||
// GribFileNameB returns the local filename for a secondary GRIB file.
|
||||
func GribFileNameB(runHour, forecastStep int) string {
|
||||
return fmt.Sprintf("gfs.t%02dz.pgrb2b.0p50.f%03d", runHour, forecastStep)
|
||||
}
|
||||
|
||||
// VariableIndex returns the dataset variable index for a GRIB parameter.
|
||||
// Returns -1 if the parameter is not recognized.
|
||||
func VariableIndex(parameterCategory, parameterNumber int) int {
|
||||
switch {
|
||||
case parameterCategory == 3 && parameterNumber == 5:
|
||||
return VarHeight // Geopotential Height
|
||||
case parameterCategory == 2 && parameterNumber == 2:
|
||||
return VarWindU // U-component of wind
|
||||
case parameterCategory == 2 && parameterNumber == 3:
|
||||
return VarWindV // V-component of wind
|
||||
default:
|
||||
return -1
|
||||
}
|
||||
}
|
||||
152
internal/dataset/dataset_test.go
Normal file
152
internal/dataset/dataset_test.go
Normal file
|
|
@ -0,0 +1,152 @@
|
|||
package dataset
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDatasetShape(t *testing.T) {
|
||||
if NumHours != 65 {
|
||||
t.Errorf("NumHours = %d, want 65", NumHours)
|
||||
}
|
||||
if NumLevels != 47 {
|
||||
t.Errorf("NumLevels = %d, want 47", NumLevels)
|
||||
}
|
||||
if NumVariables != 3 {
|
||||
t.Errorf("NumVariables = %d, want 3", NumVariables)
|
||||
}
|
||||
if NumLatitudes != 361 {
|
||||
t.Errorf("NumLatitudes = %d, want 361", NumLatitudes)
|
||||
}
|
||||
if NumLongitudes != 720 {
|
||||
t.Errorf("NumLongitudes = %d, want 720", NumLongitudes)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDatasetSize(t *testing.T) {
|
||||
// 65 * 47 * 3 * 361 * 720 * 4 = 9,528,667,200
|
||||
want := int64(9_528_667_200)
|
||||
if DatasetSize != want {
|
||||
t.Errorf("DatasetSize = %d, want %d", DatasetSize, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPressureLevels(t *testing.T) {
|
||||
if len(Pressures) != 47 {
|
||||
t.Fatalf("len(Pressures) = %d, want 47", len(Pressures))
|
||||
}
|
||||
|
||||
// First should be 1000 (highest pressure, near surface)
|
||||
if Pressures[0] != 1000 {
|
||||
t.Errorf("Pressures[0] = %d, want 1000", Pressures[0])
|
||||
}
|
||||
// Last should be 1 (lowest pressure, high atmosphere)
|
||||
if Pressures[46] != 1 {
|
||||
t.Errorf("Pressures[46] = %d, want 1", Pressures[46])
|
||||
}
|
||||
|
||||
// Should be sorted descending
|
||||
for i := 1; i < len(Pressures); i++ {
|
||||
if Pressures[i] >= Pressures[i-1] {
|
||||
t.Errorf("Pressures not descending at [%d]: %d >= %d", i, Pressures[i], Pressures[i-1])
|
||||
}
|
||||
}
|
||||
|
||||
// Total levels: 26 from pgrb2 + 21 from pgrb2b = 47
|
||||
if len(PressuresPgrb2) != 26 {
|
||||
t.Errorf("len(PressuresPgrb2) = %d, want 26", len(PressuresPgrb2))
|
||||
}
|
||||
if len(PressuresPgrb2b) != 21 {
|
||||
t.Errorf("len(PressuresPgrb2b) = %d, want 21", len(PressuresPgrb2b))
|
||||
}
|
||||
}
|
||||
|
||||
func TestPressureIndex(t *testing.T) {
|
||||
if PressureIndex(1000) != 0 {
|
||||
t.Errorf("PressureIndex(1000) = %d, want 0", PressureIndex(1000))
|
||||
}
|
||||
if PressureIndex(1) != 46 {
|
||||
t.Errorf("PressureIndex(1) = %d, want 46", PressureIndex(1))
|
||||
}
|
||||
if PressureIndex(500) != 20 {
|
||||
t.Errorf("PressureIndex(500) = %d, want 20", PressureIndex(500))
|
||||
}
|
||||
if PressureIndex(9999) != -1 {
|
||||
t.Errorf("PressureIndex(9999) = %d, want -1", PressureIndex(9999))
|
||||
}
|
||||
}
|
||||
|
||||
func TestPressureLevelSet(t *testing.T) {
|
||||
// 1000 mb should be in pgrb2 (A)
|
||||
ls, ok := PressureLevelSet(1000)
|
||||
if !ok || ls != LevelSetA {
|
||||
t.Errorf("PressureLevelSet(1000) = %v, %v; want A, true", ls, ok)
|
||||
}
|
||||
|
||||
// 125 mb should be in pgrb2b (B)
|
||||
ls, ok = PressureLevelSet(125)
|
||||
if !ok || ls != LevelSetB {
|
||||
t.Errorf("PressureLevelSet(125) = %v, %v; want B, true", ls, ok)
|
||||
}
|
||||
|
||||
// 1, 2, 3, 5, 7 should be in pgrb2b (B)
|
||||
for _, p := range []int{1, 2, 3, 5, 7} {
|
||||
ls, ok := PressureLevelSet(p)
|
||||
if !ok || ls != LevelSetB {
|
||||
t.Errorf("PressureLevelSet(%d) = %v, %v; want B, true", p, ls, ok)
|
||||
}
|
||||
}
|
||||
|
||||
// Every pressure level should have a level set assignment
|
||||
for _, p := range Pressures {
|
||||
_, ok := PressureLevelSet(p)
|
||||
if !ok {
|
||||
t.Errorf("PressureLevelSet(%d) not found", p)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHourIndex(t *testing.T) {
|
||||
if HourIndex(0) != 0 {
|
||||
t.Errorf("HourIndex(0) = %d, want 0", HourIndex(0))
|
||||
}
|
||||
if HourIndex(3) != 1 {
|
||||
t.Errorf("HourIndex(3) = %d, want 1", HourIndex(3))
|
||||
}
|
||||
if HourIndex(192) != 64 {
|
||||
t.Errorf("HourIndex(192) = %d, want 64", HourIndex(192))
|
||||
}
|
||||
if HourIndex(1) != -1 {
|
||||
t.Errorf("HourIndex(1) = %d, want -1 (not multiple of 3)", HourIndex(1))
|
||||
}
|
||||
if HourIndex(195) != -1 {
|
||||
t.Errorf("HourIndex(195) = %d, want -1 (out of range)", HourIndex(195))
|
||||
}
|
||||
}
|
||||
|
||||
func TestHours(t *testing.T) {
|
||||
hours := Hours()
|
||||
if len(hours) != NumHours {
|
||||
t.Fatalf("len(Hours()) = %d, want %d", len(hours), NumHours)
|
||||
}
|
||||
if hours[0] != 0 {
|
||||
t.Errorf("Hours()[0] = %d, want 0", hours[0])
|
||||
}
|
||||
if hours[len(hours)-1] != MaxHour {
|
||||
t.Errorf("Hours()[last] = %d, want %d", hours[len(hours)-1], MaxHour)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVariableIndex(t *testing.T) {
|
||||
if VariableIndex(3, 5) != VarHeight {
|
||||
t.Errorf("HGT: got %d, want %d", VariableIndex(3, 5), VarHeight)
|
||||
}
|
||||
if VariableIndex(2, 2) != VarWindU {
|
||||
t.Errorf("UGRD: got %d, want %d", VariableIndex(2, 2), VarWindU)
|
||||
}
|
||||
if VariableIndex(2, 3) != VarWindV {
|
||||
t.Errorf("VGRD: got %d, want %d", VariableIndex(2, 3), VarWindV)
|
||||
}
|
||||
if VariableIndex(0, 0) != -1 {
|
||||
t.Errorf("unknown: got %d, want -1", VariableIndex(0, 0))
|
||||
}
|
||||
}
|
||||
140
internal/dataset/file.go
Normal file
140
internal/dataset/file.go
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
package dataset
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
mmap "github.com/edsrzf/mmap-go"
|
||||
)
|
||||
|
||||
// File represents an mmap-backed wind dataset file.
|
||||
type File struct {
|
||||
mm mmap.MMap
|
||||
file *os.File
|
||||
writable bool
|
||||
DSTime time.Time // forecast run time (UTC)
|
||||
}
|
||||
|
||||
// Open opens an existing dataset file for reading.
|
||||
func Open(path string, dsTime time.Time) (*File, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open dataset: %w", err)
|
||||
}
|
||||
|
||||
info, err := f.Stat()
|
||||
if err != nil {
|
||||
f.Close()
|
||||
return nil, fmt.Errorf("stat dataset: %w", err)
|
||||
}
|
||||
if info.Size() != DatasetSize {
|
||||
f.Close()
|
||||
return nil, fmt.Errorf("dataset should be %d bytes (was %d)", DatasetSize, info.Size())
|
||||
}
|
||||
|
||||
mm, err := mmap.Map(f, mmap.RDONLY, 0)
|
||||
if err != nil {
|
||||
f.Close()
|
||||
return nil, fmt.Errorf("mmap dataset: %w", err)
|
||||
}
|
||||
|
||||
return &File{mm: mm, file: f, writable: false, DSTime: dsTime}, nil
|
||||
}
|
||||
|
||||
// Create creates a new dataset file for writing.
|
||||
// The file is truncated to the correct size and mmap'd read-write.
|
||||
func Create(path string) (*File, error) {
|
||||
f, err := os.Create(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("create dataset: %w", err)
|
||||
}
|
||||
|
||||
if err := f.Truncate(DatasetSize); err != nil {
|
||||
f.Close()
|
||||
return nil, fmt.Errorf("truncate dataset: %w", err)
|
||||
}
|
||||
|
||||
mm, err := mmap.MapRegion(f, int(DatasetSize), mmap.RDWR, 0, 0)
|
||||
if err != nil {
|
||||
f.Close()
|
||||
return nil, fmt.Errorf("mmap dataset: %w", err)
|
||||
}
|
||||
|
||||
return &File{mm: mm, file: f, writable: true}, nil
|
||||
}
|
||||
|
||||
// offset computes the byte offset for element [hour][level][variable][lat][lon].
|
||||
// Row-major C-order indexing matching the reference implementation:
|
||||
// shape = (65, 47, 3, 361, 720)
|
||||
func offset(hour, level, variable, lat, lon int) int64 {
|
||||
idx := int64(hour)
|
||||
idx = idx*int64(NumLevels) + int64(level)
|
||||
idx = idx*int64(NumVariables) + int64(variable)
|
||||
idx = idx*int64(NumLatitudes) + int64(lat)
|
||||
idx = idx*int64(NumLongitudes) + int64(lon)
|
||||
return idx * int64(ElementSize)
|
||||
}
|
||||
|
||||
// Val reads a float32 value from the dataset at [hour][level][variable][lat][lon].
|
||||
func (d *File) Val(hour, level, variable, lat, lon int) float32 {
|
||||
off := offset(hour, level, variable, lat, lon)
|
||||
bits := binary.LittleEndian.Uint32(d.mm[off : off+4])
|
||||
return math.Float32frombits(bits)
|
||||
}
|
||||
|
||||
// SetVal writes a float32 value to the dataset at [hour][level][variable][lat][lon].
|
||||
// Only valid on writable (created) datasets.
|
||||
func (d *File) SetVal(hour, level, variable, lat, lon int, val float32) {
|
||||
off := offset(hour, level, variable, lat, lon)
|
||||
binary.LittleEndian.PutUint32(d.mm[off:off+4], math.Float32bits(val))
|
||||
}
|
||||
|
||||
// BlitGribData copies decoded GRIB grid data into the dataset at the given position.
|
||||
// gribData is 361*720 float64 values in GRIB scan order (north-to-south, west-to-east).
|
||||
// This function flips the latitude so that dataset index 0 = -90 (south) and 360 = +90 (north).
|
||||
func (d *File) BlitGribData(hourIdx, levelIdx, varIdx int, gribData []float64) error {
|
||||
expected := NumLatitudes * NumLongitudes
|
||||
if len(gribData) != expected {
|
||||
return fmt.Errorf("grib data has %d values, expected %d", len(gribData), expected)
|
||||
}
|
||||
|
||||
for lat := 0; lat < NumLatitudes; lat++ {
|
||||
for lon := 0; lon < NumLongitudes; lon++ {
|
||||
// GRIB scans north-to-south: row 0 = 90N, row 360 = 90S
|
||||
// Dataset stores south-to-north: index 0 = -90 (90S), index 360 = +90 (90N)
|
||||
gribIdx := (360-lat)*NumLongitudes + lon
|
||||
val := float32(gribData[gribIdx])
|
||||
d.SetVal(hourIdx, levelIdx, varIdx, lat, lon, val)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Flush flushes the mmap to disk.
|
||||
func (d *File) Flush() error {
|
||||
if d.mm != nil {
|
||||
return d.mm.Flush()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close unmaps and closes the dataset file.
|
||||
func (d *File) Close() error {
|
||||
if d.mm != nil {
|
||||
if err := d.mm.Unmap(); err != nil {
|
||||
d.file.Close()
|
||||
return fmt.Errorf("unmap: %w", err)
|
||||
}
|
||||
d.mm = nil
|
||||
}
|
||||
if d.file != nil {
|
||||
err := d.file.Close()
|
||||
d.file = nil
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue