engine refactor
This commit is contained in:
parent
9e663db9dc
commit
81b8e763bd
37 changed files with 3532 additions and 1639 deletions
|
|
@ -1,125 +0,0 @@
|
|||
package gfs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// IdxEntry is one parsed line from a NOAA GRIB .idx file.
//
// Example line: "15:1207405:d=2024010100:HGT:1000 mb:0 hour fcst:"
type IdxEntry struct {
	Index     int    // first field of the line
	Offset    int64  // second field of the line: where this message starts
	Variable  string // fourth field of the line, e.g. "HGT"
	LevelMB   int    // 0 when the level is not isobaric
	Hour      int    // forecast hour; 0 for analysis ("anl"); -1 if unparseable
	EndOffset int64  // computed from the next entry's Offset; -1 for the final entry
}

// Length returns the byte length of this GRIB message, or -1 if it is
// unknown.
//
// The length is unknown for the final entry in an idx file (EndOffset is -1)
// and for an entry whose EndOffset lies before its Offset, which can only
// happen when the idx lines are out of order. Reporting -1 in that case keeps
// the sentinel unambiguous instead of leaking an arbitrary negative number.
func (e *IdxEntry) Length() int64 {
	if e.EndOffset <= 0 || e.EndOffset < e.Offset {
		return -1
	}
	return e.EndOffset - e.Offset
}
|
||||
|
||||
// ParseIdx parses a .idx file body. Unparseable lines are silently skipped.
|
||||
func ParseIdx(body []byte) []IdxEntry {
|
||||
lines := strings.Split(string(body), "\n")
|
||||
var entries []IdxEntry
|
||||
for _, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
parts := strings.Split(line, ":")
|
||||
if len(parts) < 7 {
|
||||
continue
|
||||
}
|
||||
idx, err := strconv.Atoi(parts[0])
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
off, err := strconv.ParseInt(parts[1], 10, 64)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
entries = append(entries, IdxEntry{
|
||||
Index: idx,
|
||||
Offset: off,
|
||||
Variable: parts[3],
|
||||
LevelMB: parseLevelMB(parts[4]),
|
||||
Hour: parseHour(parts[5]),
|
||||
EndOffset: -1,
|
||||
})
|
||||
}
|
||||
for i := 0; i < len(entries)-1; i++ {
|
||||
entries[i].EndOffset = entries[i+1].Offset
|
||||
}
|
||||
return entries
|
||||
}
|
||||
|
||||
// FilterIdx returns entries matching one of the wanted variables at a known
|
||||
// pressure level with a computable byte length.
|
||||
func FilterIdx(entries []IdxEntry, wanted map[string]bool) []IdxEntry {
|
||||
var out []IdxEntry
|
||||
for _, e := range entries {
|
||||
if !wanted[e.Variable] || e.LevelMB <= 0 || e.Length() <= 0 {
|
||||
continue
|
||||
}
|
||||
out = append(out, e)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// parseLevelMB extracts the integer pressure from a level field of the form
// "<n> mb" (surrounding whitespace is ignored). Any other level description,
// or a non-integer number, yields 0.
func parseLevelMB(s string) int {
	digits, isMB := strings.CutSuffix(strings.TrimSpace(s), " mb")
	if !isMB {
		return 0
	}
	if level, err := strconv.Atoi(digits); err == nil {
		return level
	}
	return 0
}
|
||||
|
||||
// parseHour extracts the forecast hour from an idx time field.
//
// It returns 0 for "anl", n for "<n> hour fcst" with n >= 0, and -1 for
// anything else. The " hour fcst" suffix is required (mirroring how
// parseLevelMB requires " mb"), so a bare number is not mistaken for a
// forecast hour, and negative numbers are rejected so that -1 stays an
// unambiguous "unparseable" marker.
func parseHour(s string) int {
	s = strings.TrimSpace(s)
	if s == "anl" {
		return 0
	}
	digits, isForecast := strings.CutSuffix(s, " hour fcst")
	if !isForecast {
		return -1
	}
	hour, err := strconv.Atoi(digits)
	if err != nil || hour < 0 {
		return -1
	}
	return hour
}
|
||||
|
||||
// ByteRange is one HTTP range download corresponding to one GRIB message.
|
||||
type ByteRange struct {
|
||||
Start int64
|
||||
End int64 // inclusive
|
||||
Entry IdxEntry
|
||||
}
|
||||
|
||||
// EntriesToRanges converts idx entries to inclusive HTTP byte ranges.
|
||||
func EntriesToRanges(entries []IdxEntry) []ByteRange {
|
||||
out := make([]ByteRange, 0, len(entries))
|
||||
for _, e := range entries {
|
||||
if e.Length() <= 0 {
|
||||
continue
|
||||
}
|
||||
out = append(out, ByteRange{Start: e.Offset, End: e.EndOffset - 1, Entry: e})
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// FormatRange returns an HTTP Range header value for the byte range.
|
||||
func (r ByteRange) FormatRange() string {
|
||||
return fmt.Sprintf("bytes=%d-%d", r.Start, r.End)
|
||||
}
|
||||
|
|
@ -1,70 +0,0 @@
|
|||
package gfs
|
||||
|
||||
import "testing"
|
||||
|
||||
const sampleIdx = `1:0:d=2024010100:HGT:1000 mb:0 hour fcst:
|
||||
2:289012:d=2024010100:HGT:975 mb:0 hour fcst:
|
||||
3:541876:d=2024010100:TMP:1000 mb:0 hour fcst:
|
||||
4:789012:d=2024010100:UGRD:1000 mb:0 hour fcst:
|
||||
5:1045678:d=2024010100:VGRD:1000 mb:0 hour fcst:
|
||||
6:1298765:d=2024010100:UGRD:975 mb:0 hour fcst:
|
||||
7:1567890:d=2024010100:UGRD:2 m above ground:0 hour fcst:
|
||||
8:1812345:d=2024010100:VGRD:975 mb:0 hour fcst:
|
||||
9:2098765:d=2024010100:HGT:500 mb:3 hour fcst:
|
||||
`
|
||||
|
||||
func TestParseIdx(t *testing.T) {
|
||||
entries := ParseIdx([]byte(sampleIdx))
|
||||
if len(entries) != 9 {
|
||||
t.Fatalf("expected 9 entries, got %d", len(entries))
|
||||
}
|
||||
if e := entries[0]; e.Index != 1 || e.Offset != 0 || e.Variable != "HGT" || e.LevelMB != 1000 || e.Hour != 0 || e.EndOffset != 289012 {
|
||||
t.Errorf("entry 0: %+v", e)
|
||||
}
|
||||
if e := entries[6]; e.LevelMB != 0 {
|
||||
t.Errorf("non-pressure level should have LevelMB=0, got %d", e.LevelMB)
|
||||
}
|
||||
if e := entries[len(entries)-1]; e.EndOffset != -1 {
|
||||
t.Errorf("last entry EndOffset: got %d, want -1", e.EndOffset)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFilterIdx(t *testing.T) {
|
||||
entries := ParseIdx([]byte(sampleIdx))
|
||||
want := map[string]bool{"HGT": true, "UGRD": true, "VGRD": true}
|
||||
filtered := FilterIdx(entries, want)
|
||||
// HGT@1000, HGT@975, UGRD@1000, VGRD@1000, UGRD@975, VGRD@975 = 6
|
||||
// HGT@500 at 3hr is last entry (no EndOffset), so dropped.
|
||||
if len(filtered) != 6 {
|
||||
t.Errorf("expected 6, got %d", len(filtered))
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseLevelMB(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
want int
|
||||
}{
|
||||
{"1000 mb", 1000}, {"975 mb", 975}, {"1 mb", 1},
|
||||
{"2 m above ground", 0}, {"surface", 0}, {"tropopause", 0},
|
||||
}
|
||||
for _, c := range cases {
|
||||
if got := parseLevelMB(c.in); got != c.want {
|
||||
t.Errorf("parseLevelMB(%q) = %d, want %d", c.in, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseHour(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
want int
|
||||
}{
|
||||
{"0 hour fcst", 0}, {"3 hour fcst", 3}, {"192 hour fcst", 192}, {"anl", 0},
|
||||
}
|
||||
for _, c := range cases {
|
||||
if got := parseHour(c.in); got != c.want {
|
||||
t.Errorf("parseHour(%q) = %d, want %d", c.in, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,85 +1,96 @@
|
|||
// Package gfs implements datasets.Source for NOAA GFS 0.5-degree forecasts.
|
||||
// Package gfs implements datasets.Source for NOAA GFS forecasts.
|
||||
//
|
||||
// The package serves multiple GFS variants (0.5° 3-hour, 0.25° 3-hour,
|
||||
// 0.25° 1-hour); the variant is selected at construction time. The
|
||||
// download skeleton (HTTP, idx parsing, parallel blit) lives in
|
||||
// internal/datasets/grib; this package only supplies URL templating and
|
||||
// the Source-interface plumbing.
|
||||
package gfs
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"net/http"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/nilsmagnus/grib/griblib"
|
||||
"go.uber.org/zap"
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
||||
"predictor-refactored/internal/datasets"
|
||||
"predictor-refactored/internal/datasets/grib"
|
||||
"predictor-refactored/internal/weather"
|
||||
wgfs "predictor-refactored/internal/weather/gfs"
|
||||
)
|
||||
|
||||
// Source is the GFS implementation of datasets.Source.
|
||||
type Source struct {
|
||||
Parallel int // max concurrent step downloads
|
||||
Client *http.Client // optional; defaults to a 2-minute-timeout client
|
||||
Variant *wgfs.Variant
|
||||
Parallel int
|
||||
Client *http.Client
|
||||
Log *zap.Logger
|
||||
}
|
||||
|
||||
// NewSource returns a default Source.
|
||||
func NewSource(log *zap.Logger) *Source {
|
||||
// NewSource returns a default Source over variant. If variant is nil,
|
||||
// GFS 0.5° 3-hour is used (the historical Tawhiri default).
|
||||
func NewSource(variant *wgfs.Variant, log *zap.Logger) *Source {
|
||||
if variant == nil {
|
||||
variant = wgfs.GFS0p50_3h
|
||||
}
|
||||
return &Source{
|
||||
Variant: variant,
|
||||
Parallel: 8,
|
||||
Client: &http.Client{Timeout: 2 * time.Minute},
|
||||
Log: log,
|
||||
}
|
||||
}
|
||||
|
||||
// ID returns the source identifier.
|
||||
func (s *Source) ID() string { return "noaa-gfs-0p50" }
|
||||
// ID returns the variant's ID.
|
||||
func (s *Source) ID() string { return s.Variant.ID }
|
||||
|
||||
func (s *Source) log() *zap.Logger {
|
||||
if s.Log == nil {
|
||||
return zap.NewNop()
|
||||
func (s *Source) downloader() *grib.Downloader {
|
||||
return &grib.Downloader{
|
||||
Variant: s.Variant,
|
||||
URLs: s.url,
|
||||
Parallel: s.Parallel,
|
||||
Client: s.Client,
|
||||
Log: s.Log,
|
||||
}
|
||||
return s.Log
|
||||
}
|
||||
|
||||
func (s *Source) client() *http.Client {
|
||||
if s.Client == nil {
|
||||
return &http.Client{Timeout: 2 * time.Minute}
|
||||
// url generates the GFS URL for one (date, runHour, _, step, levelSet).
|
||||
// member is unused for GFS.
|
||||
func (s *Source) url(date string, runHour, _, step int, ls wgfs.LevelSet) string {
|
||||
if ls == wgfs.LevelSetB {
|
||||
return s.Variant.GribURLB(date, runHour, step)
|
||||
}
|
||||
return s.Client
|
||||
return s.Variant.GribURL(date, runHour, step)
|
||||
}
|
||||
|
||||
func (s *Source) parallel() int {
|
||||
if s.Parallel <= 0 {
|
||||
return 8
|
||||
}
|
||||
return s.Parallel
|
||||
}
|
||||
|
||||
// LatestEpoch returns the most recent run NOAA has finished publishing,
|
||||
// determined by HEAD-ing the .idx for the final forecast hour. Walks back
|
||||
// up to 8 runs (48 hours) before giving up.
|
||||
// LatestEpoch returns the most recent run NOAA has finished publishing.
|
||||
func (s *Source) LatestEpoch(ctx context.Context) (time.Time, error) {
|
||||
now := time.Now().UTC()
|
||||
hour := now.Hour() - (now.Hour() % 6)
|
||||
current := time.Date(now.Year(), now.Month(), now.Day(), hour, 0, 0, 0, time.UTC)
|
||||
|
||||
client := s.Client
|
||||
if client == nil {
|
||||
client = &http.Client{Timeout: 2 * time.Minute}
|
||||
}
|
||||
log := s.Log
|
||||
if log == nil {
|
||||
log = zap.NewNop()
|
||||
}
|
||||
|
||||
for range 8 {
|
||||
date := current.Format("20060102")
|
||||
url := wgfs.GribURL(date, current.Hour(), wgfs.MaxHour) + ".idx"
|
||||
|
||||
url := s.Variant.GribURL(date, current.Hour(), s.Variant.MaxHour) + ".idx"
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodHead, url, nil)
|
||||
if err == nil {
|
||||
resp, err := s.client().Do(req)
|
||||
resp, err := client.Do(req)
|
||||
if err == nil {
|
||||
resp.Body.Close()
|
||||
if resp.StatusCode == http.StatusOK {
|
||||
s.log().Info("latest GFS run discovered",
|
||||
log.Info("latest run discovered",
|
||||
zap.String("variant", s.Variant.ID),
|
||||
zap.Time("run", current),
|
||||
zap.String("verified_url", url))
|
||||
return current, nil
|
||||
|
|
@ -88,343 +99,40 @@ func (s *Source) LatestEpoch(ctx context.Context) (time.Time, error) {
|
|||
}
|
||||
current = current.Add(-6 * time.Hour)
|
||||
}
|
||||
return time.Time{}, fmt.Errorf("no recent GFS run found (checked 8 runs)")
|
||||
return time.Time{}, fmt.Errorf("no recent %s run found (checked 8 runs)", s.Variant.ID)
|
||||
}
|
||||
|
||||
// Coverage returns the geographic and temporal extent of id.
|
||||
func (s *Source) Coverage(id datasets.DatasetID) datasets.Coverage {
|
||||
v := s.Variant
|
||||
cov := datasets.Coverage{
|
||||
Region: datasets.Region{MinLat: -90, MaxLat: 90, MinLng: 0, MaxLng: 360},
|
||||
StartTime: id.Epoch,
|
||||
EndTime: id.Epoch.Add(time.Duration(v.MaxHour) * time.Hour),
|
||||
}
|
||||
if r := id.Subset.Region; r != nil {
|
||||
cov.Region = *r
|
||||
}
|
||||
if h := id.Subset.HourRange; h != nil {
|
||||
cov.StartTime = id.Epoch.Add(time.Duration(h.MinHour) * time.Hour)
|
||||
cov.EndTime = id.Epoch.Add(time.Duration(h.MaxHour) * time.Hour)
|
||||
}
|
||||
return cov
|
||||
}
|
||||
|
||||
// Open loads a stored dataset as a WindField.
|
||||
func (s *Source) Open(_ context.Context, epoch time.Time, store datasets.Storage) (weather.WindField, error) {
|
||||
if !store.Exists(epoch) {
|
||||
return nil, fmt.Errorf("epoch %s not found", epoch.Format(time.RFC3339))
|
||||
func (s *Source) Open(_ context.Context, id datasets.DatasetID, store datasets.Storage) (weather.WindField, error) {
|
||||
if !store.Exists(id) {
|
||||
return nil, fmt.Errorf("dataset %s not found", id.Filename())
|
||||
}
|
||||
file, err := wgfs.Open(store.Path(epoch), epoch.UTC())
|
||||
file, err := wgfs.Open(store.Path(id), s.Variant, id.Epoch.UTC())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return wgfs.NewWind(file), nil
|
||||
}
|
||||
|
||||
// neededVariables is the GRIB variable set we extract.
|
||||
var neededVariables = map[string]bool{"HGT": true, "UGRD": true, "VGRD": true}
|
||||
|
||||
// Download fetches the full dataset for epoch in parallel, resuming any
|
||||
// previously-completed work units. Honours ctx cancellation and prog
|
||||
// (which may be nil).
|
||||
func (s *Source) Download(ctx context.Context, epoch time.Time, store datasets.Storage, prog datasets.ProgressSink, throttle datasets.Throttle) error {
|
||||
if prog == nil {
|
||||
prog = noopSink{}
|
||||
}
|
||||
|
||||
handle, err := store.BeginWrite(epoch)
|
||||
if err != nil {
|
||||
return fmt.Errorf("begin write: %w", err)
|
||||
}
|
||||
manifest := handle.Manifest()
|
||||
|
||||
// Open or create the temp file. If a previous attempt left a partial
|
||||
// file of the right size, reuse it (resume); otherwise Create.
|
||||
file, err := openOrCreateCube(handle.Path())
|
||||
if err != nil {
|
||||
_ = handle.Abort()
|
||||
return err
|
||||
}
|
||||
|
||||
date := epoch.UTC().Format("20060102")
|
||||
runHour := epoch.UTC().Hour()
|
||||
steps := wgfs.Hours()
|
||||
totalUnits := len(steps) * 2
|
||||
|
||||
prog.SetTotal(totalUnits)
|
||||
// Pre-count already-done units so progress is accurate on resume.
|
||||
for _, u := range manifest.Units() {
|
||||
_ = u
|
||||
prog.StepComplete()
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
g, ctx := errgroup.WithContext(ctx)
|
||||
g.SetLimit(s.parallel())
|
||||
|
||||
// fileMu serialises concurrent BlitGribData calls because the underlying
|
||||
// mmap is shared and SetVal isn't atomic.
|
||||
var fileMu sync.Mutex
|
||||
|
||||
for _, step := range steps {
|
||||
hourIdx := wgfs.HourIndex(step)
|
||||
if hourIdx < 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
for _, ls := range []wgfs.LevelSet{wgfs.LevelSetA, wgfs.LevelSetB} {
|
||||
unit := unitKey(step, ls)
|
||||
if manifest.Has(unit) {
|
||||
continue
|
||||
}
|
||||
|
||||
g.Go(func() error {
|
||||
var url string
|
||||
switch ls {
|
||||
case wgfs.LevelSetA:
|
||||
url = wgfs.GribURL(date, runHour, step)
|
||||
case wgfs.LevelSetB:
|
||||
url = wgfs.GribURLB(date, runHour, step)
|
||||
}
|
||||
if err := s.downloadAndBlit(ctx, file, &fileMu, url, hourIdx, ls, prog, throttle); err != nil {
|
||||
return fmt.Errorf("step %d %s: %w", step, levelSetLabel(ls), err)
|
||||
}
|
||||
if err := manifest.Mark(unit); err != nil {
|
||||
return fmt.Errorf("mark unit: %w", err)
|
||||
}
|
||||
prog.StepComplete()
|
||||
return nil
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if err := g.Wait(); err != nil {
|
||||
_ = file.Close()
|
||||
// Don't Abort on context cancellation — preserve progress for resume.
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return err
|
||||
}
|
||||
// Other errors: abort if no progress was made; otherwise leave for resume.
|
||||
if len(manifest.Units()) == 0 {
|
||||
_ = handle.Abort()
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
if err := file.Flush(); err != nil {
|
||||
_ = file.Close()
|
||||
return fmt.Errorf("flush: %w", err)
|
||||
}
|
||||
if err := file.Close(); err != nil {
|
||||
return fmt.Errorf("close: %w", err)
|
||||
}
|
||||
if err := handle.Commit(); err != nil {
|
||||
return fmt.Errorf("commit: %w", err)
|
||||
}
|
||||
|
||||
s.log().Info("download complete",
|
||||
zap.Time("epoch", epoch),
|
||||
zap.Duration("elapsed", time.Since(start)))
|
||||
return nil
|
||||
// Download fetches the dataset for id. GFS ignores Subset.Members.
|
||||
func (s *Source) Download(ctx context.Context, id datasets.DatasetID, store datasets.Storage, prog datasets.ProgressSink, throttle datasets.Throttle) error {
|
||||
return s.downloader().Run(ctx, id, 0, store, prog, throttle)
|
||||
}
|
||||
|
||||
// openOrCreateCube returns a writable cube file at path, creating it if the
|
||||
// file does not exist or has the wrong size.
|
||||
func openOrCreateCube(path string) (*wgfs.File, error) {
|
||||
info, err := os.Stat(path)
|
||||
if err == nil && info.Size() == wgfs.DatasetSize {
|
||||
return wgfs.OpenWritable(path)
|
||||
}
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return nil, fmt.Errorf("stat cube: %w", err)
|
||||
}
|
||||
// Wrong-size or missing — truncate-create.
|
||||
return wgfs.Create(path)
|
||||
}
|
||||
|
||||
// downloadAndBlit fetches and decodes one (URL, level-set) chunk and writes
|
||||
// it into the dataset.
|
||||
func (s *Source) downloadAndBlit(
|
||||
ctx context.Context,
|
||||
file *wgfs.File,
|
||||
fileMu *sync.Mutex,
|
||||
baseURL string,
|
||||
hourIdx int,
|
||||
ls wgfs.LevelSet,
|
||||
prog datasets.ProgressSink,
|
||||
throttle datasets.Throttle,
|
||||
) error {
|
||||
idxBody, err := s.httpGet(ctx, baseURL+".idx", throttle, prog)
|
||||
if err != nil {
|
||||
return fmt.Errorf("idx: %w", err)
|
||||
}
|
||||
entries := ParseIdx(idxBody)
|
||||
filtered := FilterIdx(entries, neededVariables)
|
||||
|
||||
var relevant []IdxEntry
|
||||
for _, e := range filtered {
|
||||
set, ok := wgfs.PressureLevelSet(e.LevelMB)
|
||||
if ok && set == ls {
|
||||
relevant = append(relevant, e)
|
||||
}
|
||||
}
|
||||
if len(relevant) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
ranges := EntriesToRanges(relevant)
|
||||
tmp, err := os.CreateTemp("", "gfs-msg-*.tmp")
|
||||
if err != nil {
|
||||
return fmt.Errorf("temp: %w", err)
|
||||
}
|
||||
tmpPath := tmp.Name()
|
||||
defer os.Remove(tmpPath)
|
||||
|
||||
for _, r := range ranges {
|
||||
body, err := s.httpGetRange(ctx, baseURL, r.Start, r.End, throttle, prog)
|
||||
if err != nil {
|
||||
tmp.Close()
|
||||
return fmt.Errorf("range %d-%d: %w", r.Start, r.End, err)
|
||||
}
|
||||
if _, err := tmp.Write(body); err != nil {
|
||||
tmp.Close()
|
||||
return fmt.Errorf("write tmp: %w", err)
|
||||
}
|
||||
}
|
||||
if err := tmp.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
f, err := os.Open(tmpPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
messages, err := griblib.ReadMessages(f)
|
||||
f.Close()
|
||||
if err != nil {
|
||||
return fmt.Errorf("read grib: %w", err)
|
||||
}
|
||||
|
||||
for _, msg := range messages {
|
||||
if msg.Section4.ProductDefinitionTemplateNumber != 0 {
|
||||
continue
|
||||
}
|
||||
p := msg.Section4.ProductDefinitionTemplate
|
||||
varIdx := wgfs.VariableIndex(int(p.ParameterCategory), int(p.ParameterNumber))
|
||||
if varIdx < 0 {
|
||||
continue
|
||||
}
|
||||
if p.FirstSurface.Type != 100 { // isobaric only
|
||||
continue
|
||||
}
|
||||
pressureMB := int(math.Round(float64(p.FirstSurface.Value) / 100.0))
|
||||
levelIdx := wgfs.PressureIndex(pressureMB)
|
||||
if levelIdx < 0 {
|
||||
continue
|
||||
}
|
||||
data := msg.Data()
|
||||
fileMu.Lock()
|
||||
err := file.BlitGribData(hourIdx, levelIdx, varIdx, data)
|
||||
fileMu.Unlock()
|
||||
if err != nil {
|
||||
return fmt.Errorf("blit: %w", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// httpGet downloads a URL body with 3 retries and optional throttling.
|
||||
func (s *Source) httpGet(ctx context.Context, url string, throttle datasets.Throttle, prog datasets.ProgressSink) ([]byte, error) {
|
||||
var lastErr error
|
||||
for attempt := range 3 {
|
||||
if attempt > 0 {
|
||||
select {
|
||||
case <-time.After(time.Duration(attempt*2) * time.Second):
|
||||
case <-ctx.Done():
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp, err := s.client().Do(req)
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
body, err := readThrottled(ctx, resp.Body, throttle, prog)
|
||||
resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
lastErr = fmt.Errorf("HTTP %d for %s", resp.StatusCode, url)
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
return body, nil
|
||||
}
|
||||
return nil, fmt.Errorf("after 3 attempts: %w", lastErr)
|
||||
}
|
||||
|
||||
// httpGetRange downloads an inclusive byte range with 3 retries and throttling.
|
||||
func (s *Source) httpGetRange(ctx context.Context, url string, start, end int64, throttle datasets.Throttle, prog datasets.ProgressSink) ([]byte, error) {
|
||||
var lastErr error
|
||||
for attempt := range 3 {
|
||||
if attempt > 0 {
|
||||
select {
|
||||
case <-time.After(time.Duration(attempt*2) * time.Second):
|
||||
case <-ctx.Done():
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", start, end))
|
||||
resp, err := s.client().Do(req)
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
body, err := readThrottled(ctx, resp.Body, throttle, prog)
|
||||
resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusPartialContent && resp.StatusCode != http.StatusOK {
|
||||
lastErr = fmt.Errorf("HTTP %d for range %d-%d of %s", resp.StatusCode, start, end, url)
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
return body, nil
|
||||
}
|
||||
return nil, fmt.Errorf("after 3 attempts: %w", lastErr)
|
||||
}
|
||||
|
||||
// readThrottled reads r into memory, consulting throttle (if non-nil) before
|
||||
// each chunk and reporting bytes to prog.
|
||||
func readThrottled(ctx context.Context, r io.Reader, throttle datasets.Throttle, prog datasets.ProgressSink) ([]byte, error) {
|
||||
buf := make([]byte, 0, 64*1024)
|
||||
chunk := make([]byte, 32*1024)
|
||||
for {
|
||||
if throttle != nil {
|
||||
if err := throttle.Wait(ctx, len(chunk)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
n, err := r.Read(chunk)
|
||||
if n > 0 {
|
||||
buf = append(buf, chunk[:n]...)
|
||||
prog.Bytes(int64(n))
|
||||
}
|
||||
if errors.Is(err, io.EOF) {
|
||||
return buf, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func unitKey(step int, ls wgfs.LevelSet) string {
|
||||
return fmt.Sprintf("step%03d-%s", step, levelSetLabel(ls))
|
||||
}
|
||||
|
||||
func levelSetLabel(ls wgfs.LevelSet) string {
|
||||
if ls == wgfs.LevelSetB {
|
||||
return "B"
|
||||
}
|
||||
return "A"
|
||||
}
|
||||
|
||||
// noopSink discards progress events. Download substitutes it for a nil
// progress sink so the rest of the code can report progress unconditionally.
type noopSink struct{}

// SetTotal ignores the announced number of work units.
func (noopSink) SetTotal(_ int) {}

// StepComplete ignores a finished work unit.
func (noopSink) StepComplete() {}

// Bytes ignores a downloaded byte count.
func (noopSink) Bytes(_ int64) {}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue