updated downloader

This commit is contained in:
straitz 2026-03-22 16:29:53 +09:00
parent ca95e06ab7
commit 8e9f117799
30 changed files with 1209 additions and 698 deletions

View file

@ -0,0 +1,25 @@
package grib
import (
	"os"
	"path/filepath"
	"testing"
	"time"
)
func TestAssembleCubeFromExisting(t *testing.T) {
dir := "C:/tmp/grib"
run := time.Date(2026, 1, 16, 6, 0, 0, 0, time.UTC)
cubePath := dir + "/" + run.Format("20060102_15") + ".cube"
t.Logf("Assembling cube from existing GRIB files...")
t.Logf("Directory: %s", dir)
t.Logf("Run: %s", run.Format("2006-01-02 15:04 MST"))
t.Logf("Output: %s", cubePath)
err := assembleCube(dir, run, cubePath)
if err != nil {
t.Fatalf("Failed to assemble cube: %v", err)
}
t.Logf("✓ Cube assembled successfully!")
t.Logf("Cube file: %s", cubePath)
}

View file

@ -1,23 +1,130 @@
package grib
import (
"fmt"
"time"
"git.intra.yksa.space/gsn/predictor/internal/pkg/errcodes"
env "github.com/caarlos0/env/v11"
)
// DatasetConfig описывает параметры GFS-датасета: сетку, временные шаги,
// уровни давления и URL для загрузки.
type DatasetConfig struct {
// Сетка
Resolution float64 // шаг сетки в градусах (0.25 или 0.5)
NLat int // точек по широте (721 для 0.25°, 361 для 0.5°)
NLon int // точек по долготе (1440 для 0.25°, 720 для 0.5°)
// Время
NT int // кол-во временных шагов (97 для 096 ч с шагом 1)
MaxHour int // последний час прогноза (96)
TimeStep int // интервал между шагами, часы (1 или 3)
// Вертикаль
NP int // кол-во уровней давления
Levels []float64 // уровни давления в гПа, по убыванию (1000 … 1)
// Переменные в кубе (порядок важен: индексы 0, 1, 2, …)
NVar int // кол-во переменных
Variables []string // GRIB-имена для фильтрации idx (HGT, UGRD, VGRD)
// URL загрузки (fmt-шаблоны: date, hour, hour, step)
URLMask string // основной pgrb2
URLMaskB string // дополнительный pgrb2b
// Имена файлов
FileSuffix string // токен разрешения в именах файлов ("0p25", "0p50")
}
// SizePerVar возвращает размер одной переменной в кубе, байт.
func (dc *DatasetConfig) SizePerVar() int64 {
return int64(dc.NT) * int64(dc.NP) * int64(dc.NLat) * int64(dc.NLon) * 4
}
// CubeSize возвращает полный размер куба, байт.
func (dc *DatasetConfig) CubeSize() int64 {
return dc.SizePerVar() * int64(dc.NVar)
}
// GridSize возвращает NLat * NLon.
func (dc *DatasetConfig) GridSize() int {
return dc.NLat * dc.NLon
}
// InvResolution возвращает 1/Resolution — множитель для перевода координат в индексы.
func (dc *DatasetConfig) InvResolution() float64 {
return 1.0 / dc.Resolution
}
// Steps возвращает список часов прогноза [0, TimeStep, 2*TimeStep, …, MaxHour].
func (dc *DatasetConfig) Steps() []int {
out := make([]int, 0, dc.NT)
for h := 0; h <= dc.MaxHour; h += dc.TimeStep {
out = append(out, h)
}
return out
}
// FileName возвращает имя основного GRIB-файла (pgrb2).
func (dc *DatasetConfig) FileName(run time.Time, step int) string {
return fmt.Sprintf("gfs.t%02dz.pgrb2.%s.f%03d", run.Hour(), dc.FileSuffix, step)
}
// FileNameB возвращает имя вторичного GRIB-файла (pgrb2b).
func (dc *DatasetConfig) FileNameB(run time.Time, step int) string {
return fmt.Sprintf("gfs.t%02dz.pgrb2b.%s.f%03d", run.Hour(), dc.FileSuffix, step)
}
// GribURL возвращает URL основного GRIB-файла.
func (dc *DatasetConfig) GribURL(run time.Time, step int) string {
return fmt.Sprintf(dc.URLMask, run.Format("20060102"), run.Hour(), run.Hour(), step)
}
// GribURLB возвращает URL вторичного GRIB-файла.
func (dc *DatasetConfig) GribURLB(run time.Time, step int) string {
return fmt.Sprintf(dc.URLMaskB, run.Format("20060102"), run.Hour(), run.Hour(), step)
}
// DefaultDatasetConfig возвращает конфиг GFS 0.25° / 1 час / 47 уровней.
func DefaultDatasetConfig() DatasetConfig {
return DatasetConfig{
Resolution: 0.25,
NLat: 721,
NLon: 1440,
NT: 97,
MaxHour: 96,
TimeStep: 1,
NP: 47,
Levels: []float64{
1000, 975, 950, 925, 900, 875, 850, 825, 800, 775,
750, 725, 700, 675, 650, 625, 600, 575, 550, 525,
500, 475, 450, 425, 400, 375, 350, 325, 300, 275,
250, 225, 200, 175, 150, 125, 100, 70, 50, 30,
20, 10, 7, 5, 3, 2, 1,
},
NVar: 3,
Variables: []string{"HGT", "UGRD", "VGRD"},
URLMask: "https://noaa-gfs-bdp-pds.s3.amazonaws.com/gfs.%s/%02d/atmos/gfs.t%02dz.pgrb2.0p25.f%03d",
URLMaskB: "https://noaa-gfs-bdp-pds.s3.amazonaws.com/gfs.%s/%02d/atmos/gfs.t%02dz.pgrb2b.0p25.f%03d",
FileSuffix: "0p25",
}
}
// ---------------------------------------------------------------------------
type Config struct {
Dir string `env:"DIR" envDefault:"C:/tmp/grib"`
TTL time.Duration `env:"TTL" envDefault:"48h"`
CacheTTL time.Duration `env:"CACHE_TTL" envDefault:"1h"`
Parallel int `env:"PARALLEL" envDefault:"8"`
DatasetURL string `env:"DATASET_URL" envDefault:"https://nomads.ncep.noaa.gov/pub/data/nccf/com/gfs/prod"`
// S3 configuration
UseS3 bool `env:"USE_S3" envDefault:"true"`
S3Bucket string `env:"S3_BUCKET" envDefault:"noaa-gfs-bdp-pds"`
S3Region string `env:"S3_REGION" envDefault:"us-east-1"`
S3Timeout time.Duration `env:"S3_TIMEOUT" envDefault:"300s"`
Dir string `env:"DIR" envDefault:"C:/tmp/grib"`
TTL time.Duration `env:"TTL" envDefault:"48h"`
CacheTTL time.Duration `env:"CACHE_TTL" envDefault:"1h"`
Parallel int `env:"PARALLEL" envDefault:"8"`
Dataset DatasetConfig
}
func NewConfig() (*Config, error) {
@ -27,6 +134,6 @@ func NewConfig() (*Config, error) {
}); err != nil {
return nil, errcodes.Wrap(err, "failed to parse GRIB config")
}
cfg.Dataset = DefaultDatasetConfig()
return cfg, nil
}

View file

View file

@ -15,7 +15,7 @@ type cube struct {
file *os.File
}
func openCube(path string) (*cube, error) {
func openCube(path string, dc *DatasetConfig) (*cube, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
@ -27,14 +27,15 @@ func openCube(path string) (*cube, error) {
return nil, err
}
const (
nT = 33 // 0-96 hours with step 3 hours (33 time steps)
nP = 47 // 47 pressure levels matching tawhiri
nLat = 361
nLon = 720
)
return &cube{mm: mm, t: nT, p: nP, lat: nLat, lon: nLon, bytesPerVar: int64(nT * nP * nLat * nLon * 4), file: f}, nil
return &cube{
mm: mm,
t: dc.NT,
p: dc.NP,
lat: dc.NLat,
lon: dc.NLon,
bytesPerVar: dc.SizePerVar(),
file: f,
}, nil
}
func (c *cube) val(varIdx, ti, pi, y, x int) float32 {

View file

@ -2,6 +2,7 @@ package grib
// dataset couples an open memory-mapped cube with the dataset configuration
// it was built from and the forecast run it represents.
type dataset struct {
	cube   *cube          // memory-mapped cube holding the gridded values
	ds     *DatasetConfig // geometry/levels used to index into the cube
	runUTC int64          // unix seconds
}

View file

@ -1,91 +0,0 @@
package grib
import (
"context"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"time"
"git.intra.yksa.space/gsn/predictor/internal/pkg/errcodes"
"golang.org/x/sync/errgroup"
)
type Downloader struct {
Dir string
Parallel int
Client *http.Client
DatasetURL string
}
func (d *Downloader) fileURL(run string, hour int, step int) string {
return fmt.Sprintf("%s/gfs.%s/%02d/atmos/gfs.t%02dz.pgrb2.0p50.f%03d", d.DatasetURL, run, hour, hour, step)
}
func (d *Downloader) fetch(ctx context.Context, url, dst string) (err error) {
// Check if final file already exists
if _, err := os.Stat(dst); err == nil {
return nil
}
tmp := dst + ".part"
// Remove old .part file if it exists (fixes race condition)
os.Remove(tmp)
f, err := os.Create(tmp)
if err != nil {
return err
}
// Cleanup .part file on any error (using named return value)
defer func() {
f.Close()
if err != nil {
os.Remove(tmp)
}
}()
req, _ := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
resp, err := d.Client.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return errcodes.Wrap(errcodes.ErrDownload, "bad status: "+resp.Status)
}
if _, err := io.Copy(f, resp.Body); err != nil {
return err
}
// Close file before rename
if err := f.Close(); err != nil {
return err
}
// If rename fails, err will be set and defer will cleanup .part file
return os.Rename(tmp, dst)
}
func (d *Downloader) Run(ctx context.Context, run time.Time) error {
runStr := run.Format("20060102")
hour := run.Hour()
g, ctx := errgroup.WithContext(ctx)
sem := make(chan struct{}, d.Parallel)
for _, step := range steps {
step := step
sem <- struct{}{}
g.Go(func() error {
defer func() { <-sem }()
url := d.fileURL(runStr, hour, step)
dst := filepath.Join(d.Dir, fileName(run, step))
return d.fetch(ctx, url, dst)
})
}
return g.Wait()
}

View file

@ -4,30 +4,100 @@ import "math"
func lerp(a, b, t float64) float64 { return a + t*(b-a) }
// Interpolate 16point (time, p, lat, lon)
// ghInterp bilinearly interpolates geopotential height (cube variable 0) at
// the given time/pressure indices between the four surrounding grid points,
// using wy/wx as the latitude/longitude weights.
func (d *dataset) ghInterp(ti, pi int, y0, y1, x0, x1 int, wy, wx float64) float64 {
	// Interpolate along longitude on each latitude row, then blend the rows.
	rowLo := (1-wx)*float64(d.cube.val(0, ti, pi, y0, x0)) + wx*float64(d.cube.val(0, ti, pi, y0, x1))
	rowHi := (1-wx)*float64(d.cube.val(0, ti, pi, y1, x0)) + wx*float64(d.cube.val(0, ti, pi, y1, x1))
	return (1-wy)*rowLo + wy*rowHi
}
// searchAltLevel uses interpolated geopotential height to find the
// pressure-level bracket containing the target altitude via binary search.
// It relies on Levels being ordered by descending pressure, so the
// interpolated height increases with the level index. It returns the lower
// bracket index and the interpolation weight (clamped to [0, 1]) between
// that level and the next.
func (d *dataset) searchAltLevel(alt float64, ti, y0, y1, x0, x1 int, wy, wx float64) (int, float64) {
	levels := d.ds.Levels
	nLevels := len(levels)
	// Invariant: the bracket lies within [lo, hi]; stop when adjacent.
	lo, hi := 0, nLevels-1
	for lo < hi-1 {
		mid := (lo + hi) / 2
		ghMid := d.ghInterp(ti, mid, y0, y1, x0, x1, wy, wx)
		if ghMid < alt {
			lo = mid
		} else {
			hi = mid
		}
	}
	ghLo := d.ghInterp(ti, lo, y0, y1, x0, x1, wy, wx)
	ghHi := d.ghInterp(ti, hi, y0, y1, x0, x1, wy, wx)
	// Linear weight between the bracketing heights; guard against a
	// degenerate bracket (equal heights) before dividing.
	wp := 0.0
	if ghHi != ghLo {
		wp = (alt - ghLo) / (ghHi - ghLo)
	}
	// Clamp so altitudes outside the bracket pin to its edges.
	if wp < 0 {
		wp = 0
	}
	if wp > 1 {
		wp = 1
	}
	return lo, wp
}
// uv выполняет интерполяцию ветра по 4 измерениям (time, pressure, lat, lon).
func (d *dataset) uv(lat, lon, alt float64, tHours float64) (float64, float64) {
if lon < 0 {
lon += 360
}
iy := (lat + 90) * 2
inv := d.ds.InvResolution()
// GRIB scan north→south: index 0 = 90°N
iy := (90 - lat) * inv
y0 := int(math.Floor(iy))
if y0 < 0 {
y0 = 0
}
if y0 >= d.cube.lat-1 {
y0 = d.cube.lat - 2
}
y1 := y0 + 1
wy := iy - float64(y0)
ix := lon * 2
ix := lon * inv
x0 := int(math.Floor(ix)) % d.cube.lon
x1 := (x0 + 1) % d.cube.lon
wx := ix - float64(x0)
// For 3-hourly data (step = 3 hours)
// Convert tHours to 3-hour index (e.g., 1.5 hours -> index 0.5, interpolate between 0 and 1)
it0 := int(math.Floor(tHours / 3.0))
wt := (tHours - float64(it0*3)) / 3.0 // Interpolation weight within 3-hour window
// Время: tHours делим на шаг, чтобы получить индекс в кубе
tIdx := tHours / float64(d.ds.TimeStep)
it0 := int(math.Floor(tIdx))
if it0 < 0 {
it0 = 0
}
if it0 >= d.cube.t-1 {
it0 = d.cube.t - 2
}
wt := tIdx - float64(it0)
// ISA: высота → давление → индекс уровня
levels := d.ds.Levels
p := pressureFromAlt(alt)
ip0 := 0
for ip0+1 < len(pressureLevels) && pressureLevels[ip0+1] > p {
for ip0+1 < len(levels) && levels[ip0+1] > p {
ip0++
}
ip1 := ip0 + 1
wp := (pressureLevels[ip0] - p) / (pressureLevels[ip0] - pressureLevels[ip1])
if ip1 >= len(levels) {
ip1 = len(levels) - 1
}
wp := 0.0
if levels[ip0] != levels[ip1] {
wp = (levels[ip0] - p) / (levels[ip0] - levels[ip1])
}
fetch := func(ti, pi int) (float64, float64) {
u00 := d.cube.val(1, ti, pi, y0, x0)
u10 := d.cube.val(1, ti, pi, y0, x1)
@ -41,6 +111,7 @@ func (d *dataset) uv(lat, lon, alt float64, tHours float64) (float64, float64) {
vxy := (1-wy)*((1-wx)*float64(v00)+wx*float64(v10)) + wy*((1-wx)*float64(v01)+wx*float64(v11))
return uxy, vxy
}
u0p0, v0p0 := fetch(it0, ip0)
u0p1, v0p1 := fetch(it0, ip1)
u1p0, v1p0 := fetch(it0+1, ip0)

View file

@ -4,7 +4,6 @@ import (
"context"
"encoding/binary"
"math"
"net/http"
"os"
"path/filepath"
"strings"
@ -41,15 +40,12 @@ func New(cfg *Config) (Service, error) {
// Try to load existing dataset on startup
if err := s.loadExistingDataset(); err != nil {
// Log error but don't fail startup - dataset will be loaded on first Update()
// This allows the service to start even if no data is available yet
}
return s, nil
}
// loadExistingDataset tries to load the most recent available dataset
func (s *service) loadExistingDataset() error {
// Find the most recent cube file
pattern := filepath.Join(s.cfg.Dir, "*.cube")
matches, err := filepath.Glob(pattern)
if err != nil {
@ -60,7 +56,6 @@ func (s *service) loadExistingDataset() error {
return errcodes.ErrNoCubeFilesFound
}
// Sort by modification time (newest first)
var latestFile string
var latestTime time.Time
@ -69,7 +64,6 @@ func (s *service) loadExistingDataset() error {
if err != nil {
continue
}
if info.ModTime().After(latestTime) {
latestTime = info.ModTime()
latestFile = match
@ -80,18 +74,16 @@ func (s *service) loadExistingDataset() error {
return errcodes.ErrNoValidCubeFilesFound
}
// Check if the file is fresh enough
if time.Since(latestTime) > s.cfg.TTL {
return errcodes.Wrap(errcodes.ErrLatestCubeFileIsTooOld, "latest cube file is too old")
}
// Load the dataset
c, err := openCube(latestFile)
dc := &s.cfg.Dataset
c, err := openCube(latestFile, dc)
if err != nil {
return err
}
// Extract run time from filename
base := filepath.Base(latestFile)
runStr := strings.TrimSuffix(base, ".cube")
run, err := time.Parse("20060102_15", runStr)
@ -100,94 +92,70 @@ func (s *service) loadExistingDataset() error {
return err
}
ds := &dataset{cube: c, runUTC: run.Unix()}
s.data.Store(ds)
s.data.Store(&dataset{cube: c, ds: dc, runUTC: run.Unix()})
return nil
}
// Update() downloads missing GRIBs, assembles cube into a single mmapfile.
func (s *service) Update(ctx context.Context) error {
// Check if we already have fresh data
if d := s.data.Load(); d != nil {
runTime := time.Unix(d.runUTC, 0)
if time.Since(runTime) < s.cfg.TTL {
// Data is still fresh, no need to update
return nil
}
}
// Check again after acquiring lock (double-checked locking pattern)
if d := s.data.Load(); d != nil {
runTime := time.Unix(d.runUTC, 0)
if time.Since(runTime) < s.cfg.TTL {
// Another instance already updated the data
return nil
}
}
run := nearestRun(time.Now().UTC().Add(-24 * time.Hour))
dc := &s.cfg.Dataset
run := nearestRun(time.Now().UTC().Add(-6 * time.Hour))
// Check if we already have this run
cubePath := filepath.Join(s.cfg.Dir, run.Format("20060102_15")) + ".cube"
if _, err := os.Stat(cubePath); err == nil {
// File exists, check if it's fresh
info, err := os.Stat(cubePath)
if err == nil && time.Since(info.ModTime()) < s.cfg.TTL {
// File is fresh, just load it
c, err := openCube(cubePath)
c, err := openCube(cubePath, dc)
if err != nil {
return err
}
ds := &dataset{cube: c, runUTC: run.Unix()}
s.data.Store(ds)
s.data.Store(&dataset{cube: c, ds: dc, runUTC: run.Unix()})
s.cache = memCache{ttl: s.cfg.CacheTTL}
return nil
}
}
// Download new data using S3 or HTTP
var downloadErr error
if s.cfg.UseS3 {
s3dl, err := NewS3Downloader(s.cfg.Dir, s.cfg.Parallel, s.cfg.S3Bucket, s.cfg.S3Region)
if err != nil {
return errcodes.Wrap(err, "failed to create S3 downloader")
}
downloadErr = s3dl.Run(ctx, run)
} else {
dl := Downloader{
Dir: s.cfg.Dir,
Parallel: s.cfg.Parallel,
Client: http.DefaultClient,
DatasetURL: s.cfg.DatasetURL,
}
downloadErr = dl.Run(ctx, run)
downloadCtx, cancel := context.WithTimeout(ctx, 60*time.Minute)
defer cancel()
dl := NewPartialDownloader(s.cfg.Dir, s.cfg.Parallel, dc)
if err := dl.Run(downloadCtx, run); err != nil {
return err
}
if downloadErr != nil {
return downloadErr
}
// Assemble cube if it doesn't exist
if _, err := os.Stat(cubePath); err != nil {
if err := assembleCube(s.cfg.Dir, run, cubePath); err != nil {
if err := assembleCube(s.cfg.Dir, run, cubePath, dc); err != nil {
return err
}
}
c, err := openCube(cubePath)
c, err := openCube(cubePath, dc)
if err != nil {
return err
}
ds := &dataset{cube: c, runUTC: run.Unix()}
s.data.Store(ds)
s.data.Store(&dataset{cube: c, ds: dc, runUTC: run.Unix()})
s.cache = memCache{ttl: s.cfg.CacheTTL}
return nil
}
func assembleCube(dir string, run time.Time, cubePath string) error {
const sizePerVar = 33 * 47 * 361 * 720 * 4 // 33 time steps (0-96 hours, 3-hour intervals), 47 pressure levels
total := int64(sizePerVar * 3) // 3 variables: gh, u, v
func assembleCube(dir string, run time.Time, cubePath string, dc *DatasetConfig) error {
sizePerVar := dc.SizePerVar()
total := dc.CubeSize()
gridBytes := int64(dc.GridSize()) * 4
f, err := os.Create(cubePath)
if err != nil {
return err
@ -203,27 +171,23 @@ func assembleCube(dir string, run time.Time, cubePath string) error {
defer f.Close()
pIndex := make(map[int]int)
for i, p := range pressureLevels {
for i, p := range dc.Levels {
pIndex[int(math.Round(p))] = i
}
for ti, step := range steps {
fn := filepath.Join(dir, fileName(run, step))
processFile := func(fn string, ti int) error {
file, err := os.Open(fn)
if err != nil {
return err
}
messages, err := griblib.ReadMessages(file)
file.Close() // Close immediately after reading
file.Close()
if err != nil {
return err
}
for _, m := range messages {
// Check if this is a wind component (u or v) or geopotential height
// ParameterCategory 2 = momentum, ParameterNumber 2 = u-wind, 3 = v-wind
// ParameterCategory 3 = mass, ParameterNumber 5 = geopotential height
if m.Section4.ProductDefinitionTemplateNumber != 0 {
continue
}
@ -231,7 +195,6 @@ func assembleCube(dir string, run time.Time, cubePath string) error {
product := m.Section4.ProductDefinitionTemplate
var varIdx int
// Match tawhiri variable order: ['gh', 'u', 'v'] (indices 0, 1, 2)
if product.ParameterCategory == 2 {
switch product.ParameterNumber {
case 2: // u-wind
@ -242,18 +205,15 @@ func assembleCube(dir string, run time.Time, cubePath string) error {
continue
}
} else if product.ParameterCategory == 3 && product.ParameterNumber == 5 {
// geopotential height
varIdx = 0
varIdx = 0 // geopotential height
} else {
continue
}
// Check if this is a pressure level (type 100)
if product.FirstSurface.Type != 100 {
continue
}
// Get pressure level in hPa
pressure := float64(product.FirstSurface.Value) / 100.0
pIdx, ok := pIndex[int(math.Round(pressure))]
if !ok {
@ -261,14 +221,27 @@ func assembleCube(dir string, run time.Time, cubePath string) error {
}
vals := m.Data()
// GRIB library returns scan north->south, west->east already in row-major order
raw := make([]byte, len(vals)*4)
for i, v := range vals {
binary.LittleEndian.PutUint32(raw[i*4:], math.Float32bits(float32(v)))
}
base := int64(varIdx*sizePerVar + (ti*47+pIdx)*361*720*4)
base := int64(varIdx)*sizePerVar + (int64(ti)*int64(dc.NP)+int64(pIdx))*gridBytes
copy(mm[base:base+int64(len(raw))], raw)
}
return nil
}
steps := dc.Steps()
for ti, step := range steps {
fn := filepath.Join(dir, dc.FileName(run, step))
if err := processFile(fn, ti); err != nil {
return err
}
fnB := filepath.Join(dir, dc.FileNameB(run, step))
if err := processFile(fnB, ti); err != nil {
return err
}
}
return mm.Flush()
}
@ -279,24 +252,21 @@ func (s *service) Extract(ctx context.Context, lat, lon, alt float64, ts time.Ti
if d == nil {
return zero, errcodes.ErrNoDataset
}
if ts.Before(time.Unix(d.runUTC, 0)) || ts.After(time.Unix(d.runUTC, 0).Add(96*time.Hour)) {
maxDur := time.Duration(s.cfg.Dataset.MaxHour) * time.Hour
if ts.Before(time.Unix(d.runUTC, 0)) || ts.After(time.Unix(d.runUTC, 0).Add(maxDur)) {
return zero, errcodes.ErrOutOfBounds
}
// Try memory cache first
key := encodeKey(lat, lon, alt, ts)
if v, ok := s.cache.get(key); ok {
return [2]float64(v), nil
}
// Calculate result
td := ts.Sub(time.Unix(d.runUTC, 0)).Hours()
u, v := d.uv(lat, lon, alt, td)
out := [2]float64{u, v}
// Cache in memory
s.cache.set(key, vec(out))
return out, nil
}

View file

@ -0,0 +1,350 @@
package grib
import (
"bufio"
"context"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"git.intra.yksa.space/gsn/predictor/internal/pkg/errcodes"
"git.intra.yksa.space/gsn/predictor/internal/pkg/log"
"go.uber.org/zap"
"golang.org/x/sync/errgroup"
)
// PartialDownloader downloads only the required fields from GRIB files,
// using HTTP Range requests driven by the .idx index files.
type PartialDownloader struct {
	Dir       string         // target directory for downloaded files
	Parallel  int            // maximum number of concurrent downloads
	Client    *http.Client   // HTTP client used for idx and range requests
	Variables []string       // GRIB variable names to keep (from DatasetConfig)
	ds        *DatasetConfig // dataset geometry, steps and URL templates
}
// NewPartialDownloader constructs a partial downloader for the given target
// directory, parallelism limit and dataset configuration.
func NewPartialDownloader(dir string, parallel int, dc *DatasetConfig) *PartialDownloader {
	client := &http.Client{Timeout: 60 * time.Second}
	return &PartialDownloader{
		Dir:       dir,
		Parallel:  parallel,
		Client:    client,
		Variables: dc.Variables,
		ds:        dc,
	}
}
// idxEntry represents one colon-separated record of a GRIB .idx index file.
type idxEntry struct {
	Index     int    // position of the record within the idx file
	ByteStart int64  // byte offset of the GRIB message within the data file
	Date      string // date field of the idx line
	Variable  string // GRIB short name, e.g. "HGT", "UGRD", "VGRD"
	Level     string // level description; pressure levels end with " mb"
	Forecast  string // forecast-time description field
}
// ProgressWriter is an io.Writer sink that counts bytes written and reports
// progress as a percentage of Total through the OnProgress callback. The
// data itself is discarded.
type ProgressWriter struct {
	Total      int64                 // expected total size in bytes (0 = unknown)
	Downloaded int64                 // bytes observed so far
	OnProgress func(percent float64) // invoked after each write when Total > 0
}

// Write accounts for the incoming bytes and fires the progress callback.
// It never fails and always reports the full length as written.
func (pw *ProgressWriter) Write(p []byte) (int, error) {
	pw.Downloaded += int64(len(p))
	if pw.OnProgress != nil && pw.Total > 0 {
		pw.OnProgress(float64(pw.Downloaded) / float64(pw.Total) * 100)
	}
	return len(p), nil
}
// parseIdx parses the contents of a .idx file into entries. Each non-empty
// line is colon-separated: "num:byteStart:date:variable:level:forecast:...";
// lines with fewer than 7 fields are ignored.
func (d *PartialDownloader) parseIdx(body []byte) []idxEntry {
	var out []idxEntry
	for _, line := range strings.Split(string(body), "\n") {
		if line == "" {
			continue
		}
		fields := strings.Split(line, ":")
		if len(fields) < 7 {
			continue
		}
		// Malformed offsets parse as 0; the entry is kept, as before.
		start, _ := strconv.ParseInt(fields[1], 10, 64)
		out = append(out, idxEntry{
			Index:     len(out),
			ByteStart: start,
			Date:      fields[2],
			Variable:  fields[3],
			Level:     fields[4],
			Forecast:  fields[5],
		})
	}
	return out
}
// filterEntries keeps only the entries whose variable is one of the
// configured Variables and whose level is a pressure level (ends in " mb").
func (d *PartialDownloader) filterEntries(entries []idxEntry) []idxEntry {
	wanted := make(map[string]bool, len(d.Variables))
	for _, v := range d.Variables {
		wanted[v] = true
	}
	var out []idxEntry
	for _, e := range entries {
		if wanted[e.Variable] && strings.HasSuffix(e.Level, " mb") {
			out = append(out, e)
		}
	}
	return out
}
// doWithRetry executes req up to three times, retrying on transport errors
// and 5xx responses with exponential backoff (1s, 2s, 4s) while honouring
// context cancellation. On success the caller owns resp.Body.
func (d *PartialDownloader) doWithRetry(ctx context.Context, req *http.Request) (*http.Response, error) {
	backoff := 1 * time.Second
	const maxRetries = 3
	var err error
	for i := 0; i < maxRetries; i++ {
		var resp *http.Response
		resp, err = d.Client.Do(req)
		if err == nil && resp.StatusCode < 500 {
			return resp, nil
		}
		if resp != nil {
			// A 5xx with a nil transport error must still surface as an
			// error: previously this case fell through the loop and could
			// return (nil, nil), leaving the caller to dereference a nil
			// response.
			if err == nil {
				err = errcodes.Wrap(errcodes.ErrDownload, "server error: "+resp.Status)
			}
			resp.Body.Close()
		}
		log.Ctx(ctx).Warn("retry download", zap.Int("attempt", i+1), zap.Error(err))
		select {
		case <-time.After(backoff):
			backoff *= 2
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}
	return nil, err
}
// downloadRange streams the byte range [start, end] of url into out using an
// HTTP Range request. A non-positive end means "from start to the end of the
// resource". Both 206 (partial) and 200 (full) responses are accepted.
func (d *PartialDownloader) downloadRange(ctx context.Context, url string, start, end int64, out io.Writer) error {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return err
	}
	if end > 0 {
		req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", start, end))
	} else {
		req.Header.Set("Range", fmt.Sprintf("bytes=%d-", start))
	}
	resp, err := d.Client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusPartialContent && resp.StatusCode != http.StatusOK {
		return errcodes.Wrap(errcodes.ErrDownload, "bad status: "+resp.Status)
	}
	_, err = io.Copy(out, resp.Body)
	return err
}
// downloadFieldsFromURL downloads only the GRIB messages listed in url's
// .idx file that match the configured variables and pressure levels, writing
// them concatenated to dst via a .part temp file. An already-downloaded,
// non-empty dst is skipped. Fixes over the previous version: the idx HTTP
// status is checked (an error page used to be parsed as an empty idx and
// silently succeed), the neighbour lookup uses entry.Index instead of an
// O(n²) scan matching on ByteStart (which also picked the wrong entry when
// offsets repeated), and the file is closed exactly once on the error path.
func (d *PartialDownloader) downloadFieldsFromURL(ctx context.Context, url string, dst string, step int) error {
	// Already downloaded — nothing to do.
	if info, err := os.Stat(dst); err == nil && info.Size() > 0 {
		return nil
	}

	idxURL := url + ".idx"
	reqIdx, err := http.NewRequestWithContext(ctx, http.MethodGet, idxURL, nil)
	if err != nil {
		return err
	}
	respIdx, err := d.doWithRetry(ctx, reqIdx)
	if err != nil {
		return errcodes.Wrap(err, "failed to get idx")
	}
	defer respIdx.Body.Close()
	if respIdx.StatusCode != http.StatusOK {
		return errcodes.Wrap(errcodes.ErrDownload, "bad idx status: "+respIdx.Status)
	}
	idxBody, err := io.ReadAll(respIdx.Body)
	if err != nil {
		return errcodes.Wrap(err, "failed to read idx")
	}

	entries := d.parseIdx(idxBody)
	filtered := d.filterEntries(entries)
	if len(filtered) == 0 {
		return nil
	}

	// Each message ends where the next idx entry begins; the last one runs
	// to EOF (end = -1). entry.Index is the entry's position in entries, so
	// the neighbour lookup is O(1).
	type chunk struct{ start, end int64 }
	chunks := make([]chunk, 0, len(filtered))
	var totalBytes int64
	for _, entry := range filtered {
		var endByte int64 = -1
		if entry.Index+1 < len(entries) {
			endByte = entries[entry.Index+1].ByteStart - 1
		}
		chunks = append(chunks, chunk{entry.ByteStart, endByte})
		if endByte > 0 {
			totalBytes += endByte - entry.ByteStart + 1
		}
	}

	tmp := dst + ".part"
	f, err := os.OpenFile(tmp, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
	if err != nil {
		return err
	}

	var downloaded int64
	err = func() error {
		// Sole owner of the close: the outer error path must not close again.
		defer f.Close()
		bufWriter := bufio.NewWriterSize(f, 1024*1024)
		for i, c := range chunks {
			countingWriter := &proxyWriter{
				Writer: bufWriter,
				OnWrite: func(n int) {
					downloaded += int64(n)
					// Throttled progress logging: every 20th chunk.
					if totalBytes > 0 && i%20 == 0 {
						pct := float64(downloaded) / float64(totalBytes) * 100
						log.Ctx(ctx).Debug("download progress",
							zap.Int("step", step),
							zap.String("pct", fmt.Sprintf("%.1f%%", pct)))
					}
				},
			}
			if err := d.downloadRange(ctx, url, c.start, c.end, countingWriter); err != nil {
				return err
			}
		}
		return bufWriter.Flush()
	}()
	if err != nil {
		os.Remove(tmp)
		return err
	}
	return d.safeRename(tmp, dst)
}
type proxyWriter struct {
io.Writer
OnWrite func(int)
}
func (p *proxyWriter) Write(data []byte) (int, error) {
n, err := p.Writer.Write(data)
if n > 0 && p.OnWrite != nil {
p.OnWrite(n)
}
return n, err
}
// safeRename renames src to dst, retrying up to five times with a 150 ms
// pause between attempts — presumably to ride out transient sharing
// violations (e.g. a scanner briefly holding the file on Windows); confirm
// against the deployment environment.
func (d *PartialDownloader) safeRename(src, dst string) error {
	const attempts = 5
	var lastErr error
	for i := 0; i < attempts; i++ {
		lastErr = os.Rename(src, dst)
		if lastErr == nil {
			return nil
		}
		time.Sleep(150 * time.Millisecond)
	}
	return fmt.Errorf("rename failed: %w", lastErr)
}
// Run downloads all required files (pgrb2 + pgrb2b) for the given forecast
// run, with at most d.Parallel transfers in flight across both file kinds.
// It returns the first error encountered (errgroup semantics).
func (d *PartialDownloader) Run(ctx context.Context, run time.Time) error {
	log.Ctx(ctx).Info("starting partial download",
		zap.Time("run", run),
		zap.Strings("variables", d.Variables))
	g, ctx := errgroup.WithContext(ctx)
	// sem bounds total concurrent downloads to d.Parallel.
	sem := make(chan struct{}, d.Parallel)
	steps := d.ds.Steps()
	for _, step := range steps {
		step := step // per-iteration copy for the closures below
		// Download primary pgrb2.
		sem <- struct{}{}
		g.Go(func() error {
			defer func() { <-sem }()
			url := d.ds.GribURL(run, step)
			dst := filepath.Join(d.Dir, d.ds.FileName(run, step))
			return d.downloadFieldsFromURL(ctx, url, dst, step)
		})
		// Download secondary pgrb2b.
		sem <- struct{}{}
		g.Go(func() error {
			defer func() { <-sem }()
			url := d.ds.GribURLB(run, step)
			dst := filepath.Join(d.Dir, d.ds.FileNameB(run, step))
			return d.downloadFieldsFromURL(ctx, url, dst, step)
		})
	}
	return g.Wait()
}
// GetLatestModelRun finds the most recent available GFS forecast run by
// probing the URL of the final forecast hour (MaxHour) with HEAD requests,
// stepping back 6 hours at a time for up to 8 attempts (48 hours of runs).
func GetLatestModelRun(ctx context.Context, dc *DatasetConfig) (time.Time, error) {
	now := time.Now().UTC()
	// Round down to the latest 6-hourly synoptic cycle (00/06/12/18 UTC).
	hour := now.Hour() - (now.Hour() % 6)
	current := time.Date(now.Year(), now.Month(), now.Day(), hour, 0, 0, 0, time.UTC)
	client := &http.Client{Timeout: 10 * time.Second}
	for i := 0; i < 8; i++ {
		// If the last forecast hour exists, the whole run has been published.
		url := dc.GribURL(current, dc.MaxHour)
		req, err := http.NewRequestWithContext(ctx, http.MethodHead, url, nil)
		if err != nil {
			current = current.Add(-6 * time.Hour)
			continue
		}
		resp, err := client.Do(req)
		if err == nil && resp.StatusCode == http.StatusOK {
			resp.Body.Close()
			log.Ctx(ctx).Info("found latest model run", zap.Time("run", current))
			return current, nil
		}
		if resp != nil {
			resp.Body.Close()
		}
		current = current.Add(-6 * time.Hour)
	}
	return time.Time{}, errcodes.Wrap(errcodes.ErrDownload, "no recent GFS forecast found")
}

View file

@ -2,15 +2,6 @@ package grib
import "math"
// 47 pressure levels matching tawhiri configuration
var pressureLevels = []float64{
1000, 975, 950, 925, 900, 875, 850, 825, 800, 775,
750, 725, 700, 675, 650, 625, 600, 575, 550, 525,
500, 475, 450, 425, 400, 375, 350, 325, 300, 275,
250, 225, 200, 175, 150, 125, 100, 70, 50, 30,
20, 10, 7, 5, 3, 2, 1,
}
func pressureFromAlt(alt float64) float64 { // ICAO ISA
return 1013.25 * math.Pow(1-alt/44307.69396, 5.255877)
}

View file

@ -1,265 +0,0 @@
package grib
import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"time"
"git.intra.yksa.space/gsn/predictor/internal/pkg/errcodes"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/service/s3"
"golang.org/x/sync/errgroup"
)
// S3Downloader downloads GRIB files from AWS S3
type S3Downloader struct {
Dir string
Parallel int
Bucket string
Region string
Client *s3.Client
}
// NewS3Downloader creates a new S3 downloader with anonymous access
func NewS3Downloader(dir string, parallel int, bucket, region string) (*S3Downloader, error) {
// Create AWS config with anonymous credentials for public bucket
cfg, err := config.LoadDefaultConfig(context.Background(),
config.WithRegion(region),
config.WithCredentialsProvider(aws.AnonymousCredentials{}),
)
if err != nil {
return nil, errcodes.Wrap(err, "failed to load AWS config")
}
client := s3.NewFromConfig(cfg)
return &S3Downloader{
Dir: dir,
Parallel: parallel,
Bucket: bucket,
Region: region,
Client: client,
}, nil
}
// s3Key generates the S3 key for a GRIB file
// Path format: gfs.YYYYMMDD/HH/atmos/gfs.tHHz.pgrb2.0p50.fFFF
func (d *S3Downloader) s3Key(run string, hour int, step int) string {
return fmt.Sprintf("gfs.%s/%02d/atmos/gfs.t%02dz.pgrb2.0p50.f%03d", run, hour, hour, step)
}
// CheckFileExists checks if a file exists in S3 using HeadObject
func (d *S3Downloader) CheckFileExists(ctx context.Context, key string) (bool, int64, error) {
input := &s3.HeadObjectInput{
Bucket: aws.String(d.Bucket),
Key: aws.String(key),
}
result, err := d.Client.HeadObject(ctx, input)
if err != nil {
// Check if error is NotFound
// AWS SDK v2 doesn't export specific error types, check error string
if isNotFoundError(err) {
return false, 0, nil
}
return false, 0, errcodes.Wrap(err, "failed to check file existence")
}
size := int64(0)
if result.ContentLength != nil {
size = *result.ContentLength
}
return true, size, nil
}
// isNotFoundError checks if error is a NotFound error
func isNotFoundError(err error) bool {
if err == nil {
return false
}
// AWS SDK v2 error handling
errStr := err.Error()
return contains(errStr, "NotFound") || contains(errStr, "404") || contains(errStr, "NoSuchKey")
}
func contains(s, substr string) bool {
return len(s) >= len(substr) && (s == substr || len(s) > len(substr) && findSubstring(s, substr))
}
func findSubstring(s, substr string) bool {
for i := 0; i <= len(s)-len(substr); i++ {
if s[i:i+len(substr)] == substr {
return true
}
}
return false
}
// ListAvailableFiles lists all available files for a given run
func (d *S3Downloader) ListAvailableFiles(ctx context.Context, run string, hour int) ([]string, error) {
prefix := fmt.Sprintf("gfs.%s/%02d/atmos/", run, hour)
input := &s3.ListObjectsV2Input{
Bucket: aws.String(d.Bucket),
Prefix: aws.String(prefix),
}
var files []string
paginator := s3.NewListObjectsV2Paginator(d.Client, input)
for paginator.HasMorePages() {
page, err := paginator.NextPage(ctx)
if err != nil {
return nil, errcodes.Wrap(err, "failed to list S3 objects")
}
for _, obj := range page.Contents {
if obj.Key != nil {
files = append(files, *obj.Key)
}
}
}
return files, nil
}
// fetchFromS3 downloads a file from S3 to local disk with retry logic
func (d *S3Downloader) fetchFromS3(ctx context.Context, key, dst string) (err error) {
// Check if final file already exists
if _, err := os.Stat(dst); err == nil {
return nil
}
const maxRetries = 3
var lastErr error
for attempt := 0; attempt < maxRetries; attempt++ {
if attempt > 0 {
// Exponential backoff: 2s, 4s, 8s
waitTime := time.Duration(1<<uint(attempt)) * time.Second
time.Sleep(waitTime)
}
lastErr = d.fetchFromS3Once(ctx, key, dst)
if lastErr == nil {
return nil
}
}
return errcodes.Wrap(lastErr, fmt.Sprintf("failed after %d retries", maxRetries))
}
// fetchFromS3Once performs a single download attempt
func (d *S3Downloader) fetchFromS3Once(ctx context.Context, key, dst string) (err error) {
tmp := dst + ".part"
// Remove old .part file if it exists
os.Remove(tmp)
f, err := os.Create(tmp)
if err != nil {
return err
}
fileClosed := false
// Cleanup .part file on any error (using named return value)
defer func() {
if !fileClosed {
f.Close()
}
if err != nil {
os.Remove(tmp)
}
}()
// Check if file exists in S3
exists, size, checkErr := d.CheckFileExists(ctx, key)
if checkErr != nil {
return errcodes.Wrap(checkErr, "failed to check S3 file existence")
}
if !exists {
return errcodes.Wrap(errcodes.ErrDownload, fmt.Sprintf("file not found in S3: %s", key))
}
// Download from S3
input := &s3.GetObjectInput{
Bucket: aws.String(d.Bucket),
Key: aws.String(key),
}
result, err := d.Client.GetObject(ctx, input)
if err != nil {
return errcodes.Wrap(err, "failed to get S3 object")
}
defer result.Body.Close()
// Copy to local file
written, err := io.Copy(f, result.Body)
if err != nil {
return errcodes.Wrap(err, fmt.Sprintf("failed to write S3 object to file %s", dst))
}
// Verify size if available
if size > 0 && written != size {
return errcodes.Wrap(errcodes.ErrDownload, fmt.Sprintf("size mismatch: got %d bytes, expected %d", written, size))
}
// Close file before rename
if err := f.Close(); err != nil {
return err
}
fileClosed = true
// If rename fails, err will be set and defer will cleanup .part file
return os.Rename(tmp, dst)
}
// Run downloads all required GRIB files for a forecast run
func (d *S3Downloader) Run(ctx context.Context, run time.Time) error {
runStr := run.Format("20060102")
hour := run.Hour()
// First, list available files to verify they exist
availableFiles, err := d.ListAvailableFiles(ctx, runStr, hour)
if err != nil {
return errcodes.Wrap(err, "failed to list available files")
}
if len(availableFiles) == 0 {
return errcodes.Wrap(errcodes.ErrDownload, fmt.Sprintf("no files found for run %s/%02d", runStr, hour))
}
// Build a map of available files for quick lookup
availableMap := make(map[string]bool)
for _, file := range availableFiles {
availableMap[file] = true
}
g, ctx := errgroup.WithContext(ctx)
sem := make(chan struct{}, d.Parallel)
for _, step := range steps {
step := step
key := d.s3Key(runStr, hour, step)
// Check if file is available in S3
if !availableMap[key] {
// Log warning but don't fail - some forecast hours might not be available yet
continue
}
sem <- struct{}{}
g.Go(func() error {
defer func() { <-sem }()
dst := filepath.Join(d.Dir, fileName(run, step))
return d.fetchFromS3(ctx, key, dst)
})
}
return g.Wait()
}

View file

@ -6,25 +6,11 @@ import (
"time"
)
// Generate steps from 0 to 96 with step 3 hours (33 steps total)
// GFS provides 3-hourly data for 0-120 hours, we use first 96 hours (0, 3, 6, ..., 96)
var steps = func() []int {
result := make([]int, 0, 33)
for i := 0; i <= 96; i += 3 {
result = append(result, i)
}
return result
}()
func nearestRun(t time.Time) time.Time {
h := t.UTC().Hour() - t.UTC().Hour()%6
return time.Date(t.Year(), t.Month(), t.Day(), h, 0, 0, 0, time.UTC)
}
func fileName(run time.Time, step int) string {
return fmt.Sprintf("gfs.t%02dz.pgrb2.0p50.f%03d", run.Hour(), step)
}
func encodeKey(a ...any) uint64 {
h := fnv.New64a()
for _, v := range a {