feat: s3 download

This commit is contained in:
Anatoly Antonov 2025-10-20 19:10:07 +09:00
parent a850615e1f
commit c4f355a32e
15 changed files with 590 additions and 109 deletions

View file

@@ -0,0 +1,89 @@
package main
import (
"context"
"fmt"
"log"
"os"
"time"
"git.intra.yksa.space/gsn/predictor/internal/pkg/grib"
)
// main is a manual smoke test for the S3-backed GRIB downloader. It builds a
// downloader, lists the objects available for a recent model run, downloads
// that run under a five-minute deadline, and prints what ended up in the
// download directory. Any unrecoverable failure aborts via log.Fatalf.
func main() {
	// Directory handed to the downloader and inspected after the download.
	const downloadDir = "/tmp/grib_test"

	ctx := context.Background()

	// Create S3 downloader
	downloader, err := grib.NewS3Downloader(
		downloadDir,
		4, // parallel downloads
		"noaa-gfs-bdp-pds",
		"us-east-1",
	)
	if err != nil {
		log.Fatalf("Failed to create S3 downloader: %v", err)
	}

	// Ensure directory exists
	if err := os.MkdirAll(downloadDir, 0o755); err != nil {
		log.Fatalf("Failed to create directory: %v", err)
	}

	// Round the current UTC hour down to a 6-hour boundary (00, 06, 12, 18)
	// and then step back one more cycle so the run is likely to be published.
	now := time.Now().UTC()
	hour := now.Hour() - (now.Hour() % 6)
	run := time.Date(now.Year(), now.Month(), now.Day(), hour, 0, 0, 0, time.UTC).Add(-6 * time.Hour)
	fmt.Printf("Testing S3 download for run: %s\n", run.Format("2006-01-02 15:04 MST"))

	// List available files first
	runStr := run.Format("20060102")
	fmt.Printf("Listing available files for %s/%02d...\n", runStr, run.Hour())
	files, err := downloader.ListAvailableFiles(ctx, runStr, run.Hour())
	if err != nil {
		log.Fatalf("Failed to list files: %v", err)
	}

	// Show at most the first 5 files; ranging over an empty slice is a no-op,
	// so no separate emptiness check is needed.
	fmt.Printf("Found %d files in S3:\n", len(files))
	for i, file := range files {
		if i >= 5 {
			fmt.Printf("... and %d more files\n", len(files)-5)
			break
		}
		fmt.Printf(" - %s\n", file)
	}

	// NOTE(review): the message below says "first 3 forecast hours", but the
	// call passes the whole run and nothing visible here limits the forecast
	// hours — confirm against the downloader's Run implementation.
	fmt.Println("\nTesting download of first 3 forecast hours...")

	// Bound the download with a timeout. The context is released explicitly
	// rather than with defer because log.Fatalf exits without running
	// deferred calls.
	downloadCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
	err = downloader.Run(downloadCtx, run)
	cancel()
	if err != nil {
		log.Fatalf("Failed to download: %v", err)
	}
	fmt.Println("\nDownload completed successfully!")

	// Check downloaded files
	entries, err := os.ReadDir(downloadDir)
	if err != nil {
		log.Fatalf("Failed to read directory: %v", err)
	}
	fmt.Printf("\nDownloaded %d files:\n", len(entries))
	for i, entry := range entries {
		if i >= 10 {
			fmt.Printf("... and %d more files\n", len(entries)-10)
			break
		}
		// Info can fail (for example if the file vanished after ReadDir), in
		// which case the returned FileInfo is nil and must not be used.
		info, err := entry.Info()
		if err != nil {
			fmt.Printf(" - %s (size unavailable: %v)\n", entry.Name(), err)
			continue
		}
		fmt.Printf(" - %s (%.2f MB)\n", entry.Name(), float64(info.Size())/1024/1024)
	}
}

68
scripts/test_s3_simple.go Normal file
View file

@@ -0,0 +1,68 @@
package main
import (
"context"
"fmt"
"io"
"log"
"os"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/service/s3"
)
// main downloads one hard-coded object from S3 using anonymous credentials.
// The body is streamed to a temporary ".part" file which is renamed to its
// final name only after the data has been fully written and the file closed.
// Any failure aborts via log.Fatalf.
func main() {
	ctx := context.Background()

	// Create AWS config with anonymous credentials
	cfg, err := config.LoadDefaultConfig(ctx,
		config.WithRegion("us-east-1"),
		config.WithCredentialsProvider(aws.AnonymousCredentials{}),
	)
	if err != nil {
		log.Fatalf("Failed to load config: %v", err)
	}
	client := s3.NewFromConfig(cfg)

	// Try to download a single file
	bucket := "noaa-gfs-bdp-pds"
	key := "gfs.20251020/00/atmos/gfs.t00z.pgrb2.0p50.f000"
	fmt.Printf("Downloading: s3://%s/%s\n", bucket, key)
	input := &s3.GetObjectInput{
		Bucket: aws.String(bucket),
		Key:    aws.String(key),
	}
	result, err := client.GetObject(ctx, input)
	if err != nil {
		log.Fatalf("Failed to get object: %v", err)
	}
	defer result.Body.Close()

	// Create output file
	outFile := "/tmp/test_grib.part"
	f, err := os.Create(outFile)
	if err != nil {
		log.Fatalf("Failed to create file: %v", err)
	}

	// Copy data
	written, err := io.Copy(f, result.Body)
	if err != nil {
		// Best-effort close; the copy error is the one worth reporting.
		_ = f.Close()
		log.Fatalf("Failed to copy data: %v (wrote %d bytes)", err, written)
	}

	// Close before renaming and check the result: a failed Close can mean the
	// data never reached disk, and a deferred Close would hide that error.
	if err := f.Close(); err != nil {
		log.Fatalf("Failed to close file: %v", err)
	}
	fmt.Printf("Successfully downloaded %d bytes\n", written)

	// Rename
	if err := os.Rename(outFile, "/tmp/test_grib"); err != nil {
		log.Fatalf("Failed to rename: %v", err)
	}
	fmt.Println("Download complete!")
}