\documentclass[a4paper,11pt]{article}
\usepackage[margin=1in]{geometry}
\usepackage{amsmath, amssymb}
\usepackage{algorithm, algpseudocode}
\usepackage{hyperref}

\title{stratoflights-predictor: Mathematical Reference}
\author{stratoflights-predictor}
\date{}

\begin{document}
\maketitle

\noindent This document is the end-to-end mathematical specification of the
trajectory calculation performed by stratoflights-predictor. It is meant
to be detailed enough to permit hand verification of the numerical
output. Section~\ref{sec:numerics} covers the small numerics library
(\verb|internal/numerics|); the remaining sections describe the engine
(\verb|internal/engine|) and the data plane.

\tableofcontents

% =========================================================================
\section{State vector and equations of motion}
\label{sec:state}

\paragraph{State vector.} The balloon state is the four-tuple
\[
  \mathbf{s}(t) \;=\; \bigl(t,\; \varphi(t),\; \lambda(t),\; h(t)\bigr),
\]
where $t$ is UNIX seconds, $\varphi \in [-90, 90]$ is geographic latitude
in degrees, $\lambda \in [0, 360)$ is geographic longitude in degrees,
and $h$ is altitude above mean sea level in metres. Inside the
implementation, the spatial part $(\varphi, \lambda, h)$ is the
\verb|engine.State| struct; $t$ is tracked separately by the integrator.

\paragraph{Equations of motion.} The time derivative of state is the
direction-agnostic vector
\[
  \dot{\mathbf{s}}(t) \;=\; \bigl(\dot \varphi,\; \dot \lambda,\; \dot h\bigr)
  \;=\; \sum_{m \in \text{Models}(t)} \mathbf{F}_m(t, \mathbf{s}),
\]
i.e.\ the sum of the active stage's models evaluated at the current
state. The supported per-model contributions are:

\paragraph{Constant rate.} A purely vertical model with no horizontal
component, used for the standard balloon ascent profile:
\[
  \mathbf{F}_{\text{const}}(t, \mathbf{s}) = (0, 0, r), \qquad r \in \mathbb{R}.
\]
Positive $r$ is upward; a negative $r$ may be combined with reverse-time
integration to model an ascent backwards from a known apex.

\paragraph{Parachute descent.} The vertical contribution under a constant
drag coefficient with the NASA atmosphere model density $\rho(h)$:
\[
  \mathbf{F}_{\text{drag}}(t, \mathbf{s}) = \Bigl(0, 0, -\frac{k}{\sqrt{\rho(h)}}\Bigr),
  \qquad k = r_0 \cdot 1{.}1045,
\]
where $r_0$ is the descent rate at sea level. Density is computed
piecewise from the layered model described in
\href{https://www.grc.nasa.gov/WWW/K-12/airplane/atmosmet.html}{NASA's
atmosphere page}.

\paragraph{Piecewise rate.} A schedule $\{(\tau_i, r_i)\}_{i=1}^N$
parameterised in either absolute UNIX time, or seconds since
profile start, or seconds since the propagator's own start. Resolution
happens lazily through the \verb|Propagator.BuildModel| hook so the same
spec can be reused across profiles with different launch times.
The contribution at time $t$ is
\[
  \mathbf{F}_{\text{pwc}}(t, \mathbf{s}) = (0, 0, r_{i^\star}),
  \qquad i^\star = \min\{i : \tau_i > t\}.
\]

\paragraph{Wind transport.} The horizontal contribution from sampling the
loaded wind field $W$:
\[
  \mathbf{F}_{\text{wind}}(t, \mathbf{s}) = \Bigl(
    \frac{180}{\pi}\,\frac{v}{R + h},\;\;
    \frac{180}{\pi}\,\frac{u}{(R + h)\cos\bigl(\varphi\,\pi/180\bigr)},\;\;
    0
  \Bigr),
\]
where $(u, v) = W(t, \varphi, \lambda, h)$ are the eastward and northward
wind components in metres per second, and $R = 6{,}371{,}009$~m is the
spherical Earth radius. The implementation lives in
\verb|engine.WindTransport| (\verb|engine/models.go|).

\paragraph{Coordinate system.} The model is a spherical Earth in
plate-carrée (latitude/longitude/altitude) coordinates. This matches the
reference Tawhiri predictor exactly and is necessary for bit-identical
back-to-back testing. A WGS84/ECEF variant is planned but deferred: it
would require converting U/V wind components from the GFS sphere model
to the ellipsoid, which is not a trivial coordinate transform.

% =========================================================================
\section{Profiles and propagators}
\label{sec:profile}

\paragraph{Propagator.} A propagator owns one Model and a list of
Constraints; it produces a sequence of trajectory points via classical
Runge--Kutta--4 integration with step $\Delta t$ (positive for forward,
negative for reverse propagation):
\[
  \Pi : (t_0, \mathbf{s}_0) \;\longmapsto\; \bigl[(t_k, \mathbf{s}_k)\bigr]_{k=0}^{K}.
\]
The sequence ends at the first $k$ where any constraint is violated;
the violation point is refined by binary search (see
\S\ref{sec:numerics}).

\paragraph{Profile.} A profile is an ordered chain of propagators
$[\Pi_1, \Pi_2, \ldots, \Pi_N]$. Stage $i$ starts where stage $i-1$
ended; the time direction (sign of $\Delta t$) is shared.

\paragraph{Constraint actions.} When a constraint $c$ is violated at the
refined point $(t^\star, \mathbf{s}^\star)$, $c.\text{Action}$ controls
the dispatch:
\begin{itemize}
  \item \texttt{stop} — the profile ends at $(t^\star, \mathbf{s}^\star)$.
  \item \texttt{fallback} — the current propagator hands off to its
        \texttt{Fallback} propagator (chains supported).
  \item \texttt{clip} — the violated coordinate is clipped to the
        constraint's boundary and integration continues. Useful for soft
        constraints such as ``hold altitude above 500~m''.
\end{itemize}
Constraints fire on full RK4 steps only, never on intermediate
sub-evaluations. This matches the reference Tawhiri behaviour
bit-for-bit.

\paragraph{Reverse propagation.} A profile with \verb|Direction = Reverse|
runs every propagator with $\Delta t = -\Delta t$. Models are
direction-agnostic: their derivative formulas above hold unchanged. The
typical use is to start from a known landing point and recover the
launch position by integrating backwards in time.

% =========================================================================
\section{Constraint geometry}
\label{sec:constraints}

The engine ships four constraint primitives.

\paragraph{Scalar comparison: altitude.}
\(
  c_{\text{alt}}(t, \mathbf{s}) \;=\; h \,\mathrel{\bigotimes}\, h_0,
\)
where $\bigotimes \in \{<, \le, >, \ge, =\}$ is the configured operator
and $h_0$ is the limit (metres). The implementation is
\verb|engine.Altitude| (\verb|engine/constraints.go|).

\paragraph{Scalar comparison: time.} Same shape as altitude, but acting
on $t$ in UNIX seconds. Implementation: \verb|engine.Time|.

\paragraph{Terrain contact.}
\(
  c_{\text{terr}}(t, \mathbf{s}) = \bigl(z(\varphi, \lambda) > h\bigr),
\)
with $z$ provided by the ruaumoko-compatible elevation dataset.

\paragraph{Polygon.} For a polygon $P$ with vertices
$(\varphi_i, \lambda_i)_{i=1}^N$ and mode $\mu \in
\{\text{inside}, \text{outside}\}$, the constraint is
$c_{\text{poly}}(\mathbf{s}) = \bigl(\mathbf{s} \in P\bigr) \oplus
[\mu = \text{outside}]$. Containment is tested by ray casting in
plate-carrée after normalising every longitude to within 180\textdegree{}
of the first vertex; this handles antimeridian-crossing edges so long
as the polygon spans no more than 180\textdegree{} in longitude.

% =========================================================================
\section{Numerics library}
\label{sec:numerics}

The numerics package (\verb|internal/numerics|) provides four primitives:
regular-grid bracketing, multilinear interpolation, monotone bisection,
and classical RK4 with termination-point refinement.

\subsection{Regular-grid bracketing}

\paragraph{Definition.} An \emph{axis} is the regularly-spaced sequence
$x_i = \ell + i \cdot s$ for $i = 0, 1, \ldots, N - 1$, parameterised by
the left edge $\ell$, the step $s > 0$, and the point count $N$.

Given a query $v$, the \emph{bracket} is the pair $(i_0, i_1)$ with
$x_{i_0} \le v < x_{i_1}$ and the dimensionless position
\[
  f = \frac{v - x_{i_0}}{s} \in [0, 1).
\]
Implemented as \verb|Axis.Locate| in \verb|internal/numerics/grid.go|.

\paragraph{Wrapping axes.} For periodic axes (e.g.\ longitude), the
sequence is extended by the convention $x_N = x_0$ so a value approaching
$x_N$ from below brackets $(N{-}1, 0)$ with fraction
$f = (v - x_{N-1})/s$.

\paragraph{Worked example.} Latitude axis with $\ell = -90$, $s = 0{.}5$,
$N = 361$. Query $v = -89{.}75$ yields $p = 0{.}5$, so $i_0 = 0$,
$i_1 = 1$, $f = 0{.}5$.

\subsection{Multilinear interpolation}

\paragraph{Definition.} For a scalar field $u$ defined at the grid nodes
of three axes, the trilinear interpolant at brackets
$b_a, b_b, b_c$ is
\[
  \tilde u = \sum_{i, j, k \in \{0, 1\}} w_{a,i} \, w_{b,j} \, w_{c,k}
  \; u\bigl(b_a^i, b_b^j, b_c^k\bigr),
\]
where $w_{\bullet, 0} = 1 - f_\bullet$ and $w_{\bullet, 1} = f_\bullet$.
The corner terms are accumulated in the order
$(0,0,0), (0,0,1), \ldots, (1,1,1)$, matching the reference Cython
implementation so that double-precision results agree byte for byte.

\paragraph{Linear exactness.} For any affine field
$u(i, j, k) = \alpha i + \beta j + \gamma k + \delta$, the formula returns
$\alpha p_a + \beta p_b + \gamma p_c + \delta$ exactly (modulo
floating-point rounding), where $p_\bullet = b_\bullet^0 + f_\bullet$.

\subsection{Monotone bisection}

For an integer-indexed monotone non-decreasing sequence
$f : \{i_{\min}, \ldots, i_{\max}\} \to \mathbb{R}$ and a target $t$,
\verb|Bisect| returns the largest index $i^\star$ with $f(i^\star) < t$.
Used by the wind sampler to locate the pressure level bracketing the
query altitude. Time complexity:
$\mathcal{O}(\log(i_{\max} - i_{\min}))$.

\subsection{Classical RK4}

\paragraph{Definition.} For a state $y$, derivative $\dot y = f(t, y)$,
and step $\Delta t$, \verb|RK4Step| applies
\[
\begin{aligned}
  k_1 &= f(t, y), \\
  k_2 &= f\bigl(t + \tfrac{\Delta t}{2}, \; y + \tfrac{\Delta t}{2} k_1\bigr), \\
  k_3 &= f\bigl(t + \tfrac{\Delta t}{2}, \; y + \tfrac{\Delta t}{2} k_2\bigr), \\
  k_4 &= f\bigl(t + \Delta t, \; y + \Delta t \cdot k_3\bigr), \\
  y(t + \Delta t) &= y + \tfrac{\Delta t}{6}\bigl(k_1 + 2 k_2 + 2 k_3 + k_4\bigr).
\end{aligned}
\]
Reverse-time integration uses $\Delta t < 0$ unchanged; the implementation
contains no branch on the sign of $\Delta t$. Domain-specific vector
arithmetic (longitude wrap) is injected via \verb|VecAdd|.

\subsection{Termination refinement}

After each integration step the propagator checks one or more
constraints. When a constraint reports a violation between $(t_1, y_1)$
(not violated) and $(t_2, y_2)$ (violated), \verb|RefineTrigger|
locates the crossing within tolerance $\tau \in (0, 1)$ by binary
search in the linear-interpolation parameter $\lambda \in [0, 1]$:

\begin{algorithm}[H]
\caption{RefineTrigger}\label{alg:refine}
\begin{algorithmic}[1]
  \State $L \gets 0,\; R \gets 1$
  \State $t_3 \gets t_2,\; y_3 \gets y_2$
  \While{$R - L > \tau$}
    \State $m \gets (L + R)/2$
    \State $t_3 \gets (1 - m)\,t_1 + m\,t_2$
    \State $y_3 \gets \mathrm{lerp}(y_1, y_2, m)$
    \If{constraint violated at $(t_3, y_3)$}
      \State $R \gets m$
    \Else
      \State $L \gets m$
    \EndIf
  \EndWhile
  \State \Return $(t_3, y_3)$
\end{algorithmic}
\end{algorithm}

After $\lceil \log_2 \tau^{-1} \rceil$ iterations, $R - L \le \tau$.
With $\tau = 0{.}01$ and $\Delta t = 60$~s, the returned point is within
$0{.}6$~s of the true crossing in parameter space; the corresponding
altitude error is bounded by $0{.}6\,|\dot y|$, which for typical
balloon ascent and parachute descent rates is at most $\sim 3$~m.

The returned point is the \emph{last midpoint sampled} rather than
guaranteed to lie on the triggered side; this matches the reference
Tawhiri implementation byte for byte.

% =========================================================================
\section{Wind data pipeline}
\label{sec:winddata}

\paragraph{Data source.} NOAA GFS 0.5\textdegree{} (default) or 0.25\textdegree{}
forecasts, optionally subset by region or hour range. GEFS ensemble
runs are supported by selecting one of the 21 members; each member is a
separate dataset (\verb|DatasetID.Subset.Members = \{m\}|).

\paragraph{Cube layout.} A flat C-order row-major float32 array, shape
$(N_{\text{hours}}, N_{\text{levels}}, 3, N_{\text{lat}}, N_{\text{lng}})$,
where the variable axis is fixed to (HGT, UGRD, VGRD). Per-variant sizes
live in \verb|internal/weather/gfs/variant.go|.

\paragraph{Sampling.} Given a query $(t, \varphi, \lambda, h)$, the
sampler computes the time-in-hours offset
$\tau = (t - t_0)/3600$ from the dataset epoch $t_0$, brackets
$(\tau, \varphi, \lambda)$ on the three horizontal axes, then bisects
the pressure-level axis to find the largest level $\ell$ whose
trilinearly-interpolated HGT is below $h$. Wind components are
extracted via two more trilinear evaluations (at levels $\ell$ and
$\ell + 1$) and linearly interpolated in altitude:
\[
  W(t, \varphi, \lambda, h) = \alpha \cdot W_\ell + (1 - \alpha) \cdot W_{\ell+1},
  \qquad
  \alpha = \frac{H_{\ell+1} - h}{H_{\ell+1} - H_\ell}.
\]

% =========================================================================
\section{Coverage and dataset selection}
\label{sec:coverage}

A loaded dataset $\mathcal{D}$ exposes its \emph{coverage}
$C_\mathcal{D} = (R_\mathcal{D}, [t_0, t_1])$ where $R_\mathcal{D}$ is a
geographic bounding box (possibly antimeridian-spanning) and
$[t_0, t_1]$ is the temporal extent. When more than one dataset is
loaded simultaneously, the predictor selects the first one whose
$C_\mathcal{D}$ contains the launch query. Regional / sub-range
datasets thus complement the global default.

% =========================================================================
\section{Deferrals and design notes}
\label{sec:deferrals}

\paragraph{Mass-aware drift.} The current model assumes the payload moves
horizontally at exactly the local wind velocity. A heavier payload
exhibits a velocity defect proportional to inertial coupling. A
plausible extension is the Stokes-style first-order lag model
\[
  \dot{\mathbf{v}}_p = \frac{1}{\tau}\bigl(\mathbf{v}_{\text{wind}}(t,\mathbf{s}) - \mathbf{v}_p\bigr),
\]
introduced as an additional state variable $\mathbf{v}_p$ alongside the
existing $\mathbf{s}$. The Propagator interface already accepts
arbitrary State types via generics in numerics; the engine could lift
its State to $(\mathbf{s}, \mathbf{v}_p)$ for a future mass-aware
propagator without breaking the existing models.

\paragraph{Coordinate system upgrades.} Migrating to WGS84/ECEF would
remove the cosine factor in the horizontal wind transport equation and
make distances metric directly. GFS itself uses a spherical Earth; the
wind components are not directly portable. A clean implementation
provides a coordinate-system parameter on the profile request; for now,
the spherical model is used uniformly so that outputs remain bit
identical to the upstream Tawhiri.

\paragraph{Monte Carlo.} GEFS already provides 21 ensemble members per
epoch. A Monte Carlo prediction would sample $K$ trajectories per
request, each picking a (member, parameter perturbation) pair. The
recommended architecture is to keep the perturbation inside the
predictor (so the same wind sample can serve many members and any
piecewise rate noise is correlated with the wind step), exposed as a
\verb|POST /api/v1/montecarlo| endpoint that returns one job per
sample and aggregates outcomes.

% =========================================================================
\section{Implementation notes}
\label{sec:impl}

The numerics library is intentionally small (under 300 lines of Go) and
uses no allocations on the hot path. The generic \verb|RK4Step| and
\verb|RefineTrigger| compile to per-type specialisations under Go's
generics, so a future C or Rust port can mirror the implementation
verbatim without changing the call sites in the trajectory engine.

\end{document}