This commit is contained in:
Anatoly Antonov 2026-05-18 03:17:17 +09:00
parent 7a8d5d13fa
commit 9e663db9dc
68 changed files with 5647 additions and 2958 deletions

146
internal/metrics/prom.go Normal file
View file

@ -0,0 +1,146 @@
package metrics
import (
"fmt"
"io"
"net/http"
"sort"
"strings"
"sync"
"time"
)
// Prom is a minimal Sink that exposes counters and gauges in Prometheus's
// text exposition format. No external dependencies.
//
// The Prom sink supports labelled counters, sums (for durations and byte
// counts), and labelled gauges. Histograms are intentionally omitted; if
// they are needed later, swap Prom for an OTel-based sink.
//
// Both maps are keyed first by metric name and then by the rendered label
// suffix produced by labelKey (the empty string for an unlabelled series).
// Construct a Prom with NewProm: the outer maps must be allocated before the
// first observation is recorded.
type Prom struct {
// mu guards counters and gauges; incCounter, setGauge and Write all take it.
mu sync.Mutex
// counters holds monotonically accumulated values (incremented via +=).
counters map[string]map[string]float64 // name → label-key → value
// gauges holds last-written values (overwritten on every set).
gauges map[string]map[string]float64 // name → label-key → value
}
// NewProm constructs a Prom sink whose counter and gauge tables are allocated
// and empty, ready to record observations.
func NewProm() *Prom {
	sink := new(Prom)
	sink.counters = map[string]map[string]float64{}
	sink.gauges = map[string]map[string]float64{}
	return sink
}
// Prediction implements Sink.
//
// Each call bumps the prediction counter and the duration sum/count series.
// All three are labelled with the profile and a status of "ok" (err is nil)
// or "error" (err is non-nil).
func (p *Prom) Prediction(profile string, d time.Duration, err error) {
	labels := map[string]string{"profile": profile, "status": "ok"}
	if err != nil {
		labels["status"] = "error"
	}

	updates := []struct {
		metric string
		delta  float64
	}{
		{"predictor_predictions_total", 1},
		{"predictor_prediction_duration_seconds_sum", d.Seconds()},
		{"predictor_prediction_duration_seconds_count", 1},
	}
	for _, u := range updates {
		p.incCounter(u.metric, labels, u.delta)
	}
}
// Download implements Sink.
//
// The download count and duration sum are labelled by source and status; the
// byte total is labelled by source only.
func (p *Prom) Download(source string, d time.Duration, status string, bytes int64) {
	bySource := map[string]string{"source": source}
	byOutcome := map[string]string{"source": source, "status": status}

	p.incCounter("predictor_downloads_total", byOutcome, 1)
	p.incCounter("predictor_download_duration_seconds_sum", byOutcome, d.Seconds())
	p.incCounter("predictor_download_bytes_total", bySource, float64(bytes))
}
// ActiveEpoch implements Sink.
//
// The gauge is set to t as Unix seconds, or to 0 when t is the zero time.
func (p *Prom) ActiveEpoch(t time.Time) {
	const gauge = "predictor_active_dataset_epoch_seconds"
	noLabels := map[string]string{}

	if t.IsZero() {
		p.setGauge(gauge, noLabels, 0)
		return
	}
	p.setGauge(gauge, noLabels, float64(t.Unix()))
}
// ServeHTTP serves the current metrics as a Prometheus text exposition
// document. The request itself is not inspected.
func (p *Prom) ServeHTTP(w http.ResponseWriter, _ *http.Request) {
	const contentType = "text/plain; version=0.0.4"

	header := w.Header()
	header.Set("Content-Type", contentType)
	p.Write(w)
}
// Write writes the metrics in Prometheus exposition format to w.
//
// Metric families are emitted in sorted name order, each preceded by its
// "# TYPE" line. The output is rendered into memory while the sink's lock is
// held and written to w only after the lock has been released, so a slow
// writer (for example a stalled scrape connection) cannot block goroutines
// that are recording observations.
func (p *Prom) Write(w io.Writer) {
	out := p.render()
	if out == "" {
		// Nothing recorded yet: leave w untouched.
		return
	}
	// Write has no error return; a failed or partial write is dropped on
	// purpose because the scraper will simply retry on its next interval.
	_, _ = io.WriteString(w, out)
}

// render formats every metric family into a single string while holding the
// sink's lock.
func (p *Prom) render() string {
	p.mu.Lock()
	defer p.mu.Unlock()

	names := make([]string, 0, len(p.counters)+len(p.gauges))
	for n := range p.counters {
		names = append(names, n)
	}
	for n := range p.gauges {
		// A name present in both tables is already listed; adding it again
		// would emit both of its families twice.
		if _, listed := p.counters[n]; !listed {
			names = append(names, n)
		}
	}
	sort.Strings(names)

	var sb strings.Builder
	for _, name := range names {
		if series, ok := p.counters[name]; ok {
			fmt.Fprintf(&sb, "# TYPE %s counter\n", name)
			writeMetricFamily(&sb, name, series)
		}
		if series, ok := p.gauges[name]; ok {
			fmt.Fprintf(&sb, "# TYPE %s gauge\n", name)
			writeMetricFamily(&sb, name, series)
		}
	}
	return sb.String()
}
// writeMetricFamily writes one sample line per entry of labels to w, in
// sorted order of the map keys. Each line is the metric name immediately
// followed by the key (an already-rendered label suffix), a space, and the
// value formatted with %g.
func writeMetricFamily(w io.Writer, name string, labels map[string]float64) {
	suffixes := make([]string, 0, len(labels))
	for suffix := range labels {
		suffixes = append(suffixes, suffix)
	}
	sort.Strings(suffixes)

	for i := 0; i < len(suffixes); i++ {
		suffix := suffixes[i]
		value := labels[suffix]
		fmt.Fprintf(w, "%s%s %g\n", name, suffix, value)
	}
}
// incCounter adds n to the counter series identified by name and labels,
// allocating the metric family the first time the name is seen.
func (p *Prom) incCounter(name string, labels map[string]string, n float64) {
	// The label suffix is rendered before the lock is taken.
	suffix := labelKey(labels)

	p.mu.Lock()
	defer p.mu.Unlock()

	family := p.counters[name]
	if family == nil {
		family = make(map[string]float64)
		p.counters[name] = family
	}
	family[suffix] += n
}
// setGauge overwrites the gauge series identified by name and labels with v,
// allocating the metric family the first time the name is seen.
func (p *Prom) setGauge(name string, labels map[string]string, v float64) {
	// The label suffix is rendered before the lock is taken.
	suffix := labelKey(labels)

	p.mu.Lock()
	defer p.mu.Unlock()

	family := p.gauges[name]
	if family == nil {
		family = make(map[string]float64)
		p.gauges[name] = family
	}
	family[suffix] = v
}
// labelValueEscaper applies the only escapes the Prometheus text exposition
// format defines for label values: backslash, double quote and line feed.
var labelValueEscaper = strings.NewReplacer(`\`, `\\`, `"`, `\"`, "\n", `\n`)

// labelKey renders the labels into a Prometheus-format `{k1="v1",k2="v2"}`
// suffix, or the empty string if there are no labels.
//
// Label names are emitted in sorted order so that the same label set always
// yields the same key. Label values are escaped with labelValueEscaper rather
// than Go's %q verb: %q produces Go string-literal escapes such as \t or
// \x00, which are not valid escape sequences in the exposition format.
func labelKey(labels map[string]string) string {
	if len(labels) == 0 {
		return ""
	}

	keys := make([]string, 0, len(labels))
	for k := range labels {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	var sb strings.Builder
	sb.WriteByte('{')
	for i, k := range keys {
		if i > 0 {
			sb.WriteByte(',')
		}
		sb.WriteString(k)
		sb.WriteString(`="`)
		sb.WriteString(labelValueEscaper.Replace(labels[k]))
		sb.WriteByte('"')
	}
	sb.WriteByte('}')
	return sb.String()
}

View file

@ -0,0 +1,49 @@
package metrics
import (
"bytes"
"strings"
"testing"
"time"
)
// TestPromCounters records predictions for two profiles and checks that the
// rendered output contains the per-profile counts and a duration sum series.
func TestPromCounters(t *testing.T) {
	p := NewProm()

	observations := []struct {
		profile string
		took    time.Duration
	}{
		{"standard_profile", 100 * time.Millisecond},
		{"standard_profile", 200 * time.Millisecond},
		{"float_profile", 50 * time.Millisecond},
	}
	for _, o := range observations {
		p.Prediction(o.profile, o.took, nil)
	}

	var buf bytes.Buffer
	p.Write(&buf)
	out := buf.String()

	checks := []struct {
		needle  string
		failure string
	}{
		{
			`predictor_predictions_total{profile="standard_profile",status="ok"} 2`,
			"expected count=2 for standard_profile, got: %s",
		},
		{
			`predictor_predictions_total{profile="float_profile",status="ok"} 1`,
			"expected count=1 for float_profile, got: %s",
		},
		{
			// Only the presence of the sum series is checked, not its value.
			"predictor_prediction_duration_seconds_sum",
			"expected sum present, got: %s",
		},
	}
	for _, c := range checks {
		if !strings.Contains(out, c.needle) {
			t.Errorf(c.failure, out)
		}
	}
}
// TestPromGauge sets the active-epoch gauge and checks that it is rendered
// as an unlabelled sample with the expected value.
func TestPromGauge(t *testing.T) {
	const want = "predictor_active_dataset_epoch_seconds 1.7e+09"

	p := NewProm()
	p.ActiveEpoch(time.Unix(1700000000, 0))

	var buf bytes.Buffer
	p.Write(&buf)

	if got := buf.String(); !strings.Contains(got, want) {
		t.Errorf("expected gauge with epoch 1700000000, got: %s", got)
	}
}
// TestNoop calls every Sink method on the no-op sink; the test passes as long
// as none of the calls panics.
func TestNoop(t *testing.T) {
	discard := Noop()

	discard.ActiveEpoch(time.Now())
	discard.Download("any", time.Second, "complete", 0)
	discard.Prediction("any", time.Second, nil)
}

36
internal/metrics/types.go Normal file
View file

@ -0,0 +1,36 @@
// Package metrics defines the Sink interface used to record service metrics
// and ships two implementations: a Noop sink (default, zero-cost) and a Prom
// sink that exposes counters and gauges in the Prometheus text exposition
// format.
//
// The metrics layer is optional: if no Sink is wired (or Noop is wired), the
// service runs unchanged.
package metrics
import "time"
// Sink collects observations from the rest of the service.
//
// Implementations must be safe for concurrent use across many goroutines.
// All methods are advisory; implementations may ignore any observation.
type Sink interface {
// Prediction records the duration and outcome of one prediction.
// err is nil on success and non-nil on failure. How a failure is labelled
// is up to the implementation: the Prom sink in this package records only
// status="ok" or status="error", not the error's class.
Prediction(profile string, duration time.Duration, err error)
// Download records the outcome of one dataset download job.
// status is "complete", "failed", or "cancelled"; bytes is the number of
// bytes attributed to the job.
Download(source string, duration time.Duration, status string, bytes int64)
// ActiveEpoch reports the forecast time of the currently-loaded dataset.
// Pass time.Time{} when no dataset is loaded.
ActiveEpoch(t time.Time)
}
// Noop returns a Sink whose methods all do nothing, so every observation
// passed to it is dropped.
func Noop() Sink {
	var discard noop
	return discard
}
// noop is a stateless type whose methods have empty bodies; it backs the
// sink returned by Noop.
type noop struct{}

// Prediction discards the observation.
func (noop) Prediction(_ string, _ time.Duration, _ error) {
}

// Download discards the observation.
func (noop) Download(_ string, _ time.Duration, _ string, _ int64) {
}

// ActiveEpoch discards the observation.
func (noop) ActiveEpoch(_ time.Time) {
}