cmd/strelaypoolsrv: Prevent scraped metrics moving backwards (#5068)
This commit is contained in:
parent
5161f03f02
commit
93fdd1c012
|
@ -496,7 +496,7 @@ func handleRelayTest(request request) {
|
|||
|
||||
mut.Lock()
|
||||
if stats != nil {
|
||||
updateMetrics(request.relay.uri.Host, stats, location)
|
||||
updateMetrics(request.relay.uri.Host, *stats, location)
|
||||
}
|
||||
request.relay.Stats = stats
|
||||
request.relay.StatsRetrieved = time.Now()
|
||||
|
|
|
@ -44,6 +44,8 @@ var (
|
|||
relayGlobalRate = makeGauge("relay_global_rate", "Global rate applied on the whole relay", "relay")
|
||||
relayBuildInfo = makeGauge("relay_build_info", "Build information about a relay", "relay", "go_version", "go_os", "go_arch")
|
||||
relayLocationInfo = makeGauge("relay_location_info", "Location information about a relay", "relay", "city", "country", "continent")
|
||||
|
||||
lastStats = make(map[string]stats)
|
||||
)
|
||||
|
||||
func makeGauge(name string, help string, labels ...string) *prometheus.GaugeVec {
|
||||
|
@ -142,7 +144,7 @@ func refreshStats() {
|
|||
if result.stats == nil {
|
||||
deleteMetrics(result.relay.uri.Host)
|
||||
} else {
|
||||
updateMetrics(result.relay.uri.Host, result.stats, result.relay.Location)
|
||||
updateMetrics(result.relay.uri.Host, *result.stats, result.relay.Location)
|
||||
}
|
||||
}
|
||||
mut.Unlock()
|
||||
|
@ -182,13 +184,18 @@ func fetchStats(relay *relay) *stats {
|
|||
return &stats
|
||||
}
|
||||
|
||||
func updateMetrics(host string, stats *stats, location location) {
|
||||
func updateMetrics(host string, stats stats, location location) {
|
||||
if stats.GoVersion != "" || stats.GoOS != "" || stats.GoArch != "" {
|
||||
relayBuildInfo.WithLabelValues(host, stats.GoVersion, stats.GoOS, stats.GoArch).Add(1)
|
||||
}
|
||||
if location.City != "" || location.Country != "" || location.Continent != "" {
|
||||
relayLocationInfo.WithLabelValues(host, location.City, location.Country, location.Continent).Add(1)
|
||||
}
|
||||
|
||||
if lastStat, ok := lastStats[host]; ok {
|
||||
stats = mergeStats(stats, lastStat)
|
||||
}
|
||||
|
||||
relayUptime.WithLabelValues(host).Set(float64(stats.UptimeSeconds))
|
||||
relayPendingSessionKeys.WithLabelValues(host).Set(float64(stats.PendingSessionKeys))
|
||||
relayActiveSessions.WithLabelValues(host).Set(float64(stats.ActiveSessions))
|
||||
|
@ -198,6 +205,7 @@ func updateMetrics(host string, stats *stats, location location) {
|
|||
relayGoRoutines.WithLabelValues(host).Set(float64(stats.GoRoutines))
|
||||
relaySessionRate.WithLabelValues(host).Set(float64(stats.Options.SessionRate))
|
||||
relayGlobalRate.WithLabelValues(host).Set(float64(stats.Options.GlobalRate))
|
||||
lastStats[host] = stats
|
||||
}
|
||||
|
||||
func deleteMetrics(host string) {
|
||||
|
@ -210,4 +218,33 @@ func deleteMetrics(host string) {
|
|||
relayGoRoutines.DeleteLabelValues(host)
|
||||
relaySessionRate.DeleteLabelValues(host)
|
||||
relayGlobalRate.DeleteLabelValues(host)
|
||||
delete(lastStats, host)
|
||||
}
|
||||
|
||||
// Due to some unexplainable behaviour, some of the numbers sometimes travel slightly backwards (by less than 1%)
|
||||
// This happens between scrapes, which is 30s, so this can't be a race.
|
||||
// This causes prometheus to assume a "rate reset", hence causes phenomenal spikes.
|
||||
// One of the number that moves backwards is BytesProxied, which atomically increments a counter with numeric value
|
||||
// returned by net.Conn.Read(). I don't think that can return a negative value, so I have no idea what's going on.
|
||||
func mergeStats(new stats, old stats) stats {
|
||||
new.UptimeSeconds = mergeValue(new.UptimeSeconds, old.UptimeSeconds)
|
||||
new.PendingSessionKeys = mergeValue(new.PendingSessionKeys, old.PendingSessionKeys)
|
||||
new.ActiveSessions = mergeValue(new.ActiveSessions, old.ActiveSessions)
|
||||
new.Connections = mergeValue(new.Connections, old.Connections)
|
||||
new.Proxies = mergeValue(new.Proxies, old.Proxies)
|
||||
new.BytesProxied = mergeValue(new.BytesProxied, old.BytesProxied)
|
||||
new.GoRoutines = mergeValue(new.GoRoutines, old.GoRoutines)
|
||||
new.Options.SessionRate = mergeValue(new.Options.SessionRate, old.Options.SessionRate)
|
||||
new.Options.GlobalRate = mergeValue(new.Options.GlobalRate, old.Options.GlobalRate)
|
||||
return new
|
||||
}
|
||||
|
||||
func mergeValue(new, old int) int {
|
||||
if new >= old {
|
||||
return new // normal increase
|
||||
}
|
||||
if float64(new) > 0.99*float64(old) {
|
||||
return old // slight backward movement
|
||||
}
|
||||
return new // reset (relay restart)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
// Copyright (C) 2015 Audrius Butkevicius and Contributors (see the CONTRIBUTORS file).
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestMerge(t *testing.T) {
|
||||
if mergeValue(1001, 1000) != 1001 {
|
||||
t.Error("the computer says no")
|
||||
}
|
||||
|
||||
if mergeValue(999, 1000) != 1000 {
|
||||
t.Error("the computer says no")
|
||||
}
|
||||
|
||||
if mergeValue(1, 1000) != 1 {
|
||||
t.Error("the computer says no")
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue