cmd/strelaypoolsrv: Prevent scraped metrics moving backwards (#5068)
This commit is contained in:
parent
5161f03f02
commit
93fdd1c012
|
@ -496,7 +496,7 @@ func handleRelayTest(request request) {
|
||||||
|
|
||||||
mut.Lock()
|
mut.Lock()
|
||||||
if stats != nil {
|
if stats != nil {
|
||||||
updateMetrics(request.relay.uri.Host, stats, location)
|
updateMetrics(request.relay.uri.Host, *stats, location)
|
||||||
}
|
}
|
||||||
request.relay.Stats = stats
|
request.relay.Stats = stats
|
||||||
request.relay.StatsRetrieved = time.Now()
|
request.relay.StatsRetrieved = time.Now()
|
||||||
|
|
|
@ -44,6 +44,8 @@ var (
|
||||||
relayGlobalRate = makeGauge("relay_global_rate", "Global rate applied on the whole relay", "relay")
|
relayGlobalRate = makeGauge("relay_global_rate", "Global rate applied on the whole relay", "relay")
|
||||||
relayBuildInfo = makeGauge("relay_build_info", "Build information about a relay", "relay", "go_version", "go_os", "go_arch")
|
relayBuildInfo = makeGauge("relay_build_info", "Build information about a relay", "relay", "go_version", "go_os", "go_arch")
|
||||||
relayLocationInfo = makeGauge("relay_location_info", "Location information about a relay", "relay", "city", "country", "continent")
|
relayLocationInfo = makeGauge("relay_location_info", "Location information about a relay", "relay", "city", "country", "continent")
|
||||||
|
|
||||||
|
lastStats = make(map[string]stats)
|
||||||
)
|
)
|
||||||
|
|
||||||
func makeGauge(name string, help string, labels ...string) *prometheus.GaugeVec {
|
func makeGauge(name string, help string, labels ...string) *prometheus.GaugeVec {
|
||||||
|
@ -142,7 +144,7 @@ func refreshStats() {
|
||||||
if result.stats == nil {
|
if result.stats == nil {
|
||||||
deleteMetrics(result.relay.uri.Host)
|
deleteMetrics(result.relay.uri.Host)
|
||||||
} else {
|
} else {
|
||||||
updateMetrics(result.relay.uri.Host, result.stats, result.relay.Location)
|
updateMetrics(result.relay.uri.Host, *result.stats, result.relay.Location)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
mut.Unlock()
|
mut.Unlock()
|
||||||
|
@ -182,13 +184,18 @@ func fetchStats(relay *relay) *stats {
|
||||||
return &stats
|
return &stats
|
||||||
}
|
}
|
||||||
|
|
||||||
func updateMetrics(host string, stats *stats, location location) {
|
func updateMetrics(host string, stats stats, location location) {
|
||||||
if stats.GoVersion != "" || stats.GoOS != "" || stats.GoArch != "" {
|
if stats.GoVersion != "" || stats.GoOS != "" || stats.GoArch != "" {
|
||||||
relayBuildInfo.WithLabelValues(host, stats.GoVersion, stats.GoOS, stats.GoArch).Add(1)
|
relayBuildInfo.WithLabelValues(host, stats.GoVersion, stats.GoOS, stats.GoArch).Add(1)
|
||||||
}
|
}
|
||||||
if location.City != "" || location.Country != "" || location.Continent != "" {
|
if location.City != "" || location.Country != "" || location.Continent != "" {
|
||||||
relayLocationInfo.WithLabelValues(host, location.City, location.Country, location.Continent).Add(1)
|
relayLocationInfo.WithLabelValues(host, location.City, location.Country, location.Continent).Add(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if lastStat, ok := lastStats[host]; ok {
|
||||||
|
stats = mergeStats(stats, lastStat)
|
||||||
|
}
|
||||||
|
|
||||||
relayUptime.WithLabelValues(host).Set(float64(stats.UptimeSeconds))
|
relayUptime.WithLabelValues(host).Set(float64(stats.UptimeSeconds))
|
||||||
relayPendingSessionKeys.WithLabelValues(host).Set(float64(stats.PendingSessionKeys))
|
relayPendingSessionKeys.WithLabelValues(host).Set(float64(stats.PendingSessionKeys))
|
||||||
relayActiveSessions.WithLabelValues(host).Set(float64(stats.ActiveSessions))
|
relayActiveSessions.WithLabelValues(host).Set(float64(stats.ActiveSessions))
|
||||||
|
@ -198,6 +205,7 @@ func updateMetrics(host string, stats *stats, location location) {
|
||||||
relayGoRoutines.WithLabelValues(host).Set(float64(stats.GoRoutines))
|
relayGoRoutines.WithLabelValues(host).Set(float64(stats.GoRoutines))
|
||||||
relaySessionRate.WithLabelValues(host).Set(float64(stats.Options.SessionRate))
|
relaySessionRate.WithLabelValues(host).Set(float64(stats.Options.SessionRate))
|
||||||
relayGlobalRate.WithLabelValues(host).Set(float64(stats.Options.GlobalRate))
|
relayGlobalRate.WithLabelValues(host).Set(float64(stats.Options.GlobalRate))
|
||||||
|
lastStats[host] = stats
|
||||||
}
|
}
|
||||||
|
|
||||||
func deleteMetrics(host string) {
|
func deleteMetrics(host string) {
|
||||||
|
@ -210,4 +218,33 @@ func deleteMetrics(host string) {
|
||||||
relayGoRoutines.DeleteLabelValues(host)
|
relayGoRoutines.DeleteLabelValues(host)
|
||||||
relaySessionRate.DeleteLabelValues(host)
|
relaySessionRate.DeleteLabelValues(host)
|
||||||
relayGlobalRate.DeleteLabelValues(host)
|
relayGlobalRate.DeleteLabelValues(host)
|
||||||
|
delete(lastStats, host)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Due to some unexplainable behaviour, some of the numbers sometimes travel slightly backwards (by less than 1%)
|
||||||
|
// This happens between scrapes, which is 30s, so this can't be a race.
|
||||||
|
// This causes prometheus to assume a "rate reset", hence causes phenomenal spikes.
|
||||||
|
// One of the number that moves backwards is BytesProxied, which atomically increments a counter with numeric value
|
||||||
|
// returned by net.Conn.Read(). I don't think that can return a negative value, so I have no idea what's going on.
|
||||||
|
func mergeStats(new stats, old stats) stats {
|
||||||
|
new.UptimeSeconds = mergeValue(new.UptimeSeconds, old.UptimeSeconds)
|
||||||
|
new.PendingSessionKeys = mergeValue(new.PendingSessionKeys, old.PendingSessionKeys)
|
||||||
|
new.ActiveSessions = mergeValue(new.ActiveSessions, old.ActiveSessions)
|
||||||
|
new.Connections = mergeValue(new.Connections, old.Connections)
|
||||||
|
new.Proxies = mergeValue(new.Proxies, old.Proxies)
|
||||||
|
new.BytesProxied = mergeValue(new.BytesProxied, old.BytesProxied)
|
||||||
|
new.GoRoutines = mergeValue(new.GoRoutines, old.GoRoutines)
|
||||||
|
new.Options.SessionRate = mergeValue(new.Options.SessionRate, old.Options.SessionRate)
|
||||||
|
new.Options.GlobalRate = mergeValue(new.Options.GlobalRate, old.Options.GlobalRate)
|
||||||
|
return new
|
||||||
|
}
|
||||||
|
|
||||||
|
// mergeValue reconciles a freshly scraped value with the previously recorded
// one so that small backwards jitter is not passed on.
//
//   - latest >= previous: latest is returned (normal increase).
//   - latest is below previous but above 99% of it: previous is returned
//     (slight backward movement is held steady).
//   - otherwise: latest is returned (treated as a reset, e.g. relay restart).
func mergeValue(latest, previous int) int {
	// Fraction of the previous value above which a drop is considered jitter
	// rather than a genuine reset.
	const jitterFloor = 0.99

	switch {
	case latest >= previous:
		return latest // normal increase
	case float64(latest) > jitterFloor*float64(previous):
		return previous // slight backward movement
	default:
		return latest // reset (relay restart)
	}
}
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
// Copyright (C) 2015 Audrius Butkevicius and Contributors (see the CONTRIBUTORS file).
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestMerge(t *testing.T) {
|
||||||
|
if mergeValue(1001, 1000) != 1001 {
|
||||||
|
t.Error("the computer says no")
|
||||||
|
}
|
||||||
|
|
||||||
|
if mergeValue(999, 1000) != 1000 {
|
||||||
|
t.Error("the computer says no")
|
||||||
|
}
|
||||||
|
|
||||||
|
if mergeValue(1, 1000) != 1 {
|
||||||
|
t.Error("the computer says no")
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue