2022-12-08 13:05:19 -03:00

169 lines
6.3 KiB
Go

// Copyright (c) 2021 Gitpod GmbH. All rights reserved.
// Licensed under the GNU Affero General Public License (AGPL).
// See License.AGPL.txt in the project root for license information.
package pkg
import (
	"fmt"
	"net/http"
	"strings"
	"time"

	"github.com/gitpod-io/gitpod/common-go/log"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/collectors"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)
// Prometheus bundles the metrics registry and the collectors that Start
// creates (via createMetrics) and registers on it.
type Prometheus struct {
// reg is the registry created by Start; every collector below is registered on it.
reg *prometheus.Registry
// BackupCacheHitCounter is the metric "backup_cache_hit_total": requests for which a cached response usable as backup existed.
BackupCacheHitCounter prometheus.Counter
// BackupCacheMissCounter is the metric "backup_cache_miss_total": requests for which no cached response usable as backup existed.
BackupCacheMissCounter prometheus.Counter
// BackupCacheServeCounter is the metric "backup_cache_serve_total": requests answered from the cache because the upstream server was down.
BackupCacheServeCounter prometheus.Counter
// RegularCacheHitServeCounter is the metric "regular_cache_hit_and_serve_total": requests answered from the cache for performance reasons.
RegularCacheHitServeCounter prometheus.Counter
// RegularCacheMissCounter is the metric "regular_cache_miss_total": requests without a young enough cached response.
RegularCacheMissCounter prometheus.Counter
// RequestsCounter is the metric "requests_total", labelled with "status" and "path"; see IncStatusCounter.
RequestsCounter *prometheus.CounterVec
// DurationOverallHistogram is the metric "duration_overall_seconds": duration of the HTTP requests.
DurationOverallHistogram prometheus.Histogram
// DurationRequestProcessingHistogram is the metric "duration_request_processing_seconds": request processing before the upstream call.
DurationRequestProcessingHistogram prometheus.Histogram
// DurationUpstreamCallHistorgram is the metric "duration_upstream_call_seconds": duration of the upstream call.
// NOTE(review): the field name is misspelt ("Historgram"); renaming it would break callers, so it is kept.
DurationUpstreamCallHistorgram prometheus.Histogram
// DurationResponseProcessingHistogram is the metric "duration_response_processing_seconds": response processing after the upstream call.
DurationResponseProcessingHistogram prometheus.Histogram
}
// Start creates a fresh registry, creates all proxy metrics and registers
// them on it. If cfg.PrometheusAddr is non-empty it additionally registers the
// Go runtime and process collectors and serves the registry on "/metrics" at
// that address from a background goroutine.
//
// Errors are logged, not returned: a failed registration of a proxy metric
// and a failed metrics server are both reported through the logger only.
func (p *Prometheus) Start(cfg *Config) {
	p.reg = prometheus.NewRegistry()

	// Create and register the proxy metrics before the endpoint goes live so
	// that the very first scrape already sees the complete set.
	p.createMetrics()
	metrics := []prometheus.Collector{
		p.BackupCacheHitCounter,
		p.BackupCacheMissCounter,
		p.BackupCacheServeCounter,
		p.RegularCacheHitServeCounter,
		p.RegularCacheMissCounter,
		p.RequestsCounter,
		p.DurationOverallHistogram,
		p.DurationRequestProcessingHistogram,
		p.DurationUpstreamCallHistorgram,
		p.DurationResponseProcessingHistogram,
	}
	for _, m := range metrics {
		if err := p.reg.Register(m); err != nil {
			log.WithError(err).Error("register Prometheus metric failed")
		}
	}

	if cfg.PrometheusAddr == "" {
		// No address configured: metrics are collected but not exposed.
		return
	}

	p.reg.MustRegister(
		collectors.NewGoCollector(),
		collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}),
	)

	mux := http.NewServeMux()
	mux.Handle("/metrics", promhttp.HandlerFor(p.reg, promhttp.HandlerOpts{}))

	// Use an explicit http.Server instead of http.ListenAndServe so a header
	// read timeout can be set; without it a client that never finishes its
	// request headers keeps the connection open indefinitely.
	srv := &http.Server{
		Addr:              cfg.PrometheusAddr,
		Handler:           mux,
		ReadHeaderTimeout: 10 * time.Second,
	}
	go func() {
		if err := srv.ListenAndServe(); err != nil {
			log.WithError(err).Error("Prometheus metrics server failed")
		}
	}()
	log.WithField("addr", cfg.PrometheusAddr).Debug("started Prometheus metrics server")
}
// createMetrics instantiates every counter and histogram held by p. All
// metrics share the namespace "gitpod" and the subsystem "openvsx_proxy".
// The collectors are only created here; registering them is left to the
// caller.
func (p *Prometheus) createMetrics() {
	const (
		ns  = "gitpod"
		sub = "openvsx_proxy"
	)

	// counterOpts fills in the shared namespace/subsystem for a counter.
	counterOpts := func(name, help string) prometheus.CounterOpts {
		return prometheus.CounterOpts{
			Namespace: ns,
			Subsystem: sub,
			Name:      name,
			Help:      help,
		}
	}
	// histogram builds a histogram with the shared namespace/subsystem and
	// the library's default buckets.
	histogram := func(name, help string) prometheus.Histogram {
		return prometheus.NewHistogram(prometheus.HistogramOpts{
			Namespace: ns,
			Subsystem: sub,
			Name:      name,
			Help:      help,
		})
	}

	// Backup cache: used when the upstream server is down.
	p.BackupCacheHitCounter = prometheus.NewCounter(counterOpts(
		"backup_cache_hit_total",
		"The total amount of requests where we had a cached response that we could use as backup when the upstream server is down.",
	))
	p.BackupCacheMissCounter = prometheus.NewCounter(counterOpts(
		"backup_cache_miss_total",
		"The total amount of requests where we haven't had a cached response that we could use as backup when the upstream server is down.",
	))
	p.BackupCacheServeCounter = prometheus.NewCounter(counterOpts(
		"backup_cache_serve_total",
		"The total amount of requests where we actually answered with a cached response because the upstream server is down.",
	))

	// Regular cache: used for performance reasons.
	p.RegularCacheHitServeCounter = prometheus.NewCounter(counterOpts(
		"regular_cache_hit_and_serve_total",
		"The total amount or requests where we answered with a cached response for performance reasons.",
	))
	p.RegularCacheMissCounter = prometheus.NewCounter(counterOpts(
		"regular_cache_miss_total",
		"The total amount or requests we haven't had a young enough cached requests to use it for performance reasons.",
	))

	// Requests partitioned by the "status" and "path" labels.
	p.RequestsCounter = prometheus.NewCounterVec(
		counterOpts(
			"requests_total",
			"The total amount of requests by response status.",
		),
		[]string{"status", "path"},
	)

	// Durations of the individual request phases.
	p.DurationOverallHistogram = histogram(
		"duration_overall_seconds",
		"The duration in seconds of the HTTP requests.",
	)
	p.DurationRequestProcessingHistogram = histogram(
		"duration_request_processing_seconds",
		"The duration in seconds of the processing of the HTTP requests before we call the upstream.",
	)
	p.DurationUpstreamCallHistorgram = histogram(
		"duration_upstream_call_seconds",
		"The duration in seconds of the call of the upstream server.",
	)
	p.DurationResponseProcessingHistogram = histogram(
		"duration_response_processing_seconds",
		"The duration in seconds of the processing of the HTTP responses after we have called the upstream.",
	)
}
// expectedPaths is the set of request paths (without trailing slash) that
// IncStatusCounter reports under their own "path" label value. Any path that
// is not in this set after normalisation is reported as "(other)".
var expectedPaths = map[string]struct{}{
"/api/-/query": {},
"/vscode/asset": {},
"/vscode/gallery/extensionquery": {},
"/vscode/gallery/itemName": {},
"/vscode/gallery/publishers": {},
}
// IncStatusCounter increments RequestsCounter for the given response status
// and a "path" label of the form "<method> <path>" derived from r.
//
// Both parts of that label are chosen by the client, so both are reduced to
// a small fixed set of values to keep the number of time series bounded:
//   - the path is cut to at most three segments and replaced by "(other)"
//     unless it is one of expectedPaths;
//   - the method is replaced by "(other)" unless it is one of the standard
//     HTTP methods defined by net/http.
func (p *Prometheus) IncStatusCounter(r *http.Request, status string) {
	path := r.URL.Path
	if strings.HasPrefix(path, "/vscode/asset/") {
		// remove everything after /vscode/asset/ to decrease the number of unique paths
		path = path[:len("/vscode/asset/")]
	}
	if strings.HasPrefix(path, "/vscode/gallery/itemName/") {
		// remove everything after /vscode/gallery/itemName/ to decrease the number of unique paths
		path = path[:len("/vscode/gallery/itemName/")]
	}
	// Just to make sure that a long path doesn't slip through, cut after 3 segments.
	// Since path starts with a "/", the first segment is an empty string,
	// therefore len > 4 and not len > 3.
	if s := strings.SplitN(path, "/", 5); len(s) > 4 {
		path = strings.Join(s[:4], "/")
	}
	// don't track unexpected paths (e.g. requests from crawlers/bots)
	if _, ok := expectedPaths[strings.TrimSuffix(path, "/")]; !ok {
		log.WithField("path", path).Debug("unexpected path")
		path = "(other)"
	}

	// The server accepts any token as request method, so an unfiltered method
	// would let clients create an unbounded number of label values.
	method := r.Method
	switch method {
	case http.MethodGet, http.MethodHead, http.MethodPost, http.MethodPut, http.MethodPatch,
		http.MethodDelete, http.MethodConnect, http.MethodOptions, http.MethodTrace:
		// standard method: keep as is
	default:
		method = "(other)"
	}

	p.RequestsCounter.WithLabelValues(status, fmt.Sprintf("%s %s", method, path)).Inc()
}