gitpod/components/openvsx-proxy/pkg/prometheus.go

// Copyright (c) 2021 Gitpod GmbH. All rights reserved.
// Licensed under the GNU Affero General Public License (AGPL).
// See License.AGPL.txt in the project root for license information.

package pkg

import (
	"fmt"
	"net/http"
	"strings"

	"github.com/gitpod-io/gitpod/common-go/log"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/collectors"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

// Prometheus bundles the metrics registry and all metrics exposed by the
// OpenVSX proxy. The metric fields are populated and registered by Start.
type Prometheus struct {
	// reg is the registry all metrics are registered with; Start creates it
	// and, when a metrics address is configured, serves it under /metrics.
	reg                                 *prometheus.Registry
	// BackupCacheHitCounter counts requests for which a cached response was
	// available as backup in case the upstream server is down.
	BackupCacheHitCounter               prometheus.Counter
	// BackupCacheMissCounter counts requests for which no cached response was
	// available as backup in case the upstream server is down.
	BackupCacheMissCounter              prometheus.Counter
	// BackupCacheServeCounter counts requests that were actually answered
	// from the cache because the upstream server is down.
	BackupCacheServeCounter             prometheus.Counter
	// RegularCacheHitServeCounter counts requests answered from the cache
	// for performance reasons.
	RegularCacheHitServeCounter         prometheus.Counter
	// RegularCacheMissCounter counts requests for which no cached response
	// was young enough to be served for performance reasons.
	RegularCacheMissCounter             prometheus.Counter
	// RequestsCounter counts requests, labelled with "status" and "path";
	// it is incremented through IncStatusCounter.
	RequestsCounter                     *prometheus.CounterVec
	// DurationOverallHistogram observes the duration in seconds of the HTTP
	// requests.
	DurationOverallHistogram            prometheus.Histogram
	// DurationRequestProcessingHistogram observes the duration in seconds of
	// the request processing done before the upstream is called.
	DurationRequestProcessingHistogram  prometheus.Histogram
	// DurationUpstreamCallHistorgram observes the duration in seconds of the
	// call to the upstream server. (The misspelling is part of the exported
	// field name.)
	DurationUpstreamCallHistorgram      prometheus.Histogram
	// DurationResponseProcessingHistogram observes the duration in seconds of
	// the response processing done after the upstream has been called.
	DurationResponseProcessingHistogram prometheus.Histogram
}

// Start creates a fresh registry, creates all proxy metrics and registers
// them with that registry.
//
// When cfg.PrometheusAddr is non-empty, the Go runtime and process collectors
// are registered as well and an HTTP server exposing the registry under
// /metrics is launched in a background goroutine. A failure of that server is
// logged. Failures to register an individual proxy metric are logged and do
// not abort the remaining registrations.
func (p *Prometheus) Start(cfg *Config) {
	p.reg = prometheus.NewRegistry()

	if addr := cfg.PrometheusAddr; addr != "" {
		p.reg.MustRegister(
			collectors.NewGoCollector(),
			collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}),
		)

		mux := http.NewServeMux()
		mux.Handle("/metrics", promhttp.HandlerFor(p.reg, promhttp.HandlerOpts{}))

		// Serve in the background so that Start does not block the caller.
		go func() {
			if err := http.ListenAndServe(addr, mux); err != nil {
				log.WithError(err).Error("Prometheus metrics server failed")
			}
		}()
		log.WithField("addr", addr).Debug("started Prometheus metrics server")
	}

	// The proxy metrics are created and registered regardless of whether the
	// metrics endpoint is served.
	p.createMetrics()
	for _, metric := range []prometheus.Collector{
		p.BackupCacheHitCounter,
		p.BackupCacheMissCounter,
		p.BackupCacheServeCounter,
		p.RegularCacheHitServeCounter,
		p.RegularCacheMissCounter,
		p.RequestsCounter,
		p.DurationOverallHistogram,
		p.DurationRequestProcessingHistogram,
		p.DurationUpstreamCallHistorgram,
		p.DurationResponseProcessingHistogram,
	} {
		if err := p.reg.Register(metric); err != nil {
			log.WithError(err).Error("register Prometheus metric failed")
		}
	}
}

// createMetrics instantiates every metric of the proxy and stores it in the
// corresponding field of p. All metrics share the "gitpod" namespace and the
// "openvsx_proxy" subsystem. The metrics are only created here, not
// registered.
func (p *Prometheus) createMetrics() {
	const (
		ns  = "gitpod"
		sub = "openvsx_proxy"
	)

	// Small constructors so that each metric below only states its name and
	// help text.
	counter := func(name, help string) prometheus.Counter {
		return prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: ns,
			Subsystem: sub,
			Name:      name,
			Help:      help,
		})
	}
	histogram := func(name, help string) prometheus.Histogram {
		return prometheus.NewHistogram(prometheus.HistogramOpts{
			Namespace: ns,
			Subsystem: sub,
			Name:      name,
			Help:      help,
		})
	}

	// Cache counters.
	p.BackupCacheHitCounter = counter(
		"backup_cache_hit_total",
		"The total amount of requests where we had a cached response that we could use as backup when the upstream server is down.",
	)
	p.BackupCacheMissCounter = counter(
		"backup_cache_miss_total",
		"The total amount of requests where we haven't had a cached response that we could use as backup when the upstream server is down.",
	)
	p.BackupCacheServeCounter = counter(
		"backup_cache_serve_total",
		"The total amount of requests where we actually answered with a cached response because the upstream server is down.",
	)
	p.RegularCacheHitServeCounter = counter(
		"regular_cache_hit_and_serve_total",
		"The total amount or requests where we answered with a cached response for performance reasons.",
	)
	p.RegularCacheMissCounter = counter(
		"regular_cache_miss_total",
		"The total amount or requests we haven't had a young enough cached requests to use it for performance reasons.",
	)

	// Request counter, partitioned by the "status" and "path" labels.
	p.RequestsCounter = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: ns,
			Subsystem: sub,
			Name:      "requests_total",
			Help:      "The total amount of requests by response status.",
		},
		[]string{"status", "path"},
	)

	// Duration histograms.
	p.DurationOverallHistogram = histogram(
		"duration_overall_seconds",
		"The duration in seconds of the HTTP requests.",
	)
	p.DurationRequestProcessingHistogram = histogram(
		"duration_request_processing_seconds",
		"The duration in seconds of the processing of the HTTP requests before we call the upstream.",
	)
	p.DurationUpstreamCallHistorgram = histogram(
		"duration_upstream_call_seconds",
		"The duration in seconds of the call of the upstream server.",
	)
	p.DurationResponseProcessingHistogram = histogram(
		"duration_response_processing_seconds",
		"The duration in seconds of the processing of the HTTP responses after we have called the upstream.",
	)
}

// expectedPaths is the set of request paths, written without a trailing
// slash, that IncStatusCounter reports in the "path" label of the requests
// counter. Any request whose shortened path is not in this set is reported
// as "(other)".
var expectedPaths = map[string]struct{}{
	"/api/-/query":                   {},
	"/vscode/asset":                  {},
	"/vscode/gallery/extensionquery": {},
	"/vscode/gallery/itemName":       {},
	"/vscode/gallery/publishers":     {},
}

// IncStatusCounter increments RequestsCounter for the request r.
//
// The "status" label is set to status as given. The "path" label has the form
// "<method> <path>", where both parts are normalized to keep the number of
// distinct label values bounded, since they originate from the client:
//
//   - everything after "/vscode/asset/" and "/vscode/gallery/itemName/" is
//     dropped,
//   - paths are cut after three segments,
//   - paths not listed in expectedPaths are reported as "(other)",
//   - HTTP methods other than the standard ones are reported as "(other)".
func (p *Prometheus) IncStatusCounter(r *http.Request, status string) {
	path := r.URL.Path
	if strings.HasPrefix(path, "/vscode/asset/") {
		// Remove everything after /vscode/asset/ to decrease the number of
		// unique paths. The trailing slash is kept in the label value.
		path = path[:len("/vscode/asset/")]
	}
	if strings.HasPrefix(path, "/vscode/gallery/itemName/") {
		// Remove everything after /vscode/gallery/itemName/ to decrease the
		// number of unique paths.
		path = path[:len("/vscode/gallery/itemName/")]
	}
	// Just to make sure that a long path doesn't slip through, cut after 3
	// segments. Since path starts with a "/", the first element is an empty
	// string, therefore len > 4 and not len > 3.
	if s := strings.SplitN(path, "/", 5); len(s) > 4 {
		path = strings.Join(s[:4], "/")
	}
	// Don't track unexpected paths (e.g. requests from crawlers/bots).
	if _, ok := expectedPaths[strings.TrimSuffix(path, "/")]; !ok {
		log.WithField("path", path).Debug("unexpected path")
		path = "(other)"
	}

	// The method is chosen by the client as well and may be an arbitrary
	// token; collapse everything but the standard methods so that it cannot
	// be used to create an unbounded number of time series.
	method := r.Method
	switch method {
	case http.MethodGet, http.MethodHead, http.MethodPost, http.MethodPut,
		http.MethodPatch, http.MethodDelete, http.MethodConnect,
		http.MethodOptions, http.MethodTrace:
		// Standard method, keep it as is.
	default:
		log.WithField("method", method).Debug("unexpected method")
		method = "(other)"
	}

	p.RequestsCounter.WithLabelValues(status, fmt.Sprintf("%s %s", method, path)).Inc()
}