mirror of
https://github.com/gitpod-io/gitpod.git
synced 2025-12-08 17:36:30 +00:00
* [ws-manager-mk2] Maintenance mode reconciler
* [ws-manager-mk2] Check for maintenance mode
* [ws-manager-mk2] Default to maintenance mode on startup
* [ws-manager-mk2] Disable maintenance on unmarshal failure
286 lines
9.8 KiB
Go
286 lines
9.8 KiB
Go
// Copyright (c) 2022 Gitpod GmbH. All rights reserved.
|
|
// Licensed under the GNU Affero General Public License (AGPL).
|
|
// See License-AGPL.txt in the project root for license information.
|
|
|
|
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"flag"
|
|
"fmt"
|
|
"net"
|
|
"os"
|
|
|
|
// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
|
|
// to ensure that exec-entrypoint and run can make use of them.
|
|
"google.golang.org/grpc"
|
|
"google.golang.org/grpc/credentials"
|
|
"google.golang.org/grpc/credentials/insecure"
|
|
_ "k8s.io/client-go/plugin/pkg/client/auth"
|
|
"k8s.io/client-go/rest"
|
|
|
|
grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
corev1 "k8s.io/api/core/v1"
|
|
"k8s.io/apimachinery/pkg/labels"
|
|
"k8s.io/apimachinery/pkg/runtime"
|
|
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
|
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
|
|
ctrl "sigs.k8s.io/controller-runtime"
|
|
"sigs.k8s.io/controller-runtime/pkg/cache"
|
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
|
"sigs.k8s.io/controller-runtime/pkg/healthz"
|
|
"sigs.k8s.io/controller-runtime/pkg/log/zap"
|
|
"sigs.k8s.io/controller-runtime/pkg/metrics"
|
|
|
|
common_grpc "github.com/gitpod-io/gitpod/common-go/grpc"
|
|
"github.com/gitpod-io/gitpod/common-go/log"
|
|
"github.com/gitpod-io/gitpod/common-go/pprof"
|
|
"github.com/gitpod-io/gitpod/common-go/tracing"
|
|
imgbldr "github.com/gitpod-io/gitpod/image-builder/api"
|
|
regapi "github.com/gitpod-io/gitpod/registry-facade/api"
|
|
wsmanapi "github.com/gitpod-io/gitpod/ws-manager/api"
|
|
config "github.com/gitpod-io/gitpod/ws-manager/api/config"
|
|
workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"
|
|
|
|
"github.com/gitpod-io/gitpod/ws-manager-mk2/controllers"
|
|
"github.com/gitpod-io/gitpod/ws-manager-mk2/pkg/activity"
|
|
"github.com/gitpod-io/gitpod/ws-manager-mk2/pkg/maintenance"
|
|
imgproxy "github.com/gitpod-io/gitpod/ws-manager-mk2/pkg/proxy"
|
|
"github.com/gitpod-io/gitpod/ws-manager-mk2/service"
|
|
//+kubebuilder:scaffold:imports
|
|
)
|
|
|
|
var (
|
|
scheme = runtime.NewScheme()
|
|
setupLog = ctrl.Log.WithName("setup")
|
|
)
|
|
|
|
func init() {
|
|
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
|
|
|
|
utilruntime.Must(workspacev1.AddToScheme(scheme))
|
|
//+kubebuilder:scaffold:scheme
|
|
}
|
|
|
|
func main() {
|
|
var enableLeaderElection bool
|
|
var configFN string
|
|
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
|
|
"Enable leader election for controller manager. "+
|
|
"Enabling this will ensure there is only one active controller manager.")
|
|
flag.StringVar(&configFN, "config", "", "Path to the config file")
|
|
opts := zap.Options{
|
|
Development: true,
|
|
}
|
|
opts.BindFlags(flag.CommandLine)
|
|
flag.Parse()
|
|
|
|
ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
|
|
promrep := &tracing.PromReporter{
|
|
Operations: map[string]tracing.SpanMetricMapping{
|
|
"StartWorkspace": {
|
|
Name: "wsman_start_workspace",
|
|
Help: "time it takes to service a StartWorkspace request",
|
|
Buckets: prometheus.LinearBuckets(0, 500, 10), // 10 buckets, each 500ms wide
|
|
},
|
|
},
|
|
}
|
|
closer := tracing.Init("ws-manager-mk2", tracing.WithPrometheusReporter(promrep))
|
|
if closer != nil {
|
|
defer closer.Close()
|
|
}
|
|
|
|
cfg, err := getConfig(configFN)
|
|
if err != nil {
|
|
setupLog.Error(err, "unable to read config")
|
|
os.Exit(1)
|
|
}
|
|
|
|
if cfg.PProf.Addr != "" {
|
|
go pprof.Serve(cfg.PProf.Addr)
|
|
}
|
|
|
|
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
|
|
Scheme: scheme,
|
|
MetricsBindAddress: cfg.Prometheus.Addr,
|
|
Port: 9443,
|
|
HealthProbeBindAddress: cfg.Health.Addr,
|
|
LeaderElection: enableLeaderElection,
|
|
LeaderElectionID: "ws-manager-mk2-leader.gitpod.io",
|
|
Namespace: cfg.Manager.Namespace,
|
|
NewCache: func(conf *rest.Config, opts cache.Options) (cache.Cache, error) {
|
|
// Only watch the maintenance mode ConfigMap.
|
|
opts.SelectorsByObject = cache.SelectorsByObject{
|
|
&corev1.ConfigMap{}: cache.ObjectSelector{
|
|
Label: labels.SelectorFromSet(labels.Set{controllers.LabelMaintenance: "true"}),
|
|
},
|
|
}
|
|
return cache.New(conf, opts)
|
|
},
|
|
})
|
|
if err != nil {
|
|
setupLog.Error(err, "unable to start manager")
|
|
os.Exit(1)
|
|
}
|
|
|
|
maintenance, err := controllers.NewMaintenanceReconciler(mgr.GetClient())
|
|
if err != nil {
|
|
setupLog.Error(err, "unable to create maintenance controller", "controller", "Maintenance")
|
|
os.Exit(1)
|
|
}
|
|
|
|
reconciler, err := controllers.NewWorkspaceReconciler(mgr.GetClient(), mgr.GetScheme(), &cfg.Manager, metrics.Registry, maintenance)
|
|
if err != nil {
|
|
setupLog.Error(err, "unable to create controller", "controller", "Workspace")
|
|
os.Exit(1)
|
|
}
|
|
|
|
activity := &activity.WorkspaceActivity{}
|
|
timeoutReconciler, err := controllers.NewTimeoutReconciler(mgr.GetClient(), cfg.Manager, activity)
|
|
if err != nil {
|
|
setupLog.Error(err, "unable to create timeout controller", "controller", "Timeout")
|
|
os.Exit(1)
|
|
}
|
|
|
|
wsmanService, err := setupGRPCService(cfg, mgr.GetClient(), activity, maintenance)
|
|
if err != nil {
|
|
setupLog.Error(err, "unable to start manager service")
|
|
os.Exit(1)
|
|
}
|
|
|
|
reconciler.OnReconcile = wsmanService.OnWorkspaceReconcile
|
|
if err = reconciler.SetupWithManager(mgr); err != nil {
|
|
setupLog.Error(err, "unable to setup workspace controller with manager", "controller", "Workspace")
|
|
os.Exit(1)
|
|
}
|
|
if err = timeoutReconciler.SetupWithManager(mgr); err != nil {
|
|
setupLog.Error(err, "unable to setup timeout controller with manager", "controller", "Timeout")
|
|
os.Exit(1)
|
|
}
|
|
if err = maintenance.SetupWithManager(mgr); err != nil {
|
|
setupLog.Error(err, "unable to setup maintenance controller with manager", "controller", "Maintenance")
|
|
os.Exit(1)
|
|
}
|
|
|
|
// if err = (&workspacev1.Workspace{}).SetupWebhookWithManager(mgr); err != nil {
|
|
// setupLog.Error(err, "unable to create webhook", "webhook", "Workspace")
|
|
// os.Exit(1)
|
|
// }
|
|
|
|
//+kubebuilder:scaffold:builder
|
|
|
|
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
|
|
setupLog.Error(err, "unable to set up health check")
|
|
os.Exit(1)
|
|
}
|
|
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
|
|
setupLog.Error(err, "unable to set up ready check")
|
|
os.Exit(1)
|
|
}
|
|
|
|
setupLog.Info("starting manager")
|
|
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
|
|
setupLog.Error(err, "problem running manager")
|
|
os.Exit(1)
|
|
}
|
|
}
|
|
|
|
func setupGRPCService(cfg *config.ServiceConfiguration, k8s client.Client, activity *activity.WorkspaceActivity, maintenance maintenance.Maintenance) (*service.WorkspaceManagerServer, error) {
|
|
// TODO(cw): remove use of common-go/log
|
|
|
|
if len(cfg.RPCServer.RateLimits) > 0 {
|
|
log.WithField("ratelimits", cfg.RPCServer.RateLimits).Info("imposing rate limits on the gRPC interface")
|
|
}
|
|
ratelimits := common_grpc.NewRatelimitingInterceptor(cfg.RPCServer.RateLimits)
|
|
|
|
grpcMetrics := grpc_prometheus.NewServerMetrics()
|
|
grpcMetrics.EnableHandlingTimeHistogram()
|
|
metrics.Registry.MustRegister(grpcMetrics)
|
|
|
|
grpcOpts := common_grpc.ServerOptionsWithInterceptors(
|
|
[]grpc.StreamServerInterceptor{grpcMetrics.StreamServerInterceptor()},
|
|
[]grpc.UnaryServerInterceptor{grpcMetrics.UnaryServerInterceptor(), ratelimits.UnaryInterceptor()},
|
|
)
|
|
if cfg.RPCServer.TLS.CA != "" && cfg.RPCServer.TLS.Certificate != "" && cfg.RPCServer.TLS.PrivateKey != "" {
|
|
tlsConfig, err := common_grpc.ClientAuthTLSConfig(
|
|
cfg.RPCServer.TLS.CA, cfg.RPCServer.TLS.Certificate, cfg.RPCServer.TLS.PrivateKey,
|
|
common_grpc.WithSetClientCAs(true),
|
|
common_grpc.WithServerName("ws-manager"),
|
|
)
|
|
if err != nil {
|
|
log.WithError(err).Fatal("cannot load ws-manager certs")
|
|
}
|
|
|
|
grpcOpts = append(grpcOpts, grpc.Creds(credentials.NewTLS(tlsConfig)))
|
|
} else {
|
|
log.Warn("no TLS configured - gRPC server will be unsecured")
|
|
}
|
|
|
|
grpcServer := grpc.NewServer(grpcOpts...)
|
|
|
|
if cfg.ImageBuilderProxy.TargetAddr != "" {
|
|
creds := insecure.NewCredentials()
|
|
if cfg.ImageBuilderProxy.TLS.CA != "" && cfg.ImageBuilderProxy.TLS.Certificate != "" && cfg.ImageBuilderProxy.TLS.PrivateKey != "" {
|
|
tlsConfig, err := common_grpc.ClientAuthTLSConfig(
|
|
cfg.ImageBuilderProxy.TLS.CA, cfg.ImageBuilderProxy.TLS.Certificate, cfg.ImageBuilderProxy.TLS.PrivateKey,
|
|
common_grpc.WithSetRootCAs(true),
|
|
common_grpc.WithServerName("image-builder-mk3"),
|
|
)
|
|
if err != nil {
|
|
log.WithError(err).Fatal("cannot load image-builder-mk3 TLS certs")
|
|
}
|
|
log.Info("Loaded TLS for image builder")
|
|
creds = credentials.NewTLS(tlsConfig)
|
|
}
|
|
// Note: never use block here, because image-builder connects to ws-manager,
|
|
// and if we blocked here, ws-manager wouldn't come up, hence we couldn't connect to ws-manager.
|
|
conn, err := grpc.Dial(cfg.ImageBuilderProxy.TargetAddr, grpc.WithTransportCredentials(creds))
|
|
if err != nil {
|
|
log.WithError(err).Fatal("failed to connect to image builder")
|
|
}
|
|
imgbldr.RegisterImageBuilderServer(grpcServer, imgproxy.ImageBuilder{D: imgbldr.NewImageBuilderClient(conn)})
|
|
}
|
|
|
|
srv := service.NewWorkspaceManagerServer(k8s, &cfg.Manager, metrics.Registry, activity, maintenance)
|
|
|
|
grpc_prometheus.Register(grpcServer)
|
|
wsmanapi.RegisterWorkspaceManagerServer(grpcServer, srv)
|
|
regapi.RegisterSpecProviderServer(grpcServer, &service.WorkspaceImageSpecProvider{
|
|
Client: k8s,
|
|
Namespace: cfg.Manager.Namespace,
|
|
})
|
|
|
|
lis, err := net.Listen("tcp", cfg.RPCServer.Addr)
|
|
if err != nil {
|
|
log.WithError(err).WithField("addr", cfg.RPCServer.Addr).Fatal("cannot start RPC server")
|
|
}
|
|
go func() {
|
|
err := grpcServer.Serve(lis)
|
|
if err != nil {
|
|
log.WithError(err).Error("gRPC service failed")
|
|
}
|
|
}()
|
|
log.WithField("addr", cfg.RPCServer.Addr).Info("started gRPC server")
|
|
|
|
return srv, nil
|
|
}
|
|
|
|
func getConfig(fn string) (*config.ServiceConfiguration, error) {
|
|
ctnt, err := os.ReadFile(fn)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("cannot read configuration. Maybe missing --config?: %w", err)
|
|
}
|
|
|
|
var cfg config.ServiceConfiguration
|
|
dec := json.NewDecoder(bytes.NewReader(ctnt))
|
|
dec.DisallowUnknownFields()
|
|
err = dec.Decode(&cfg)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("cannot decode configuration from %s: %w", fn, err)
|
|
}
|
|
|
|
return &cfg, nil
|
|
}
|