// Copyright (c) 2020 TypeFox GmbH. All rights reserved. // Licensed under the GNU Affero General Public License (AGPL). // See License-AGPL.txt in the project root for license information. package cmd import ( "net" "net/http" "os" "os/signal" "syscall" "time" grpc_gitpod "github.com/gitpod-io/gitpod/common-go/grpc" "github.com/gitpod-io/gitpod/common-go/log" "github.com/gitpod-io/gitpod/common-go/pprof" "github.com/gitpod-io/gitpod/content-service/pkg/layer" "github.com/gitpod-io/gitpod/ws-manager/pkg/manager" grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware" grpc_opentracing "github.com/grpc-ecosystem/go-grpc-middleware/tracing/opentracing" "github.com/opentracing/opentracing-go" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/spf13/cobra" "google.golang.org/grpc" "google.golang.org/grpc/credentials" "google.golang.org/grpc/keepalive" ) // serveCmd represents the serve command var runCmd = &cobra.Command{ Use: "run", Short: "Starts the workspace monitor", Run: func(cmd *cobra.Command, args []string) { cfg := getConfig() err := cfg.Manager.Validate() if err != nil { log.WithError(err).Fatal("invalid configuration") } log.Info("wsman configuration is valid") clientset, err := newClientSet() if err != nil { log.WithError(err).Fatal("cannot connect to Kubernetes") } log.Info("connected to Kubernetes") cp, err := layer.NewProvider(&cfg.Content.Storage) if err != nil { log.WithError(err).Fatal("invalid content provider configuration") } mgmt, err := manager.New(cfg.Manager, clientset, cp) if err != nil { log.WithError(err).Fatal("cannot create manager") } defer mgmt.Close() if len(cfg.RPCServer.RateLimits) > 0 { log.WithField("ratelimits", cfg.RPCServer.RateLimits).Info("imposing rate limits on the gRPC interface") } ratelimits := grpc_gitpod.NewRatelimitingInterceptor(cfg.RPCServer.RateLimits) reg := prometheus.NewRegistry() callMetrics, err := grpc_gitpod.NewUnaryCallMetricsInterceptor(prometheus.WrapRegistererWithPrefix("gitpod_ws_manager_", reg)) if err != nil { log.WithError(err).Fatal("cannot register gRPC call metrics") } grpcOpts := []grpc.ServerOption{ // We don't know how good our cients are at closing connections. If they don't close them properly // we'll be leaking goroutines left and right. Closing Idle connections should prevent that. grpc.KeepaliveParams(keepalive.ServerParameters{MaxConnectionIdle: 30 * time.Minute}), grpc.StreamInterceptor(grpc_middleware.ChainStreamServer( grpc_opentracing.StreamServerInterceptor(grpc_opentracing.WithTracer(opentracing.GlobalTracer())), )), grpc.UnaryInterceptor(grpc_middleware.ChainUnaryServer( // add call metrics first to capture ratelimit errors callMetrics, ratelimits.UnaryInterceptor(), grpc_opentracing.UnaryServerInterceptor(grpc_opentracing.WithTracer(opentracing.GlobalTracer())), )), } if cfg.RPCServer.TLS.Certificate != "" && cfg.RPCServer.TLS.PrivateKey != "" { creds, err := credentials.NewServerTLSFromFile(cfg.RPCServer.TLS.Certificate, cfg.RPCServer.TLS.PrivateKey) if err != nil { log.WithError(err).WithField("crt", cfg.RPCServer.TLS.Certificate).WithField("key", cfg.RPCServer.TLS.PrivateKey).Fatal("could not load TLS keys") } grpcOpts = append(grpcOpts, grpc.Creds(creds)) log.WithField("crt", cfg.RPCServer.TLS.Certificate).WithField("key", cfg.RPCServer.TLS.PrivateKey).Debug("securing gRPC server with TLS") } else { log.Warn("no TLS configured - gRPC server will be unsecured") } grpcServer := grpc.NewServer(grpcOpts...) defer grpcServer.Stop() manager.Register(grpcServer, mgmt) lis, err := net.Listen("tcp", cfg.RPCServer.Addr) if err != nil { log.WithError(err).WithField("addr", cfg.RPCServer.Addr).Fatal("cannot start RPC server") } //nolint:errcheck go grpcServer.Serve(lis) log.WithField("addr", cfg.RPCServer.Addr).Info("started gRPC server") monitor, err := mgmt.CreateMonitor() if err != nil { log.WithError(err).Fatal("cannot start workspace monitor") } err = monitor.Start() if err != nil { log.WithError(err).Fatal("cannot start workspace monitor") } defer monitor.Stop() log.Info("workspace monitor is up and running") if cfg.PProf.Addr != "" { go pprof.Serve(cfg.PProf.Addr) } if cfg.Prometheus.Addr != "" { reg.MustRegister( prometheus.NewGoCollector(), prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{}), ) err := mgmt.RegisterMetrics(reg) if err != nil { log.WithError(err).Error("Prometheus metrics incomplete") } handler := http.NewServeMux() handler.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{})) go func() { err := http.ListenAndServe(cfg.Prometheus.Addr, handler) if err != nil { log.WithError(err).Error("Prometheus metrics server failed") } }() log.WithField("addr", cfg.Prometheus.Addr).Info("started Prometheus metrics server") } // run until we're told to stop sigChan := make(chan os.Signal, 1) signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM) log.Info("🦸 wsman is up and running. Stop with SIGINT or CTRL+C") <-sigChan log.Info("Received SIGINT - shutting down") }, } func init() { rootCmd.AddCommand(runCmd) }