// Copyright (c) 2022 Gitpod GmbH. All rights reserved.
// Licensed under the GNU Affero General Public License (AGPL).
// See License-AGPL.txt in the project root for license information.

package controllers

import (
	"context"
	"fmt"
	"time"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
	"sigs.k8s.io/controller-runtime/pkg/log"

	wsk8s "github.com/gitpod-io/gitpod/common-go/kubernetes"
	config "github.com/gitpod-io/gitpod/ws-manager/api/config"
	workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"

	"github.com/prometheus/client_golang/prometheus"
)

const (
	metricsNamespace          = "gitpod"
	metricsWorkspaceSubsystem = "ws_manager_mk2"
	// kubernetesOperationTimeout is the time we give Kubernetes operations in general.
	kubernetesOperationTimeout = 5 * time.Second
)
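
// NewWorkspaceReconciler creates a WorkspaceReconciler and registers its
// controller metrics with reg. Registration uses MustRegister, so passing a
// registerer that already contains these collectors will panic.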
func NewWorkspaceReconciler(c client.Client, scheme *runtime.Scheme, cfg *config.Configuration, reg prometheus.Registerer) (*WorkspaceReconciler, error) {
	reconciler := &WorkspaceReconciler{
		Client: c,
		Scheme: scheme,
		Config: cfg,
	}

	metrics, err := newControllerMetrics(reconciler)
	if err != nil {
		return nil, err
	}
	reg.MustRegister(metrics)
	reconciler.metrics = metrics

	return reconciler, nil
}

// WorkspaceReconciler reconciles a Workspace object
type WorkspaceReconciler struct {
	client.Client
	Scheme *runtime.Scheme

	Config  *config.Configuration
	metrics *controllerMetrics
	// OnReconcile, if set, is called after every successful reconciliation.
	OnReconcile func(ctx context.Context, ws *workspacev1.Workspace)
}

//+kubebuilder:rbac:groups=workspace.gitpod.io,resources=workspaces,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=workspace.gitpod.io,resources=workspaces/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=workspace.gitpod.io,resources=workspaces/finalizers,verbs=update
//+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=core,resources=pods/status,verbs=get

// Reconcile is part of the main kubernetes reconciliation loop which aims to
// move the current state of the cluster closer to the desired state.
// It compares the state specified by the Workspace object against the actual
// cluster state, and then performs operations to make the cluster state
// reflect the state specified by the user.
//
// For more details, check Reconcile and its Result here:
// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.11.0/pkg/reconcile
func (r *WorkspaceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	log := log.FromContext(ctx)

	var workspace workspacev1.Workspace
	if err := r.Get(ctx, req.NamespacedName, &workspace); err != nil {
		if !errors.IsNotFound(err) {
			log.Error(err, "unable to fetch workspace")
		}
		// we'll ignore not-found errors, since they can't be fixed by an immediate
		// requeue (we'll need to wait for a new notification), and we can get them
		// on deleted requests.
		return ctrl.Result{}, client.IgnoreNotFound(err)
	}

	if workspace.Status.Conditions == nil {
		workspace.Status.Conditions = []metav1.Condition{}
	}

	log.Info("reconciling workspace", "ws", req.NamespacedName)

	var workspacePods corev1.PodList
	err := r.List(ctx, &workspacePods, client.InNamespace(req.Namespace), client.MatchingFields{wsOwnerKey: req.Name})
	if err != nil {
		log.Error(err, "unable to list workspace pods")
		return ctrl.Result{}, err
	}

	err = updateWorkspaceStatus(ctx, &workspace, workspacePods, r.Config)
	if err != nil {
		return ctrl.Result{}, err
	}

	r.updateMetrics(ctx, &workspace)

	result, err := r.actOnStatus(ctx, &workspace, workspacePods)
	if err != nil {
		return result, err
	}

	err = r.Status().Update(ctx, &workspace)
	if err != nil {
		log.WithValues("status", workspace.Status).Error(err, "unable to update workspace status")
		return ctrl.Result{Requeue: true}, err
	}

	if r.OnReconcile != nil {
		r.OnReconcile(ctx, &workspace)
	}

	return ctrl.Result{}, nil
}
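
// actOnStatus acts on the workspace's current status: it creates the workspace
// pod if none exists yet, deletes the pod when the workspace has failed, timed
// out, was stopped by request, or is being deleted, and removes finalizers
// once the workspace is fully stopped.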
func (r *WorkspaceReconciler) actOnStatus(ctx context.Context, workspace *workspacev1.Workspace, workspacePods corev1.PodList) (ctrl.Result, error) {
	log := log.FromContext(ctx)

	if len(workspacePods.Items) == 0 {
		// If there isn't a workspace pod and we're not currently deleting this workspace, create one.
		switch {
		case workspace.Status.PodStarts == 0:
			sctx, err := newStartWorkspaceContext(ctx, r.Config, workspace)
			if err != nil {
				log.Error(err, "unable to create startWorkspace context")
				return ctrl.Result{Requeue: true}, err
			}

			pod, err := r.createWorkspacePod(sctx)
			if err != nil {
				log.Error(err, "unable to produce workspace pod")
				return ctrl.Result{}, err
			}

			if err := ctrl.SetControllerReference(workspace, pod, r.Scheme); err != nil {
				return ctrl.Result{}, err
			}

			err = r.Create(ctx, pod)
			if errors.IsAlreadyExists(err) {
				// pod exists, we're good
			} else if err != nil {
				log.Error(err, "unable to create Pod for Workspace", "pod", pod)
				return ctrl.Result{Requeue: true}, err
			} else {
				// TODO(cw): replicate the startup mechanism where pods can fail to be scheduled,
				// need to be deleted and re-created
				workspace.Status.PodStarts++
			}
			r.metrics.rememberWorkspace(workspace)

		case workspace.Status.Phase == workspacev1.WorkspacePhaseStopped:
			// Done stopping workspace - remove finalizer.
			if controllerutil.ContainsFinalizer(workspace, workspacev1.GitpodFinalizerName) {
				controllerutil.RemoveFinalizer(workspace, workspacev1.GitpodFinalizerName)
				if err := r.Update(ctx, workspace); err != nil {
					return ctrl.Result{}, client.IgnoreNotFound(err)
				}
			}

			// Workspace might have already been in a deleting state,
			// but not guaranteed, so try deleting anyway.
			err := r.Client.Delete(ctx, workspace)
			return ctrl.Result{}, client.IgnoreNotFound(err)
		}

		return ctrl.Result{}, nil
	}

	// all actions below assume there is a pod; the branch above has already
	// returned when there is none
	pod := &workspacePods.Items[0]
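
	// Each deletion case below follows the same pattern: on NotFound the pod is
	// already gone and we fall through; otherwise we return with Requeue set -
	// on success (err == nil) this re-runs the reconcile to observe the
	// deletion, and on any other error it retries the delete.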
	switch {
	// if there is a pod, and it's failed, delete it
	case wsk8s.ConditionPresentAndTrue(workspace.Status.Conditions, string(workspacev1.WorkspaceConditionFailed)) && !isPodBeingDeleted(pod):
		err := r.Client.Delete(ctx, pod)
		if errors.IsNotFound(err) {
			// pod is gone - nothing to do here
		} else {
			return ctrl.Result{Requeue: true}, err
		}

	// if the pod was stopped by request, delete it
	case wsk8s.ConditionPresentAndTrue(workspace.Status.Conditions, string(workspacev1.WorkspaceConditionStoppedByRequest)) && !isPodBeingDeleted(pod):
		var gracePeriodSeconds *int64
		if c := wsk8s.GetCondition(workspace.Status.Conditions, string(workspacev1.WorkspaceConditionStoppedByRequest)); c != nil {
			if dt, err := time.ParseDuration(c.Message); err == nil {
				s := int64(dt.Seconds())
				gracePeriodSeconds = &s
			}
		}
		err := r.Client.Delete(ctx, pod, &client.DeleteOptions{
			GracePeriodSeconds: gracePeriodSeconds,
		})
		if errors.IsNotFound(err) {
			// pod is gone - nothing to do here
		} else {
			return ctrl.Result{Requeue: true}, err
		}

	// if the workspace timed out, delete it
	case wsk8s.ConditionPresentAndTrue(workspace.Status.Conditions, string(workspacev1.WorkspaceConditionTimeout)) && !isPodBeingDeleted(pod):
		err := r.Client.Delete(ctx, pod)
		if errors.IsNotFound(err) {
			// pod is gone - nothing to do here
		} else {
			return ctrl.Result{Requeue: true}, err
		}

	// if the content initialization failed, delete the pod
	case wsk8s.ConditionWithStatusAndReason(workspace.Status.Conditions, string(workspacev1.WorkspaceConditionContentReady), false, "InitializationFailure") && !isPodBeingDeleted(pod):
		err := r.Client.Delete(ctx, pod)
		if errors.IsNotFound(err) {
			// pod is gone - nothing to do here
		} else {
			return ctrl.Result{Requeue: true}, err
		}

	case isWorkspaceBeingDeleted(workspace) && !isPodBeingDeleted(pod):
		// Workspace was requested to be deleted, propagate by deleting the Pod.
		// The Pod deletion will then trigger workspace disposal steps.
		err := r.Client.Delete(ctx, pod)
		if errors.IsNotFound(err) {
			// pod is gone - nothing to do here
		} else {
			return ctrl.Result{Requeue: true}, err
		}

	// we've disposed already - try to remove the finalizer and call it a day
	case workspace.Status.Phase == workspacev1.WorkspacePhaseStopped:
		hadFinalizer := controllerutil.ContainsFinalizer(pod, workspacev1.GitpodFinalizerName)
		controllerutil.RemoveFinalizer(pod, workspacev1.GitpodFinalizerName)
		if err := r.Client.Update(ctx, pod); err != nil {
			return ctrl.Result{}, fmt.Errorf("failed to remove gitpod finalizer: %w", err)
		}

		if hadFinalizer {
			// Requeue to remove workspace.
			return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
		}
	}

	return ctrl.Result{}, nil
}
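
// updateMetrics records phase- and condition-derived metrics for the given
// workspace, and stops tracking it once it reaches the Stopped phase.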
func (r *WorkspaceReconciler) updateMetrics(ctx context.Context, workspace *workspacev1.Workspace) {
	log := log.FromContext(ctx)

	phase := workspace.Status.Phase

	if !r.metrics.shouldUpdate(&log, workspace) {
		return
	}

	switch {
	case phase == workspacev1.WorkspacePhasePending ||
		phase == workspacev1.WorkspacePhaseCreating ||
		phase == workspacev1.WorkspacePhaseInitializing:

		if wsk8s.ConditionWithStatusAndReason(workspace.Status.Conditions, string(workspacev1.WorkspaceConditionContentReady), false, "InitializationFailure") {
			r.metrics.countTotalRestoreFailures(&log, workspace)
			r.metrics.countWorkspaceStartFailures(&log, workspace)
		}

		if wsk8s.ConditionPresentAndTrue(workspace.Status.Conditions, string(workspacev1.WorkspaceConditionFailed)) {
			r.metrics.countWorkspaceStartFailures(&log, workspace)
		}

	case phase == workspacev1.WorkspacePhaseRunning:
		r.metrics.recordWorkspaceStartupTime(&log, workspace)
		if wsk8s.ConditionPresentAndTrue(workspace.Status.Conditions, string(workspacev1.WorkspaceConditionContentReady)) {
			r.metrics.countTotalRestores(&log, workspace)
		}

	case phase == workspacev1.WorkspacePhaseStopped:
		if wsk8s.ConditionPresentAndTrue(workspace.Status.Conditions, string(workspacev1.WorkspaceConditionBackupFailure)) {
			r.metrics.countTotalBackups(&log, workspace)
			r.metrics.countTotalBackupFailures(&log, workspace)
		}

		if wsk8s.ConditionPresentAndTrue(workspace.Status.Conditions, string(workspacev1.WorkspaceConditionBackupComplete)) {
			r.metrics.countTotalBackups(&log, workspace)
		}

		r.metrics.countWorkspaceStop(&log, workspace)
		r.metrics.forgetWorkspace(workspace)
		return
	}

	r.metrics.rememberWorkspace(workspace)
}
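
// wsOwnerKey is the field-index key under which pods are indexed by the name
// of their controlling Workspace (see SetupWithManager); apiGVStr is the
// Workspace API group/version that owner references must match.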
var (
	wsOwnerKey = ".metadata.controller"
	apiGVStr   = workspacev1.GroupVersion.String()
)

// SetupWithManager sets up the controller with the Manager.
func (r *WorkspaceReconciler) SetupWithManager(mgr ctrl.Manager) error {
	idx := func(rawObj client.Object) []string {
		// grab the pod object, extract the owner...
		pod := rawObj.(*corev1.Pod)
		owner := metav1.GetControllerOf(pod)
		if owner == nil {
			return nil
		}
		// ...make sure it's a workspace...
		if owner.APIVersion != apiGVStr || owner.Kind != "Workspace" {
			return nil
		}

		// ...and if so, return it
		return []string{owner.Name}
	}
	err := mgr.GetFieldIndexer().IndexField(context.Background(), &corev1.Pod{}, wsOwnerKey, idx)
	if err != nil {
		return err
	}

	return ctrl.NewControllerManagedBy(mgr).
		For(&workspacev1.Workspace{}).
		Owns(&corev1.Pod{}).
		Complete(r)
}
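
// A minimal wiring sketch for this reconciler (illustrative only - the scheme
// setup, cfg value, and error handling are assumed, not part of this package):
//
//	mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{Scheme: scheme})
//	reconciler, err := NewWorkspaceReconciler(mgr.GetClient(), mgr.GetScheme(), cfg, prometheus.DefaultRegisterer)
//	err = reconciler.SetupWithManager(mgr)
//	err = mgr.Start(ctrl.SetupSignalHandler())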