gitpod/components/ws-manager-mk2/controllers/status.go

// Copyright (c) 2022 Gitpod GmbH. All rights reserved.
// Licensed under the GNU Affero General Public License (AGPL).
// See License-AGPL.txt in the project root for license information.

package controllers

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"strings"
	"time"

	wsk8s "github.com/gitpod-io/gitpod/common-go/kubernetes"
	"github.com/gitpod-io/gitpod/common-go/tracing"
	config "github.com/gitpod-io/gitpod/ws-manager/api/config"
	workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"
	"github.com/go-logr/logr"
	"golang.org/x/xerrors"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/log"
)

const (
	// containerKilledExitCode is the exit code Kubernetes uses for a container which was killed by the system.
	// We expect such containers to be restarted by Kubernetes if they're supposed to be running.
	// We never deliberately terminate a container like this.
	//
	// extractFailure treats this code specially: combined with the reason "ContainerStatusUnknown"
	// on a node without taints it is not reported as a failure, and terminations with reason
	// "Error" and this exit code are not reported either.
	containerKilledExitCode = 137

	// containerUnknownExitCode is the exit code containerd uses if it cannot determine the cause/exit status of
	// a stopped container. extractFailure does not report terminations with this exit code through its
	// "terminated for an unknown reason" fallback.
	containerUnknownExitCode = 255

	// headlessTaskFailedPrefix is the prefix of the pod termination message if a headless task failed (e.g. user error
	// or aborted prebuild). For headless workspaces, extractFailure does not treat termination messages
	// starting with this prefix as a workspace failure.
	headlessTaskFailedPrefix = "headless task failed: "

	// The podRejectedReason* values below are compared verbatim (case-sensitively) against
	// pod.status.Reason in isPodRejected, so their exact spelling and casing matters.

	// podRejectedReasonNodeAffinity is the value of pod.status.Reason in case the pod got rejected by kubelet because of a NodeAffinity mismatch
	podRejectedReasonNodeAffinity = "NodeAffinity"

	// podRejectedReasonOutOfCPU is the value of pod.status.Reason in case the pod got rejected by kubelet because of insufficient CPU available
	podRejectedReasonOutOfCPU = "OutOfcpu"

	// podRejectedReasonOutOfMemory is the value of pod.status.Reason in case the pod got rejected by kubelet because of insufficient memory available
	podRejectedReasonOutOfMemory = "OutOfmemory"
)

// updateWorkspaceStatus derives the workspace status (phase, runtime information, URL, owner token
// and conditions) from the pods that currently exist for the workspace. It only mutates the passed
// in workspace object; this function itself does not write anything back to the API server.
//
// The number of pods decides the overall flow:
//   - no pod: the workspace is Pending (if it has no phase yet) or moves from Stopping to Stopped
//     once disposal has finished;
//   - exactly one pod: the status is derived from that pod, see below;
//   - more than one pod: the workspace is marked as failed.
//
// An error is returned if the node lookup fails for a reason other than the node not existing,
// if the workspace URL cannot be rendered, or if no owner token can be generated.
func (r *WorkspaceReconciler) updateWorkspaceStatus(ctx context.Context, workspace *workspacev1.Workspace, pods *corev1.PodList, cfg *config.Configuration) (err error) {
	span, ctx := tracing.FromContext(ctx, "updateWorkspaceStatus")
	defer tracing.FinishSpan(span, &err)
	log := log.FromContext(ctx).WithValues("owi", workspace.OWI())
	ctx = logr.NewContext(ctx, log)

	// Runs on every return path: log phase transitions and remember when the
	// workspace entered the Stopping phase.
	oldPhase := workspace.Status.Phase
	defer func() {
		if oldPhase != workspace.Status.Phase {
			log.Info("workspace phase updated", "oldPhase", oldPhase, "phase", workspace.Status.Phase)
			if workspace.Status.Phase == workspacev1.WorkspacePhaseStopping {
				t := metav1.Now()
				workspace.Status.PodStoppingTime = &t
			}
		}
	}()

	switch len(pods.Items) {
	case 0:
		if workspace.Status.Phase == "" {
			workspace.Status.Phase = workspacev1.WorkspacePhasePending
		}

		if workspace.Status.Phase == workspacev1.WorkspacePhaseStopping && isDisposalFinished(workspace) {
			workspace.Status.Phase = workspacev1.WorkspacePhaseStopped
		}

		if workspace.Status.Phase == workspacev1.WorkspacePhaseStopped && workspace.Status.PodDeletionTime == nil {
			// Set the timestamp when we first saw the pod as deleted.
			// This is used for delaying eventual pod restarts.
			podDeletionTime := metav1.NewTime(time.Now())
			workspace.Status.PodDeletionTime = &podDeletionTime
		}

		workspace.UpsertConditionOnStatusChange(workspacev1.NewWorkspaceConditionContainerRunning(metav1.ConditionFalse))
		return nil
	case 1:
		// continue below
	default:
		// This is exceptional - not sure what to do here. Probably fail the pod
		workspace.Status.SetCondition(
			workspacev1.NewWorkspaceConditionFailed("multiple pods exists - this should never happen"))
		return nil
	}

	if c := wsk8s.GetCondition(workspace.Status.Conditions, string(workspacev1.WorkspaceConditionDeployed)); c == nil {
		workspace.Status.SetCondition(workspacev1.NewWorkspaceConditionDeployed())
	}

	pod := &pods.Items[0]

	// Record runtime information once; values that are already set are never overwritten.
	if workspace.Status.Runtime == nil {
		workspace.Status.Runtime = &workspacev1.WorkspaceRuntimeStatus{}
	}
	if workspace.Status.Runtime.NodeName == "" && pod.Spec.NodeName != "" {
		workspace.Status.Runtime.NodeName = pod.Spec.NodeName
	}
	if workspace.Status.Runtime.HostIP == "" && pod.Status.HostIP != "" {
		workspace.Status.Runtime.HostIP = pod.Status.HostIP
	}
	if workspace.Status.Runtime.PodIP == "" && pod.Status.PodIP != "" {
		workspace.Status.Runtime.PodIP = pod.Status.PodIP
	}
	if workspace.Status.Runtime.PodName == "" && pod.Name != "" {
		workspace.Status.Runtime.PodName = pod.Name
	}

	// Check if the node has disappeared. If so, ws-daemon has also disappeared and we need to
	// mark the workspace backup as failed if it didn't complete disposal yet.
	// Otherwise, the workspace will be stuck in the Stopping phase forever.
	if err := r.checkNodeDisappeared(ctx, workspace, pod); err != nil {
		return err
	}

	if workspace.Status.URL == "" {
		url, err := config.RenderWorkspaceURL(cfg.WorkspaceURLTemplate, workspace.Name, workspace.Spec.Ownership.WorkspaceID, cfg.GitpodHostURL)
		if err != nil {
			return xerrors.Errorf("cannot get workspace URL: %w", err)
		}
		workspace.Status.URL = url
	}

	if workspace.Status.OwnerToken == "" {
		ownerToken, err := getRandomString(32)
		if err != nil {
			return xerrors.Errorf("cannot create owner token: %w", err)
		}
		workspace.Status.OwnerToken = ownerToken
	}

	// extractFailure may force a phase; the phase switch further down can still override it.
	failure, phase := r.extractFailure(ctx, workspace, pod)
	if phase != nil {
		workspace.Status.Phase = *phase
	}

	if failure != "" && !workspace.IsConditionTrue(workspacev1.WorkspaceConditionPodRejected) {
		// Check: A situation where we want to retry?
		if isPodRejected(pod) {
			if !workspace.IsConditionTrue(workspacev1.WorkspaceConditionEverReady) {
				// This is a situation where we want to re-create the pod!
				log.Info("workspace got rejected", "workspace", workspace.Name, "reason", failure)
				workspace.Status.SetCondition(workspacev1.NewWorkspaceConditionPodRejected(failure, metav1.ConditionTrue))
				r.Recorder.Event(workspace, corev1.EventTypeWarning, "PodRejected", failure)
			} else {
				log.Info("workspace got rejected, but we don't handle it, because EverReady=true", "workspace", workspace.Name, "reason", failure)
			}
		}
	}

	if failure != "" && !workspace.IsConditionTrue(workspacev1.WorkspaceConditionFailed) {
		var nodeName string
		if workspace.Status.Runtime != nil {
			nodeName = workspace.Status.Runtime.NodeName
		}
		// workspaces can fail only once - once there is a failed condition set, stick with it
		log.Info("workspace failed", "workspace", workspace.Name, "node", nodeName, "reason", failure)
		workspace.Status.SetCondition(workspacev1.NewWorkspaceConditionFailed(failure))
		r.Recorder.Event(workspace, corev1.EventTypeWarning, "Failed", failure)
	}

	if workspace.IsHeadless() && !workspace.IsConditionTrue(workspacev1.WorkspaceConditionsHeadlessTaskFailed) {
		// The first terminated container that left a termination message determines the condition.
		for _, cs := range pod.Status.ContainerStatuses {
			if cs.State.Terminated != nil && cs.State.Terminated.Message != "" {
				workspace.Status.SetCondition(workspacev1.NewWorkspaceConditionHeadlessTaskFailed(cs.State.Terminated.Message))
				break
			}
		}
	}

	if isWorkspaceContainerRunning(pod.Status.ContainerStatuses) {
		workspace.UpsertConditionOnStatusChange(workspacev1.NewWorkspaceConditionContainerRunning(metav1.ConditionTrue))
	} else {
		workspace.UpsertConditionOnStatusChange(workspacev1.NewWorkspaceConditionContainerRunning(metav1.ConditionFalse))
	}

	// Derive the workspace phase from the pod. The order of the cases matters: a pod that is
	// being deleted takes precedence over whatever phase the pod itself reports.
	switch {
	case isPodBeingDeleted(pod):
		if workspace.Status.Phase == workspacev1.WorkspacePhaseStopping && isDisposalFinished(workspace) {
			workspace.Status.Phase = workspacev1.WorkspacePhaseStopped
		} else if workspace.Status.Phase != workspacev1.WorkspacePhaseStopped {
			// Move to (or stay in) Stopping if not yet Stopped.
			workspace.Status.Phase = workspacev1.WorkspacePhaseStopping
		}

	case pod.Status.Phase == corev1.PodPending:
		// The workspace is Creating as soon as any container is being created or is
		// (unsuccessfully) pulling its image; otherwise it is still Pending.
		var creating bool
		for _, cs := range pod.Status.ContainerStatuses {
			if cs.State.Waiting == nil {
				continue
			}
			switch cs.State.Waiting.Reason {
			case "ContainerCreating", "ImagePullBackOff", "ErrImagePull":
				creating = true
			}
			if creating {
				break
			}
		}
		if creating {
			workspace.Status.Phase = workspacev1.WorkspacePhaseCreating
		} else {
			workspace.Status.Phase = workspacev1.WorkspacePhasePending
		}

	case pod.Status.Phase == corev1.PodRunning:
		everReady := workspace.IsConditionTrue(workspacev1.WorkspaceConditionEverReady)
		if everReady {
			// If the workspace has been ready before, stay in a Running state, even
			// if the workspace container is not ready anymore. This is to avoid the workspace
			// moving back to Initializing and becoming unusable.
			workspace.Status.Phase = workspacev1.WorkspacePhaseRunning
		} else {
			contentReady := workspace.IsConditionTrue(workspacev1.WorkspaceConditionContentReady)
			var ideReady bool
			for _, cs := range pod.Status.ContainerStatuses {
				if cs.Ready {
					ideReady = true
					break
				}
			}

			if contentReady && ideReady {
				// workspace is ready - hence content init is done.
				// EverReady is known to be unset in this branch, so record it now.
				workspace.Status.Phase = workspacev1.WorkspacePhaseRunning
				workspace.Status.SetCondition(workspacev1.NewWorkspaceConditionEverReady())
			} else {
				// workspace has not become ready yet - it must be initializing then.
				workspace.Status.Phase = workspacev1.WorkspacePhaseInitializing
			}
		}

	case workspace.IsHeadless() && (pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed):
		if pod.Status.Phase == corev1.PodSucceeded && !workspace.IsConditionTrue(workspacev1.WorkspaceConditionEverReady) {
			// Fix for Prebuilds that instantly succeed (e.g. empty task), sometimes we don't observe the
			// workspace `Running` phase for these, and never had the opportunity to add the EverReady condition.
			// This would then cause a "start failure" in the metrics. So we retroactively add the EverReady
			// condition here if the pod succeeded.
			workspace.Status.SetCondition(workspacev1.NewWorkspaceConditionEverReady())
		}

		if workspace.Status.Phase == workspacev1.WorkspacePhaseStopping && isDisposalFinished(workspace) {
			workspace.Status.Phase = workspacev1.WorkspacePhaseStopped
		} else if workspace.Status.Phase != workspacev1.WorkspacePhaseStopped {
			// Should be in Stopping phase, but isn't yet.
			// Move to Stopping to start disposal, but only if maintenance mode is disabled.
			if !r.maintenance.IsEnabled(ctx) {
				workspace.Status.Phase = workspacev1.WorkspacePhaseStopping
			}
		}

	case pod.Status.Phase == corev1.PodUnknown:
		workspace.Status.Phase = workspacev1.WorkspacePhaseUnknown

	default:
		log.Info("cannot determine workspace phase", "podStatus", pod.Status)
		workspace.Status.Phase = workspacev1.WorkspacePhaseUnknown

	}

	return nil
}

// checkNodeDisappeared looks up the node the pod is scheduled on. If that node no longer exists,
// the workspace gets the NodeDisappeared condition and, when disposal had not finished yet,
// additionally a BackupFailure condition.
//
// Pods that are not scheduled yet are ignored. Any lookup error other than "not found" is
// returned to the caller.
func (r *WorkspaceReconciler) checkNodeDisappeared(ctx context.Context, workspace *workspacev1.Workspace, pod *corev1.Pod) (err error) {
	span, ctx := tracing.FromContext(ctx, "checkNodeDisappeared")
	defer tracing.FinishSpan(span, &err)

	nodeName := pod.Spec.NodeName
	if nodeName == "" {
		// The pod has not been scheduled onto a node yet.
		return nil
	}

	// Nodes are looked up without a namespace.
	var node corev1.Node
	err = r.Get(ctx, types.NamespacedName{Name: nodeName}, &node)
	switch {
	case err == nil:
		// The node still exists, nothing to do.
		return nil
	case !errors.IsNotFound(err):
		// The lookup itself failed; we cannot tell whether the node is gone.
		return err
	}

	// From here on we know the node is gone. If that has been recorded before,
	// the checks below have already been made.
	if workspace.IsConditionTrue(workspacev1.WorkspaceConditionNodeDisappeared) {
		return nil
	}

	// The order matters: isDisposalFinished also looks at the NodeDisappeared condition,
	// so it has to be consulted before that condition gets set.
	if disposed := isDisposalFinished(workspace); !disposed {
		// The node went away before a backup could be taken, record that as a backup failure.
		log.FromContext(ctx).Error(nil, "workspace node disappeared while disposal has not finished yet", "node", nodeName)
		workspace.Status.SetCondition(workspacev1.NewWorkspaceConditionBackupFailure("workspace node disappeared before backup was taken"))
	}
	workspace.Status.SetCondition(workspacev1.NewWorkspaceConditionNodeDisappeared())

	return nil
}

// isDisposalFinished reports whether there is nothing (left) to dispose for the workspace.
//
// For workspaces whose pod got rejected, disposal is finished as soon as a StateWiped condition
// exists, irrespective of that condition's status. For all other workspaces disposal counts as
// finished if the backup completed or failed, the workspace was aborted, its content never became
// ready, its node disappeared, or it is an image build.
func isDisposalFinished(ws *workspacev1.Workspace) bool {
	if ws.IsConditionTrue(workspacev1.WorkspaceConditionPodRejected) {
		// Rejected pods are done once the StateWiped condition has been set at all.
		stateWiped := wsk8s.GetCondition(ws.Status.Conditions, string(workspacev1.WorkspaceConditionStateWiped))
		return stateWiped != nil
	}

	switch {
	case ws.IsConditionTrue(workspacev1.WorkspaceConditionBackupComplete),
		ws.IsConditionTrue(workspacev1.WorkspaceConditionBackupFailure),
		ws.IsConditionTrue(workspacev1.WorkspaceConditionAborted):
		return true
	case !ws.IsConditionTrue(workspacev1.WorkspaceConditionContentReady):
		// Without ready content there is nothing to dispose.
		return true
	case ws.IsConditionTrue(workspacev1.WorkspaceConditionNodeDisappeared):
		// Disposal is impossible once the node has disappeared.
		return true
	default:
		// Image builds have nothing to dispose.
		return ws.Spec.Type == workspacev1.WorkspaceTypeImageBuild
	}
}

// extractFailure returns a pod failure reason and possibly a phase. If phase is nil then
// one should extract the phase themselves. If the pod has not failed, this function returns "", nil.
// This failure is then stored in the Failed condition on the workspace.
//
// The checks run in a fixed order and the first one that matches wins:
//  1. workspace conditions: content initialization failure, backup failure, state wiping failure;
//  2. a failed pod that carries a reason or message;
//  3. per container: image pull problems, then the (current or last) termination state.
//
// In one case (a container killed with an unknown status) the node is looked up via r.Get.
func (r *WorkspaceReconciler) extractFailure(ctx context.Context, ws *workspacev1.Workspace, pod *corev1.Pod) (string, *workspacev1.WorkspacePhase) {
	// Check for content init failure.
	if c := wsk8s.GetCondition(ws.Status.Conditions, string(workspacev1.WorkspaceConditionContentReady)); c != nil {
		if c.Status == metav1.ConditionFalse && c.Reason == workspacev1.ReasonInitializationFailure {
			msg := c.Message
			if msg == "" {
				msg = "Content initialization failed for an unknown reason"
			} else {
				msg = fmt.Sprintf("Content initialization failed: %s", msg)
			}
			return msg, nil
		}
	}

	// Check for backup failure.
	// NOTE(review): the mere presence of the BackupFailure condition counts as a failure here,
	// its status is not inspected - confirm that this condition is never set with a false status.
	if c := wsk8s.GetCondition(ws.Status.Conditions, string(workspacev1.WorkspaceConditionBackupFailure)); c != nil {
		msg := c.Message
		if msg == "" {
			msg = "Backup failed for an unknown reason"
		} else {
			msg = fmt.Sprintf("Backup failed: %s", msg)
		}
		return msg, nil
	}

	// Check for state wiping failure. Only a StateWiped condition with status false is a failure.
	if c := wsk8s.GetCondition(ws.Status.Conditions, string(workspacev1.WorkspaceConditionStateWiped)); c != nil && c.Status == metav1.ConditionFalse {
		msg := c.Message
		if msg == "" {
			msg = "Wiping workspace state failed for an unknown reason"
		} else {
			msg = fmt.Sprintf("Wiping workspace state failed: %s", msg)
		}
		return msg, nil
	}

	status := pod.Status
	if status.Phase == corev1.PodFailed && (status.Reason != "" || status.Message != "") {
		// Don't force the phase to Unknown here to leave a chance that we may detect the actual phase of
		// the workspace, e.g. stopping.
		return fmt.Sprintf("%s: %s", status.Reason, status.Message), nil
	}

	for _, cs := range status.ContainerStatuses {
		if cs.State.Waiting != nil {
			if cs.State.Waiting.Reason == "ImagePullBackOff" || cs.State.Waiting.Reason == "ErrImagePull" {
				// If the image pull failed we were definitely in the Creating phase,
				// unless of course this pod has been deleted already.
				var res *workspacev1.WorkspacePhase
				if isPodBeingDeleted(pod) {
					// The pod is being deleted already, so the phase depends on the disposal state.
					// updateWorkspaceStatus already decides the phase of pods that are being deleted,
					// hence we defer the decision by not forcing a phase.
					res = nil
				} else {
					c := workspacev1.WorkspacePhaseCreating
					res = &c
				}
				return fmt.Sprintf("cannot pull image: %s", cs.State.Waiting.Message), res
			}
		}

		// Prefer the current termination state and fall back to the previous one, if any.
		terminationState := cs.State.Terminated
		if terminationState == nil {
			terminationState = cs.LastTerminationState.Terminated
		}
		if terminationState != nil {
			// A terminated workspace container is not necessarily bad. During shutdown workspace containers
			// can go into this state and that's ok, which is why the branches below check whether the pod
			// is being deleted. So let's find out why the container terminated.
			if terminationState.ExitCode != 0 && terminationState.Message != "" {
				var phase *workspacev1.WorkspacePhase
				if !isPodBeingDeleted(pod) {
					// If the container wrote a termination message and the pod is not currently being deleted,
					// then it must have been/be running. If we did not force the phase here,
					// we'd be in unknown.
					running := workspacev1.WorkspacePhaseRunning
					phase = &running
				}

				if terminationState.ExitCode == containerKilledExitCode && terminationState.Reason == "ContainerStatusUnknown" {
					// For some reason, the pod is killed with unknown container status and no taints on the underlying node.
					// Therefore, we skip extracting the failure from the terminated message.
					// ref: https://github.com/gitpod-io/gitpod/issues/12021
					//
					// The failure is only skipped if the node lookup succeeds and the node has no taints;
					// if the node name is unknown or the lookup fails, we fall through to the checks below.
					var node corev1.Node
					if ws.Status.Runtime != nil && ws.Status.Runtime.NodeName != "" {
						if err := r.Get(ctx, types.NamespacedName{Namespace: "", Name: ws.Status.Runtime.NodeName}, &node); err == nil && len(node.Spec.Taints) == 0 {
							return "", nil
						}
					}
				}

				if ws.IsHeadless() && strings.HasPrefix(terminationState.Message, headlessTaskFailedPrefix) {
					// Headless task failed, not a workspace failure.
					return "", nil
				}

				// the container itself told us why it was terminated - use that as failure reason
				return extractFailureFromLogs([]byte(terminationState.Message)), phase
			} else if terminationState.Reason == "Error" {
				// Error terminations of pods being deleted, or with the "killed" exit code, are not
				// reported from this branch.
				if !isPodBeingDeleted(pod) && terminationState.ExitCode != containerKilledExitCode {
					phase := workspacev1.WorkspacePhaseRunning
					return fmt.Sprintf("container %s ran with an error: exit code %d", cs.Name, terminationState.ExitCode), &phase
				}
			} else if terminationState.Reason == "Completed" && !isPodBeingDeleted(pod) {
				// Headless workspaces are expected to finish.
				if !ws.IsHeadless() {
					return fmt.Sprintf("container %s completed; containers of a workspace pod are not supposed to do that", cs.Name), nil
				}
			} else if !isPodBeingDeleted(pod) && terminationState.ExitCode != containerUnknownExitCode {
				// if a container is terminated and it wasn't because of either:
				//  - regular shutdown
				//  - the exit code "UNKNOWN" (which might be caused by an intermittent issue)
				//  - another known error
				// then we report it as UNKNOWN
				// NOTE(review): the original comment said the "UNKNOWN" exit code is handled in
				// extractStatusFromPod, which does not exist in this file - confirm where it is handled.
				phase := workspacev1.WorkspacePhaseUnknown
				return fmt.Sprintf("workspace container %s terminated for an unknown reason: (%s) %s", cs.Name, terminationState.Reason, terminationState.Message), &phase
			}
		}
	}

	return "", nil
}

// isWorkspaceContainerRunning reports whether the container named "workspace" is in the
// running state. Only the first status entry with that name is considered; if there is
// no such entry the result is false.
func isWorkspaceContainerRunning(statuses []corev1.ContainerStatus) bool {
	for i := range statuses {
		if statuses[i].Name != "workspace" {
			continue
		}
		// Found the workspace container - its state alone decides the outcome.
		return statuses[i].State.Running != nil
	}
	return false
}

// extractFailureFromLogs attempts to extract the last error message from a workspace
// container's log output.
//
// The logs are scanned line by line, starting with the last line. The first line that is
// a JSON object with a non-empty "message" field determines the result: the message itself,
// followed by ": " and the "error" field if that one is set, too. Lines that are not JSON or
// carry no message are skipped. The final line is considered even if it is not terminated by
// a newline. If no line yields a message, the logs are returned unmodified.
func extractFailureFromLogs(logs []byte) string {
	sep := []byte("\n")

	// end is the exclusive end of the line under inspection; lines are visited back to front.
	for end := len(logs); end > 0; {
		// LastIndex yields -1 if there is no earlier newline, which makes the line start at 0.
		start := bytes.LastIndex(logs[:end], sep) + 1
		line := logs[start:end]
		// Continue with whatever precedes the newline in front of this line.
		end = start - 1

		// Decode into a fresh value for every line, so that fields of a line we skip
		// (e.g. an "error" without a "message") cannot leak into the result of another line.
		var msg struct {
			Error   string `json:"error"`
			Message string `json:"message"`
		}
		if err := json.Unmarshal(line, &msg); err != nil || msg.Message == "" {
			continue
		}

		if msg.Error == "" {
			return msg.Message
		}
		return msg.Message + ": " + msg.Error
	}

	return string(logs)
}

// isPodBeingDeleted reports whether deletion of the pod has been requested.
func isPodBeingDeleted(pod *corev1.Pod) bool {
	// A set deletionTimestamp is the only marker we have for a pod that is being deleted.
	deletionRequestedAt := pod.ObjectMeta.DeletionTimestamp
	return deletionRequestedAt != nil
}

// isWorkspaceBeingDeleted reports whether deletion of the workspace resource has been
// requested, i.e. whether its deletionTimestamp is set.
func isWorkspaceBeingDeleted(ws *workspacev1.Workspace) bool {
	deletionRequestedAt := ws.ObjectMeta.DeletionTimestamp
	return deletionRequestedAt != nil
}

// isPodRejected reports whether the pod has been rejected by the kubelet. That is the case
// if the pod has failed with one of the known rejection reasons (NodeAffinity mismatch, out of
// CPU, out of memory) and its status message starts with "Pod was rejected".
func isPodRejected(pod *corev1.Pod) bool {
	if pod.Status.Phase != corev1.PodFailed {
		return false
	}

	switch pod.Status.Reason {
	case podRejectedReasonNodeAffinity, podRejectedReasonOutOfCPU, podRejectedReasonOutOfMemory:
		return strings.HasPrefix(pod.Status.Message, "Pod was rejected")
	default:
		return false
	}
}