// Copyright (c) 2020 Gitpod GmbH. All rights reserved.
// Licensed under the GNU Affero General Public License (AGPL).
// See License-AGPL.txt in the project root for license information.

package manager

import (
	"context"
	"encoding/base64"
	"encoding/json"
	"strings"
	"time"

	"golang.org/x/xerrors"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/util/retry"

	"github.com/gitpod-io/gitpod/common-go/tracing"
	"github.com/gitpod-io/gitpod/ws-manager/api"
)

const (
	// workspaceIDAnnotation is the annotation on the WS pod which contains the workspace ID
	workspaceIDAnnotation = "gitpod/id"

	// servicePrefixAnnotation is the annotation on the WS pod which contains the service prefix
	servicePrefixAnnotation = "gitpod/servicePrefix"

	// workspaceURLAnnotation is the annotation on the WS pod which contains the public workspace URL
	workspaceURLAnnotation = "gitpod/url"

	// workspaceNeverReadyAnnotation marks a workspace as having never been ready. It's the inverse of the former workspaceReadyAnnotation
	workspaceNeverReadyAnnotation = "gitpod/never-ready"

	// workspaceTimedOutAnnotation marks a workspace as timed out by the ws-manager
	workspaceTimedOutAnnotation = "gitpod/timedout"

	// workspaceClosedAnnotation marks a workspace as closed by the user - this affects the timeout of a workspace
	workspaceClosedAnnotation = "gitpod/closed"

	// workspaceExplicitFailAnnotation marks a workspace as failed because of some runtime reason, e.g. the task that ran in it failed (used for headless workspaces)
	workspaceExplicitFailAnnotation = "gitpod/explicitFail"

	// workspaceSnapshotAnnotation stores a workspace's snapshot if one was taken prior to shutdown
	workspaceSnapshotAnnotation = "gitpod/snapshot"

	// workspaceInitializerAnnotation contains the protobuf serialized initializer config in base64 encoding. We need to keep this around post-request
	// as we'll pass on the request to ws-daemon later in the workspace's lifecycle. This is not a configmap as we cannot create the map prior to the pod,
	// because then we would not know which configmaps to delete; we cannot create the map after the pod as then the pod could reach the state where the
	// configmap is needed, but isn't present yet.
	// According to the K8S documentation, storing "large" amounts of data in annotations is not an issue:
	// https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/#attaching-metadata-to-objects
	workspaceInitializerAnnotation = "gitpod/contentInitializer"

	// workspaceImageSpecAnnotation contains the protobuf serialized image spec in base64 encoding. We need to keep this around post-request
	// to provide this information to the registry facade later in the workspace's lifecycle.
	workspaceImageSpecAnnotation = "gitpod/imageSpec"

	// workspaceFailedBeforeStoppingAnnotation marks a workspace as failed even before we tried
	// to stop it. We do not extract the failure state from this annotation, but just stabilize
	// the state computation.
	workspaceFailedBeforeStoppingAnnotation = "gitpod/failedBeforeStopping"

	// customTimeoutAnnotation configures the activity timeout of a workspace, i.e. the timeout a user experiences when not using an otherwise active workspace for some time.
	// This is handy if you want to prevent a workspace from timing out during lunch break.
	customTimeoutAnnotation = "gitpod/customTimeout"

	// firstUserActivityAnnotation marks a workspace with the timestamp of first user activity in it
	firstUserActivityAnnotation = "gitpod/firstUserActivity"

	// fullWorkspaceBackupAnnotation is set on workspaces which operate using a full workspace backup
	fullWorkspaceBackupAnnotation = "gitpod/fullWorkspaceBackup"

	// ownerTokenAnnotation contains the owner token of the workspace
	ownerTokenAnnotation = "gitpod/ownerToken"

	// workspaceAdmissionAnnotation determines the user admission to a workspace, i.e. if it can be accessed by everyone without token
	workspaceAdmissionAnnotation = "gitpod/admission"

	// ingressPortsAnnotation holds the mapping workspace port -> allocated ingress port on kubernetes services
	ingressPortsAnnotation = "gitpod/ingressPorts"

	// withUsernamespaceAnnotation is set on workspaces which are wrapped in a user namespace (or have some form of user namespace support)
	// Beware: this annotation is duplicated/copied in ws-daemon
	withUsernamespaceAnnotation = "gitpod/withUsernamespace"
)
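
// decodeInitializerAnnotation is an illustrative sketch, not part of the original file.
// It assumes the workspaceInitializerAnnotation value uses standard base64 encoding, per
// the comment on that constant above; the exact protobuf message type the decoded bytes
// would be unmarshalled into lives in the content-service API and is omitted here.
func decodeInitializerAnnotation(pod *corev1.Pod) ([]byte, error) {
	raw, ok := pod.Annotations[workspaceInitializerAnnotation]
	if !ok {
		return nil, xerrors.Errorf("pod has no %s annotation", workspaceInitializerAnnotation)
	}

	data, err := base64.StdEncoding.DecodeString(raw)
	if err != nil {
		return nil, xerrors.Errorf("cannot decode initializer config: %w", err)
	}

	// these raw bytes would subsequently be proto-unmarshalled and passed on to ws-daemon
	return data, nil
}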

// markWorkspace applies the given annotations (additions and deletions) to a workspace pod
func (m *Manager) markWorkspace(workspaceID string, annotations ...*annotation) error {
	client := m.Clientset.CoreV1().Pods(m.Config.Namespace)

	// Retry on failure. Sometimes this doesn't work because of concurrent modification. The Kubernetes way is to just try again after waiting a bit.
	err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
		pod, err := m.findWorkspacePod(workspaceID)
		if err != nil {
			return xerrors.Errorf("cannot find workspace %s: %w", workspaceID, err)
		}
		if pod == nil {
			return xerrors.Errorf("workspace %s does not exist", workspaceID)
		}

		for _, a := range annotations {
			a.Apply(pod.Annotations)

			// Optimization: if we're failing the workspace explicitly, we might as well add the workspaceFailedBeforeStoppingAnnotation
			// as well. If we didn't do this here, the monitor would do that for us down the road, but this way we need one fewer modification
			// of the pod.
			if a.Name == workspaceExplicitFailAnnotation {
				pod.Annotations[workspaceFailedBeforeStoppingAnnotation] = "true"
			}
		}

		_, err = client.Update(pod)
		return err
	})
	if err != nil {
		an := make([]string, len(annotations))
		for i, a := range annotations {
			if a.Delete {
				an[i] = "-" + a.Name
			} else {
				an[i] = "+" + a.Name
			}
		}
		return xerrors.Errorf("cannot mark workspace %s with %v: %w", workspaceID, strings.Join(an, ", "), err)
	}

	return nil
}
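
// markWorkspaceClosed is an illustrative sketch, not part of the original file. It shows
// how addMark and deleteMark compose into a single markWorkspace call: here we flag a
// workspace as closed by the user and clear any stale timed-out marker in one pod update.
// The "true" value is an assumption for illustration.
func (m *Manager) markWorkspaceClosed(workspaceID string) error {
	return m.markWorkspace(workspaceID,
		addMark(workspaceClosedAnnotation, "true"),
		deleteMark(workspaceTimedOutAnnotation),
	)
}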

func addMark(name, value string) *annotation {
	return &annotation{name, value, false}
}

func deleteMark(name string) *annotation {
	return &annotation{name, "", true}
}

// annotation is a piece of metadata added to a workspace
type annotation struct {
	Name   string
	Value  string
	Delete bool
}

// Apply applies the annotation to the given map and reports whether this changed the set of
// keys, i.e. whether an update is needed. Note: overwriting an existing key with a new value
// is not counted as an update here.
func (a *annotation) Apply(dst map[string]string) (needsUpdate bool) {
	_, wasPresent := dst[a.Name]
	if a.Delete {
		needsUpdate = wasPresent
		delete(dst, a.Name)
	} else {
		needsUpdate = !wasPresent
		dst[a.Name] = a.Value
	}
	return
}
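
// applyExample is an illustrative sketch, not part of the original file. It demonstrates
// Apply's needsUpdate semantics on an empty annotation map.
func applyExample() {
	anns := map[string]string{}
	fresh := addMark(workspaceClosedAnnotation, "true").Apply(anns)  // true: key was absent
	again := addMark(workspaceClosedAnnotation, "other").Apply(anns) // false: key already present, even though the value changed
	gone := deleteMark(workspaceClosedAnnotation).Apply(anns)        // true: key was present
	_, _, _ = fresh, again, gone
}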

const (
	// plisDataAnnotation is the annotation on the PLIS config map which holds the serialized pod lifecycle independent state
	plisDataAnnotation = "gitpod/plis"
)

// podLifecycleIndependentState (PLIS) stores all data we cannot store on the pod because it outlives the
// workspace pod. Examples include the backup state, as the pod may be stopped/deleted, but the backup may
// still be running.
//
// Beware: The pod lifecycle independent state is really a measure of last resort.
// Before adding to this structure, talk to someone else and make sure there is no better way!
type podLifecycleIndependentState struct {
	FinalBackupComplete bool       `json:"finalBackupComplete,omitempty"`
	FinalBackupFailure  string     `json:"finalBackupFailure,omitempty"`
	StoppingSince       *time.Time `json:"stoppingSince,omitempty"`

	// LastPodStatus is the status we computed just before the workspace pod was deleted
	LastPodStatus *api.WorkspaceStatus `json:"lastPodStatus,omitempty"`
	// HostIP is the IP address of the node the workspace pod is/was deployed to
	HostIP string `json:"hostIP,omitempty"`
}
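
// For illustration only (all values invented), a marshalled PLIS as stored under the
// gitpod/plis annotation might look like:
//
//	{"finalBackupComplete":true,"stoppingSince":"2020-01-01T00:00:00Z","hostIP":"10.0.0.7"}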

// patchPodLifecycleIndependentState updates the pod lifecycle independent state of a workspace by applying
// the given patch function and annotations. Calling this function triggers a status update. This function is
// neither atomic, nor synchronized.
func (m *Manager) patchPodLifecycleIndependentState(ctx context.Context, workspaceID string, patch func(*podLifecycleIndependentState) (needsUpdate bool), annotations ...*annotation) (err error) {
	span, ctx := tracing.FromContext(ctx, "patchPodLifecycleIndependentState")
	defer tracing.FinishSpan(span, &err)

	err = retry.RetryOnConflict(retry.DefaultBackoff, func() error {
		plisCfg, err := m.Clientset.CoreV1().ConfigMaps(m.Config.Namespace).Get(getPodLifecycleIndependentCfgMapName(workspaceID), metav1.GetOptions{})
		if isKubernetesObjNotFoundError(err) {
			return xerrors.Errorf("workspace %s has no pod lifecycle independent state", workspaceID)
		}
		if err != nil {
			return xerrors.Errorf("cannot retrieve pod lifecycle independent state: %w", err)
		}
		tracing.LogEvent(span, "k8s get done")

		needsUpdate := false
		if patch != nil {
			plis, err := unmarshalPodLifecycleIndependentState(plisCfg)
			if err != nil {
				return xerrors.Errorf("patch pod lifecycle independent state: %w", err)
			}
			if plis == nil {
				plis = &podLifecycleIndependentState{}
			}
			tracing.LogEvent(span, "unmarshalling done")

			tracing.LogKV(span, "prePatchPLIS", plisCfg.Annotations[plisDataAnnotation])
			needsUpdate = patch(plis)
			tracing.LogEvent(span, "patch done")

			err = marshalPodLifecycleIndependentState(plisCfg, plis)
			if err != nil {
				return xerrors.Errorf("patch lifecycle independent state: %w", err)
			}
			tracing.LogEvent(span, "marshalling done")
		}
		for _, a := range annotations {
			doUpdate := a.Apply(plisCfg.Annotations)
			needsUpdate = needsUpdate || doUpdate
		}
		if !needsUpdate {
			return nil
		}
		tracing.LogKV(span, "postPatchPLIS", plisCfg.Annotations[plisDataAnnotation])
		tracing.LogKV(span, "needsUpdate", "true")

		_, err = m.Clientset.CoreV1().ConfigMaps(m.Config.Namespace).Update(plisCfg)
		return err
	})
	if err != nil {
		return xerrors.Errorf("patch lifecycle independent state: %w", err)
	}
	tracing.LogEvent(span, "k8s update done")

	return nil
}
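
// markFinalBackupComplete is an illustrative sketch, not part of the original file. It shows
// a typical patch function: it records that the final backup finished and returns true only
// if the flag actually changed, so an unchanged state skips the config map update entirely.
func (m *Manager) markFinalBackupComplete(ctx context.Context, workspaceID string) error {
	return m.patchPodLifecycleIndependentState(ctx, workspaceID, func(plis *podLifecycleIndependentState) (needsUpdate bool) {
		if plis.FinalBackupComplete {
			return false
		}
		plis.FinalBackupComplete = true
		return true
	})
}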

// unmarshalPodLifecycleIndependentState reads the podLifecycleIndependentState JSON from the config map and tries to unmarshal it
func unmarshalPodLifecycleIndependentState(cfg *corev1.ConfigMap) (*podLifecycleIndependentState, error) {
	if cfg == nil {
		// no config map => nothing to unmarshal
		return nil, nil
	}

	rawPLIS, ok := cfg.Annotations[plisDataAnnotation]
	if !ok {
		// there's nothing for us in this config map
		return nil, nil
	}

	var result podLifecycleIndependentState
	err := json.Unmarshal([]byte(rawPLIS), &result)
	if err != nil {
		return nil, xerrors.Errorf("cannot unmarshal pod lifecycle independent state: %w", err)
	}

	return &result, nil
}

// marshalPodLifecycleIndependentState takes a podLifecycleIndependentState instance and stores it in the config map
func marshalPodLifecycleIndependentState(dst *corev1.ConfigMap, plis *podLifecycleIndependentState) error {
	rawPLIS, err := json.Marshal(plis)
	if err != nil {
		return xerrors.Errorf("cannot marshal pod lifecycle independent state: %w", err)
	}

	// We're not putting the PLIS JSON in the config map data as that takes about 10x as long as storing it in an annotation.
	dst.Annotations[plisDataAnnotation] = string(rawPLIS)
	return nil
}
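
// plisRoundTrip is an illustrative sketch, not part of the original file. It checks that
// marshalPodLifecycleIndependentState and unmarshalPodLifecycleIndependentState are inverses
// on the same config map, which is what patchPodLifecycleIndependentState relies on above.
func plisRoundTrip() error {
	cfg := &corev1.ConfigMap{}
	cfg.Annotations = map[string]string{}

	in := &podLifecycleIndependentState{FinalBackupComplete: true}
	if err := marshalPodLifecycleIndependentState(cfg, in); err != nil {
		return err
	}

	out, err := unmarshalPodLifecycleIndependentState(cfg)
	if err != nil {
		return err
	}
	if out == nil || out.FinalBackupComplete != in.FinalBackupComplete {
		return xerrors.Errorf("pod lifecycle independent state round trip mismatch")
	}
	return nil
}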