2022-03-10 15:46:17 +05:30

519 lines
16 KiB
Go

// Copyright (c) 2020 Gitpod GmbH. All rights reserved.
// Licensed under the GNU Affero General Public License (AGPL).
// See License-AGPL.txt in the project root for license information.
package initializer
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"strings"
"time"
"github.com/opencontainers/go-digest"
"github.com/opentracing/opentracing-go"
"golang.org/x/xerrors"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"github.com/gitpod-io/gitpod/common-go/log"
"github.com/gitpod-io/gitpod/common-go/tracing"
csapi "github.com/gitpod-io/gitpod/content-service/api"
"github.com/gitpod-io/gitpod/content-service/pkg/archive"
"github.com/gitpod-io/gitpod/content-service/pkg/git"
"github.com/gitpod-io/gitpod/content-service/pkg/storage"
)
const (
// WorkspaceReadyFile is the name of the ready file we're placing in a workspace
WorkspaceReadyFile = ".gitpod/ready"
// GitpodUID is the user ID of the gitpod user
GitpodUID = 33333
// GitpodGID is the group ID of the gitpod user group
GitpodGID = 33333
// otsDownloadAttempts is the number of times we'll attempt to download the one-time secret
otsDownloadAttempts = 20
)
// Initializer can initialize a workspace with content
type Initializer interface {
Run(ctx context.Context, mappings []archive.IDMapping) (csapi.WorkspaceInitSource, error)
}
// EmptyInitializer does nothing
type EmptyInitializer struct{}
// Run does nothing
func (e *EmptyInitializer) Run(ctx context.Context, mappings []archive.IDMapping) (csapi.WorkspaceInitSource, error) {
return csapi.WorkspaceInitFromOther, nil
}
// CompositeInitializer does nothing
type CompositeInitializer []Initializer
// Run calls run on all child initializers
func (e CompositeInitializer) Run(ctx context.Context, mappings []archive.IDMapping) (csapi.WorkspaceInitSource, error) {
_, ctx = opentracing.StartSpanFromContext(ctx, "CompositeInitializer.Run")
for _, init := range e {
_, err := init.Run(ctx, mappings)
if err != nil {
return csapi.WorkspaceInitFromOther, err
}
}
return csapi.WorkspaceInitFromOther, nil
}
// NewFromRequestOpts configures the initializer produced from a content init request
type NewFromRequestOpts struct {
// ForceGitpodUserForGit forces gitpod:gitpod ownership on all files produced by the Git initializer.
// For FWB workspaces the content init is run from supervisor which runs as UID 0. Using this flag, the
// Git content is forced to the Gitpod user. All other content (backup, prebuild, snapshot) will already
// have the correct user.
ForceGitpodUserForGit bool
}
// NewFromRequest picks the initializer from the request but does not execute it.
// Returns gRPC errors.
func NewFromRequest(ctx context.Context, loc string, rs storage.DirectDownloader, req *csapi.WorkspaceInitializer, opts NewFromRequestOpts) (i Initializer, err error) {
//nolint:ineffassign,staticcheck
span, ctx := opentracing.StartSpanFromContext(ctx, "NewFromRequest")
defer tracing.FinishSpan(span, &err)
span.LogKV("opts", opts)
spec := req.Spec
var initializer Initializer
if _, ok := spec.(*csapi.WorkspaceInitializer_Empty); ok {
initializer = &EmptyInitializer{}
} else if ir, ok := spec.(*csapi.WorkspaceInitializer_Composite); ok {
initializers := make([]Initializer, len(ir.Composite.Initializer))
for i, init := range ir.Composite.Initializer {
initializers[i], err = NewFromRequest(ctx, loc, rs, init, opts)
if err != nil {
return nil, err
}
}
initializer = CompositeInitializer(initializers)
} else if ir, ok := spec.(*csapi.WorkspaceInitializer_Git); ok {
if ir.Git == nil {
return nil, status.Error(codes.InvalidArgument, "missing Git initializer spec")
}
initializer, err = newGitInitializer(ctx, loc, ir.Git, opts.ForceGitpodUserForGit)
} else if ir, ok := spec.(*csapi.WorkspaceInitializer_Prebuild); ok {
if ir.Prebuild == nil {
return nil, status.Error(codes.InvalidArgument, "missing prebuild initializer spec")
}
var snapshot *SnapshotInitializer
if ir.Prebuild.Prebuild != nil {
snapshot, err = newSnapshotInitializer(loc, rs, ir.Prebuild.Prebuild)
if err != nil {
return nil, status.Error(codes.Internal, fmt.Sprintf("cannot setup prebuild init: %v", err))
}
}
var gits []*GitInitializer
for _, gi := range ir.Prebuild.Git {
gitinit, err := newGitInitializer(ctx, loc, gi, opts.ForceGitpodUserForGit)
if err != nil {
return nil, err
}
gits = append(gits, gitinit)
}
initializer = &PrebuildInitializer{
Prebuild: snapshot,
Git: gits,
}
} else if ir, ok := spec.(*csapi.WorkspaceInitializer_Snapshot); ok {
initializer, err = newSnapshotInitializer(loc, rs, ir.Snapshot)
} else if ir, ok := spec.(*csapi.WorkspaceInitializer_Download); ok {
initializer, err = newFileDownloadInitializer(loc, ir.Download)
} else if ir, ok := spec.(*csapi.WorkspaceInitializer_Backup); ok {
initializer, err = newFromBackupInitializer(loc, rs, ir.Backup)
} else {
initializer = &EmptyInitializer{}
}
if err != nil {
return nil, status.Error(codes.InvalidArgument, err.Error())
}
return initializer, nil
}
// newFileDownloadInitializer creates a download initializer for a request
func newFileDownloadInitializer(loc string, req *csapi.FileDownloadInitializer) (*fileDownloadInitializer, error) {
fileInfos := make([]fileInfo, len(req.Files))
for i, f := range req.Files {
dgst, err := digest.Parse(f.Digest)
if err != nil {
return nil, xerrors.Errorf("invalid digest %s: %w", f.Digest, err)
}
fileInfos[i] = fileInfo{
URL: f.Url,
Path: f.FilePath,
Digest: dgst,
}
}
initializer := &fileDownloadInitializer{
FilesInfos: fileInfos,
TargetLocation: filepath.Join(loc, req.TargetLocation),
HTTPClient: http.DefaultClient,
RetryTimeout: 1 * time.Second,
}
return initializer, nil
}
// newFromBackupInitializer creates a backup restoration initializer for a request
func newFromBackupInitializer(loc string, rs storage.DirectDownloader, req *csapi.FromBackupInitializer) (*fromBackupInitializer, error) {
return &fromBackupInitializer{
Location: loc,
RemoteStorage: rs,
}, nil
}
type fromBackupInitializer struct {
Location string
RemoteStorage storage.DirectDownloader
}
func (bi *fromBackupInitializer) Run(ctx context.Context, mappings []archive.IDMapping) (src csapi.WorkspaceInitSource, err error) {
hasBackup, err := bi.RemoteStorage.Download(ctx, bi.Location, storage.DefaultBackup, mappings)
if !hasBackup {
return src, xerrors.Errorf("no backup found")
}
if err != nil {
return src, xerrors.Errorf("cannot restore backup: %w", err)
}
return csapi.WorkspaceInitFromBackup, nil
}
// newGitInitializer creates a Git initializer based on the request.
// Returns gRPC errors.
func newGitInitializer(ctx context.Context, loc string, req *csapi.GitInitializer, forceGitpodUser bool) (*GitInitializer, error) {
if req.Config == nil {
return nil, status.Error(codes.InvalidArgument, "Git initializer misses config")
}
var targetMode CloneTargetMode
switch req.TargetMode {
case csapi.CloneTargetMode_LOCAL_BRANCH:
targetMode = LocalBranch
case csapi.CloneTargetMode_REMOTE_BRANCH:
targetMode = RemoteBranch
case csapi.CloneTargetMode_REMOTE_COMMIT:
targetMode = RemoteCommit
case csapi.CloneTargetMode_REMOTE_HEAD:
targetMode = RemoteHead
default:
return nil, status.Error(codes.InvalidArgument, fmt.Sprintf("invalid target mode: %v", req.TargetMode))
}
var authMethod = git.BasicAuth
if req.Config.Authentication == csapi.GitAuthMethod_NO_AUTH {
authMethod = git.NoAuth
}
// the auth provider must cache the OTS because it may be used several times,
// but can download the one-time-secret only once.
authProvider := git.CachingAuthProvider(func() (user string, pwd string, err error) {
switch req.Config.Authentication {
case csapi.GitAuthMethod_BASIC_AUTH:
user = req.Config.AuthUser
pwd = req.Config.AuthPassword
case csapi.GitAuthMethod_BASIC_AUTH_OTS:
user, pwd, err = downloadOTS(ctx, req.Config.AuthOts)
if err != nil {
log.WithField("location", loc).WithError(err).Error("cannot download Git auth OTS")
return "", "", status.Error(codes.InvalidArgument, "cannot get OTS")
}
case csapi.GitAuthMethod_NO_AUTH:
default:
return "", "", status.Error(codes.InvalidArgument, fmt.Sprintf("invalid Git authentication method: %v", req.Config.Authentication))
}
return
})
log.WithField("location", loc).Debug("using Git initializer")
return &GitInitializer{
Client: git.Client{
Location: filepath.Join(loc, req.CheckoutLocation),
RemoteURI: req.RemoteUri,
UpstreamRemoteURI: req.Upstream_RemoteUri,
Config: req.Config.CustomConfig,
AuthMethod: authMethod,
AuthProvider: authProvider,
},
TargetMode: targetMode,
CloneTarget: req.CloneTaget,
Chown: forceGitpodUser,
}, nil
}
func newSnapshotInitializer(loc string, rs storage.DirectDownloader, req *csapi.SnapshotInitializer) (*SnapshotInitializer, error) {
return &SnapshotInitializer{
Location: loc,
Snapshot: req.Snapshot,
Storage: rs,
}, nil
}
func downloadOTS(ctx context.Context, url string) (user, pwd string, err error) {
//nolint:ineffassign
span, ctx := opentracing.StartSpanFromContext(ctx, "downloadOTS")
defer tracing.FinishSpan(span, &err)
span.LogKV("url", url)
dl := func() (user, pwd string, err error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return "", "", err
}
_ = opentracing.GlobalTracer().Inject(span.Context(), opentracing.HTTPHeaders, opentracing.HTTPHeadersCarrier(req.Header))
resp, err := http.DefaultClient.Do(req)
if err != nil {
return "", "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", "", xerrors.Errorf("non-OK OTS response: %s", resp.Status)
}
secret, err := io.ReadAll(resp.Body)
if err != nil {
return "", "", err
}
pwd = string(secret)
if segs := strings.Split(pwd, ":"); len(segs) >= 2 {
user = segs[0]
pwd = strings.Join(segs[1:], ":")
}
return
}
for i := 0; i < otsDownloadAttempts; i++ {
span.LogKV("attempt", i)
if i > 0 {
time.Sleep(time.Second)
}
user, pwd, err = dl()
if err == context.Canceled || err == context.DeadlineExceeded {
return
}
if err == nil {
break
}
log.WithError(err).WithField("attempt", i).Warn("cannot download OTS")
}
if err != nil {
log.WithError(err).Warn("failed to download OTS")
return "", "", err
}
return user, pwd, nil
}
// InitializeOpt configures the initialisation procedure
type InitializeOpt func(*initializeOpts)
type initializeOpts struct {
Initializer Initializer
CleanSlate bool
UID int
GID int
mappings []archive.IDMapping
}
// WithMappings configures the UID mappings that're used during content initialization
func WithMappings(mappings []archive.IDMapping) InitializeOpt {
return func(o *initializeOpts) {
o.mappings = mappings
}
}
// WithInitializer configures the initializer that's used during content initialization
func WithInitializer(initializer Initializer) InitializeOpt {
return func(o *initializeOpts) {
o.Initializer = initializer
}
}
// WithCleanSlate ensures there's no prior content in the workspace location
func WithCleanSlate(o *initializeOpts) {
o.CleanSlate = true
}
// WithChown sets a custom UID/GID the content will have after initialisation
func WithChown(uid, gid int) InitializeOpt {
return func(o *initializeOpts) {
o.UID = uid
o.GID = gid
}
}
// InitializeWorkspace initializes a workspace from backup or an initializer
func InitializeWorkspace(ctx context.Context, location string, remoteStorage storage.DirectDownloader, opts ...InitializeOpt) (src csapi.WorkspaceInitSource, err error) {
//nolint:ineffassign
span, ctx := opentracing.StartSpanFromContext(ctx, "InitializeWorkspace")
span.SetTag("location", location)
defer tracing.FinishSpan(span, &err)
cfg := initializeOpts{
Initializer: &EmptyInitializer{},
CleanSlate: false,
GID: GitpodGID,
UID: GitpodUID,
}
for _, o := range opts {
o(&cfg)
}
src = csapi.WorkspaceInitFromOther
// Note: it's important that CleanSlate does not remove the location itself, but merely its content.
// If the location were removed that might break the filesystem quota we have put in place prior.
if cfg.CleanSlate {
// 1. Clean out the workspace directory
if _, err := os.Stat(location); os.IsNotExist(err) {
// in the very unlikely event that the workspace Pod did not mount (and thus create) the workspace directory, create it
err = os.Mkdir(location, 0755)
if os.IsExist(err) {
log.WithError(err).WithField("location", location).Debug("ran into non-atomic workspace location existence check")
span.SetTag("exists", true)
} else if err != nil {
return src, xerrors.Errorf("cannot create workspace: %w", err)
}
}
fs, err := os.ReadDir(location)
if err != nil {
return src, xerrors.Errorf("cannot clean workspace folder: %w", err)
}
for _, f := range fs {
path := filepath.Join(location, f.Name())
err := os.RemoveAll(path)
if err != nil {
return src, xerrors.Errorf("cannot clean workspace folder: %w", err)
}
}
// Chown the workspace directory
err = os.Chown(location, cfg.UID, cfg.GID)
if err != nil {
return src, xerrors.Errorf("cannot create workspace: %w", err)
}
}
// Run the initializer
hasBackup, err := remoteStorage.Download(ctx, location, storage.DefaultBackup, cfg.mappings)
if err != nil {
return src, xerrors.Errorf("cannot restore backup: %w", err)
}
span.SetTag("hasBackup", hasBackup)
if hasBackup {
src = csapi.WorkspaceInitFromBackup
} else {
src, err = cfg.Initializer.Run(ctx, cfg.mappings)
if err != nil {
return src, xerrors.Errorf("cannot initialize workspace: %w", err)
}
}
return
}
// Some workspace content may have a `/dst/.gitpod` file or directory. That would break
// the workspace ready file placement (see https://github.com/gitpod-io/gitpod/issues/7694).
// This function ensures that workspaces do not have a `.gitpod` file or directory present.
func EnsureCleanDotGitpodDirectory(ctx context.Context, wspath string) error {
var mv func(src, dst string) error
if git.IsWorkingCopy(wspath) {
c := &git.Client{
Location: wspath,
}
mv = func(src, dst string) error {
return c.Git(ctx, "mv", src, dst)
}
} else {
mv = os.Rename
}
dotGitpod := filepath.Join(wspath, ".gitpod")
stat, err := os.Stat(dotGitpod)
if os.IsNotExist(err) {
return nil
}
if stat.IsDir() {
// we need this to be a directory, we're probably ok
return nil
}
candidateFN := filepath.Join(wspath, ".gitpod.yaml")
if _, err := os.Stat(candidateFN); err == nil {
// Our candidate file already exists, hence we cannot just move things.
// As fallback we'll delete the .gitpod entry.
return os.RemoveAll(dotGitpod)
}
err = mv(dotGitpod, candidateFN)
if err != nil {
return err
}
return nil
}
// PlaceWorkspaceReadyFile writes a file in the workspace which indicates that the workspace has been initialized
func PlaceWorkspaceReadyFile(ctx context.Context, wspath string, initsrc csapi.WorkspaceInitSource, uid, gid int) (err error) {
//nolint:ineffassign,staticcheck
span, ctx := opentracing.StartSpanFromContext(ctx, "placeWorkspaceReadyFile")
span.SetTag("source", initsrc)
defer tracing.FinishSpan(span, &err)
content := csapi.WorkspaceReadyMessage{
Source: initsrc,
}
fc, err := json.Marshal(content)
if err != nil {
return xerrors.Errorf("cannot marshal workspace ready message: %w", err)
}
gitpodDir := filepath.Join(wspath, filepath.Dir(WorkspaceReadyFile))
err = os.MkdirAll(gitpodDir, 0777)
if err != nil {
return xerrors.Errorf("cannot create directory for workspace ready file: %w", err)
}
err = os.Chown(gitpodDir, uid, gid)
if err != nil {
return xerrors.Errorf("cannot chown directory for workspace ready file: %w", err)
}
tempWorkspaceReadyFile := WorkspaceReadyFile + ".tmp"
fn := filepath.Join(wspath, tempWorkspaceReadyFile)
err = os.WriteFile(fn, []byte(fc), 0644)
if err != nil {
return xerrors.Errorf("cannot write workspace ready file content: %w", err)
}
err = os.Chown(fn, uid, gid)
if err != nil {
return xerrors.Errorf("cannot chown workspace ready file: %w", err)
}
// Theia will listen for a rename event as trigger to start the tasks. This is a rename event
// because we're writing to the file and this is the most convenient way we can tell Theia that we're done writing.
err = os.Rename(fn, filepath.Join(wspath, WorkspaceReadyFile))
if err != nil {
return xerrors.Errorf("cannot rename workspace ready file: %w", err)
}
return nil
}