// Copyright (c) 2021 Gitpod GmbH. All rights reserved. // Licensed under the GNU Affero General Public License (AGPL). // See License-AGPL.txt in the project root for license information. package orchestrator import ( "context" "encoding/json" "errors" "fmt" "io" "io/ioutil" "net/http" "strings" "sync" "time" wsmanapi "github.com/gitpod-io/gitpod/ws-manager/api" "github.com/hashicorp/go-retryablehttp" "golang.org/x/xerrors" "github.com/gitpod-io/gitpod/common-go/log" "github.com/gitpod-io/gitpod/common-go/tracing" "github.com/gitpod-io/gitpod/image-builder/api" ) const ( annotationRef = "ref" annotationBaseRef = "baseref" annotationManagedBy = "managed-by" ) type orchestrator interface { PublishStatus(buildID string, resp *api.BuildResponse) PublishLog(buildID string, message string) } func newBuildMonitor(o orchestrator, wsman wsmanapi.WorkspaceManagerClient) *buildMonitor { return &buildMonitor{ O: o, wsman: wsman, runningBuilds: make(map[string]*runningBuild), logs: map[string]context.CancelFunc{}, } } type buildMonitor struct { O orchestrator wsman wsmanapi.WorkspaceManagerClient runningBuilds map[string]*runningBuild runningBuildsMu sync.RWMutex logs map[string]context.CancelFunc } type runningBuild struct { Info api.BuildInfo Logs buildLogs } type buildLogs struct { IdeURL string OwnerToken string } // Run subscribes to the ws-manager, listens for build updates and distributes them internally func (m *buildMonitor) Run() { ctx := context.Background() for { wss, err := m.wsman.GetWorkspaces(ctx, &wsmanapi.GetWorkspacesRequest{ MustMatch: &wsmanapi.MetadataFilter{ Annotations: map[string]string{ annotationManagedBy: buildWorkspaceManagerID, }, }, }) if err != nil { log.WithError(err).Info("cannot get running builds from ws-manager - retrying") time.Sleep(1 * time.Second) continue } m.runningBuildsMu.Lock() m.runningBuilds = make(map[string]*runningBuild, len(wss.Status)) m.runningBuildsMu.Unlock() for _, ws := range wss.Status { m.handleStatusUpdate(ws) } sub, err := m.wsman.Subscribe(ctx, &wsmanapi.SubscribeRequest{ MustMatch: &wsmanapi.MetadataFilter{ Annotations: map[string]string{ annotationManagedBy: buildWorkspaceManagerID, }, }, }) if err != nil { log.WithError(err).Info("connection to ws-manager lost - retrying") time.Sleep(5 * time.Second) continue } for { msg, err := sub.Recv() if err != nil { log.WithError(err).Info("connection to ws-manager lost - retrying") time.Sleep(1 * time.Second) break } status := msg.GetStatus() if status == nil { continue } m.handleStatusUpdate(status) } } } func (m *buildMonitor) handleStatusUpdate(status *wsmanapi.WorkspaceStatus) { var ( bld = extractRunningBuild(status) resp = extractBuildResponse(status) ) m.runningBuildsMu.Lock() if resp.Status != api.BuildStatus_running { delete(m.runningBuilds, status.Id) } else { m.runningBuilds[status.Id] = bld } m.runningBuildsMu.Unlock() m.O.PublishStatus(status.Id, resp) // handleStatusUpdate is called from a single go-routine, hence there's no need to synchronize // access to m.logs if bld.Info.Status == api.BuildStatus_running { if _, ok := m.logs[status.Id]; !ok { // we don't have a headless log listener yet, but need one ctx, cancel := context.WithCancel(context.Background()) go listenToHeadlessLogs(ctx, bld.Logs.IdeURL, bld.Logs.OwnerToken, m.handleHeadlessLogs(status.Id)) m.logs[status.Id] = cancel } } else { if cancel, ok := m.logs[status.Id]; ok { // we have a headless log listener, and need to stop it cancel() delete(m.logs, status.Id) } } } func (m *buildMonitor) handleHeadlessLogs(buildID string) listenToHeadlessLogsCallback { return func(content []byte, err error) { if err != nil && !errors.Is(err, context.Canceled) { log.WithError(err).WithField("buildID", buildID).Warn("headless log listener failed") m.O.PublishLog(buildID, "Build log listener failed. The image build is still running, but you won't see any log output.") return } if len(content) > 0 { m.O.PublishLog(buildID, string(content)) } } } var errOutOfRetries = xerrors.Errorf("out of retries") // retry makes multiple attempts to execute op if op returns an UNAVAILABLE gRPC status code func retry(ctx context.Context, op func(ctx context.Context) error, retry func(err error) bool, initialBackoff time.Duration, retries int) (err error) { span, ctx := tracing.FromContext(ctx, "retryIfUnavailable") defer tracing.FinishSpan(span, &err) for i := 0; i < retries; i++ { err := op(ctx) span.LogKV("attempt", i) if retry(err) { time.Sleep(initialBackoff * time.Duration(1+i)) continue } if err != nil { return err } return nil } // we've maxed out our retry attempts return errOutOfRetries } func extractBuildStatus(status *wsmanapi.WorkspaceStatus) *api.BuildInfo { s := api.BuildStatus_running if status.Phase == wsmanapi.WorkspacePhase_STOPPING || status.Phase == wsmanapi.WorkspacePhase_STOPPED { if status.Conditions.Failed == "" && status.Conditions.HeadlessTaskFailed == "" { s = api.BuildStatus_done_success } else { s = api.BuildStatus_done_failure } } return &api.BuildInfo{ BuildId: status.Metadata.MetaId, Ref: status.Metadata.Annotations[annotationRef], BaseRef: status.Metadata.Annotations[annotationBaseRef], Status: s, StartedAt: status.Metadata.StartedAt.Seconds, LogInfo: &api.LogInfo{ Url: status.Spec.Url, Headers: map[string]string{ "x-gitpod-owner-token": status.Auth.OwnerToken, }, }, } } func extractRunningBuild(status *wsmanapi.WorkspaceStatus) *runningBuild { return &runningBuild{ Info: *extractBuildStatus(status), Logs: buildLogs{ IdeURL: status.Spec.Url, OwnerToken: status.Auth.OwnerToken, }, } } func extractBuildResponse(status *wsmanapi.WorkspaceStatus) *api.BuildResponse { var ( info = extractBuildStatus(status) msg = status.Message ) if status.Phase == wsmanapi.WorkspacePhase_STOPPING { if status.Conditions.Failed != "" { msg = status.Conditions.Failed } else if status.Conditions.HeadlessTaskFailed != "" { msg = status.Conditions.HeadlessTaskFailed } } return &api.BuildResponse{ Ref: info.Ref, // set for backwards compatibilty - new clients should consume Info BaseRef: info.BaseRef, // set for backwards compatibilty - new clients should consume Info Status: info.Status, Message: msg, Info: info, } } func (m *buildMonitor) GetAllRunningBuilds(ctx context.Context) (res []*runningBuild, err error) { m.runningBuildsMu.RLock() defer m.runningBuildsMu.RUnlock() res = make([]*runningBuild, 0, len(m.runningBuilds)) for _, ws := range m.runningBuilds { res = append(res, ws) } return } func (m *buildMonitor) RegisterNewBuild(buildID string, ref, baseRef, url, ownerToken string) { m.runningBuildsMu.Lock() defer m.runningBuildsMu.Unlock() bld := &runningBuild{ Info: api.BuildInfo{ BuildId: buildID, Ref: ref, BaseRef: baseRef, Status: api.BuildStatus_running, StartedAt: time.Now().Unix(), }, Logs: buildLogs{ IdeURL: url, OwnerToken: ownerToken, }, } m.runningBuilds[buildID] = bld log.WithField("build", bld).WithField("buildID", buildID).Debug("new build registered") } type listenToHeadlessLogsCallback func(content []byte, err error) func listenToHeadlessLogs(ctx context.Context, url, authToken string, callback listenToHeadlessLogsCallback) { var err error defer func() { if err != nil { callback(nil, err) } }() var ( logURL string noTerminalErr = fmt.Errorf("no terminal") ) err = retry(ctx, func(ctx context.Context) error { logURL, err = findTaskLogURL(ctx, url, authToken) if err != nil { return err } if strings.HasSuffix(logURL, "/listen/") { return noTerminalErr } return nil }, func(err error) bool { return err == noTerminalErr }, 1*time.Second, 10) if err != nil { return } log.WithField("logURL", logURL).Debug("found log URL") callback([]byte("Connecting to log output ...\n"), nil) req, err := http.NewRequestWithContext(ctx, "GET", logURL, nil) if err != nil { return } req.Header.Set("x-gitpod-owner-token", authToken) req.Header.Set("Cache", "no-cache") client := retryablehttp.NewClient() client.HTTPClient = &http.Client{ Timeout: 2 * time.Second, } resp, err := client.StandardClient().Do(req) if err != nil { return } log.WithField("logURL", logURL).Debug("terminal log response received") callback([]byte("Connected to log output ...\n"), nil) _ = resp.Body.Close() resp, err = http.DefaultClient.Do(req) if err != nil { return } defer resp.Body.Close() var line struct { Result struct { Data []byte `json:"data"` } `json:"result"` } dec := json.NewDecoder(resp.Body) for err == nil { err = dec.Decode(&line) if errors.Is(err, io.EOF) { // EOF is not an error in this case err = nil break } if err != nil { break } callback(line.Result.Data, nil) } } func findTaskLogURL(ctx context.Context, ideURL, authToken string) (taskLogURL string, err error) { ideURL = strings.TrimSuffix(ideURL, "/") tasksURL := ideURL + "/_supervisor/v1/status/tasks" req, err := http.NewRequestWithContext(ctx, "GET", tasksURL, nil) if err != nil { return "", err } req.Header.Set("x-gitpod-owner-token", authToken) req.Header.Set("Cache", "no-cache") client := retryablehttp.NewClient() client.RetryMax = 10 client.Logger = nil client.CheckRetry = func(ctx context.Context, resp *http.Response, err error) (bool, error) { if errors.Is(err, io.EOF) { // the network is not reliable return true, nil } if err != nil && strings.Contains(err.Error(), "received non-200 status") { // gRPC-web race in supervisor? return true, nil } if resp != nil && resp.StatusCode == http.StatusNotFound { // We're too quick to connect - ws-proxy doesn't keep up return true, nil } return retryablehttp.DefaultRetryPolicy(ctx, resp, err) } resp, err := client.StandardClient().Do(req) if err != nil { return "", xerrors.Errorf("cannot connect to supervisor: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return "", xerrors.Errorf("received non-200 status from %s: %v", tasksURL, resp.StatusCode) } msg, err := ioutil.ReadAll(resp.Body) if err != nil { return "", err } var respb struct { Result struct { Tasks []struct { Terminal string `json:"terminal"` } `json:"tasks"` } `json:"result"` } err = json.Unmarshal(msg, &respb) if err != nil { return "", xerrors.Errorf("cannot decode supervisor status response: %w", err) } if len(respb.Result.Tasks) == 0 { return "", xerrors.Errorf("build workspace has no tasks") } return fmt.Sprintf("%s/_supervisor/v1/terminal/listen/%s", ideURL, respb.Result.Tasks[0].Terminal), nil }