Manuel Alejandro de Brito Fontes 0412c5f461 [test] Reduce loop wait time
2022-10-05 05:15:20 +02:00

426 lines
11 KiB
Go

// Copyright (c) 2021 Gitpod GmbH. All rights reserved.
// Licensed under the GNU Affero General Public License (AGPL).
// See License-AGPL.txt in the project root for license information.
package orchestrator
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"io/ioutil"
"net/http"
"strings"
"sync"
"time"
wsmanapi "github.com/gitpod-io/gitpod/ws-manager/api"
"github.com/hashicorp/go-retryablehttp"
"golang.org/x/xerrors"
"github.com/gitpod-io/gitpod/common-go/log"
"github.com/gitpod-io/gitpod/common-go/tracing"
"github.com/gitpod-io/gitpod/image-builder/api"
)
const (
annotationRef = "ref"
annotationBaseRef = "baseref"
annotationManagedBy = "managed-by"
)
type orchestrator interface {
PublishStatus(buildID string, resp *api.BuildResponse)
PublishLog(buildID string, message string)
}
func newBuildMonitor(o orchestrator, wsman wsmanapi.WorkspaceManagerClient) *buildMonitor {
return &buildMonitor{
O: o,
wsman: wsman,
runningBuilds: make(map[string]*runningBuild),
logs: map[string]context.CancelFunc{},
}
}
type buildMonitor struct {
O orchestrator
wsman wsmanapi.WorkspaceManagerClient
runningBuilds map[string]*runningBuild
runningBuildsMu sync.RWMutex
logs map[string]context.CancelFunc
}
type runningBuild struct {
Info api.BuildInfo
Logs buildLogs
}
type buildLogs struct {
IdeURL string
OwnerToken string
}
// Run subscribes to the ws-manager, listens for build updates and distributes them internally
func (m *buildMonitor) Run() {
ctx := context.Background()
for {
wss, err := m.wsman.GetWorkspaces(ctx, &wsmanapi.GetWorkspacesRequest{
MustMatch: &wsmanapi.MetadataFilter{
Annotations: map[string]string{
annotationManagedBy: buildWorkspaceManagerID,
},
},
})
if err != nil {
log.WithError(err).Info("cannot get running builds from ws-manager - retrying")
time.Sleep(1 * time.Second)
continue
}
m.runningBuildsMu.Lock()
m.runningBuilds = make(map[string]*runningBuild, len(wss.Status))
m.runningBuildsMu.Unlock()
for _, ws := range wss.Status {
m.handleStatusUpdate(ws)
}
sub, err := m.wsman.Subscribe(ctx, &wsmanapi.SubscribeRequest{
MustMatch: &wsmanapi.MetadataFilter{
Annotations: map[string]string{
annotationManagedBy: buildWorkspaceManagerID,
},
},
})
if err != nil {
log.WithError(err).Info("connection to ws-manager lost - retrying")
time.Sleep(5 * time.Second)
continue
}
for {
msg, err := sub.Recv()
if err != nil {
log.WithError(err).Info("connection to ws-manager lost - retrying")
time.Sleep(1 * time.Second)
break
}
status := msg.GetStatus()
if status == nil {
continue
}
m.handleStatusUpdate(status)
}
}
}
func (m *buildMonitor) handleStatusUpdate(status *wsmanapi.WorkspaceStatus) {
var (
bld = extractRunningBuild(status)
resp = extractBuildResponse(status)
)
m.runningBuildsMu.Lock()
if resp.Status != api.BuildStatus_running {
delete(m.runningBuilds, status.Id)
} else {
m.runningBuilds[status.Id] = bld
}
m.runningBuildsMu.Unlock()
m.O.PublishStatus(status.Id, resp)
// handleStatusUpdate is called from a single go-routine, hence there's no need to synchronize
// access to m.logs
if bld.Info.Status == api.BuildStatus_running {
if _, ok := m.logs[status.Id]; !ok {
// we don't have a headless log listener yet, but need one
ctx, cancel := context.WithCancel(context.Background())
go listenToHeadlessLogs(ctx, bld.Logs.IdeURL, bld.Logs.OwnerToken, m.handleHeadlessLogs(status.Id))
m.logs[status.Id] = cancel
}
} else {
if cancel, ok := m.logs[status.Id]; ok {
// we have a headless log listener, and need to stop it
cancel()
delete(m.logs, status.Id)
}
}
}
func (m *buildMonitor) handleHeadlessLogs(buildID string) listenToHeadlessLogsCallback {
return func(content []byte, err error) {
if err != nil && !errors.Is(err, context.Canceled) {
log.WithError(err).WithField("buildID", buildID).Warn("headless log listener failed")
m.O.PublishLog(buildID, "Build log listener failed. The image build is still running, but you won't see any log output.")
return
}
if len(content) > 0 {
m.O.PublishLog(buildID, string(content))
}
}
}
var errOutOfRetries = xerrors.Errorf("out of retries")
// retry makes multiple attempts to execute op if op returns an UNAVAILABLE gRPC status code
func retry(ctx context.Context, op func(ctx context.Context) error, retry func(err error) bool, initialBackoff time.Duration, retries int) (err error) {
span, ctx := tracing.FromContext(ctx, "retryIfUnavailable")
defer tracing.FinishSpan(span, &err)
for i := 0; i < retries; i++ {
err := op(ctx)
span.LogKV("attempt", i)
if retry(err) {
time.Sleep(initialBackoff * time.Duration(1+i))
continue
}
if err != nil {
return err
}
return nil
}
// we've maxed out our retry attempts
return errOutOfRetries
}
func extractBuildStatus(status *wsmanapi.WorkspaceStatus) *api.BuildInfo {
s := api.BuildStatus_running
if status.Phase == wsmanapi.WorkspacePhase_STOPPING || status.Phase == wsmanapi.WorkspacePhase_STOPPED {
if status.Conditions.Failed == "" && status.Conditions.HeadlessTaskFailed == "" {
s = api.BuildStatus_done_success
} else {
s = api.BuildStatus_done_failure
}
}
return &api.BuildInfo{
BuildId: status.Metadata.MetaId,
Ref: status.Metadata.Annotations[annotationRef],
BaseRef: status.Metadata.Annotations[annotationBaseRef],
Status: s,
StartedAt: status.Metadata.StartedAt.Seconds,
LogInfo: &api.LogInfo{
Url: status.Spec.Url,
Headers: map[string]string{
"x-gitpod-owner-token": status.Auth.OwnerToken,
},
},
}
}
func extractRunningBuild(status *wsmanapi.WorkspaceStatus) *runningBuild {
return &runningBuild{
Info: *extractBuildStatus(status),
Logs: buildLogs{
IdeURL: status.Spec.Url,
OwnerToken: status.Auth.OwnerToken,
},
}
}
func extractBuildResponse(status *wsmanapi.WorkspaceStatus) *api.BuildResponse {
var (
info = extractBuildStatus(status)
msg = status.Message
)
if status.Phase == wsmanapi.WorkspacePhase_STOPPING {
if status.Conditions.Failed != "" {
msg = status.Conditions.Failed
} else if status.Conditions.HeadlessTaskFailed != "" {
msg = status.Conditions.HeadlessTaskFailed
}
}
return &api.BuildResponse{
Ref: info.Ref, // set for backwards compatibilty - new clients should consume Info
BaseRef: info.BaseRef, // set for backwards compatibilty - new clients should consume Info
Status: info.Status,
Message: msg,
Info: info,
}
}
func (m *buildMonitor) GetAllRunningBuilds(ctx context.Context) (res []*runningBuild, err error) {
m.runningBuildsMu.RLock()
defer m.runningBuildsMu.RUnlock()
res = make([]*runningBuild, 0, len(m.runningBuilds))
for _, ws := range m.runningBuilds {
res = append(res, ws)
}
return
}
func (m *buildMonitor) RegisterNewBuild(buildID string, ref, baseRef, url, ownerToken string) {
m.runningBuildsMu.Lock()
defer m.runningBuildsMu.Unlock()
bld := &runningBuild{
Info: api.BuildInfo{
BuildId: buildID,
Ref: ref,
BaseRef: baseRef,
Status: api.BuildStatus_running,
StartedAt: time.Now().Unix(),
},
Logs: buildLogs{
IdeURL: url,
OwnerToken: ownerToken,
},
}
m.runningBuilds[buildID] = bld
log.WithField("build", bld).WithField("buildID", buildID).Debug("new build registered")
}
type listenToHeadlessLogsCallback func(content []byte, err error)
func listenToHeadlessLogs(ctx context.Context, url, authToken string, callback listenToHeadlessLogsCallback) {
var err error
defer func() {
if err != nil {
callback(nil, err)
}
}()
var (
logURL string
noTerminalErr = fmt.Errorf("no terminal")
)
err = retry(ctx, func(ctx context.Context) error {
logURL, err = findTaskLogURL(ctx, url, authToken)
if err != nil {
return err
}
if strings.HasSuffix(logURL, "/listen/") {
return noTerminalErr
}
return nil
}, func(err error) bool {
return err == noTerminalErr
}, 1*time.Second, 10)
if err != nil {
return
}
log.WithField("logURL", logURL).Debug("found log URL")
callback([]byte("Connecting to log output ...\n"), nil)
req, err := http.NewRequestWithContext(ctx, "GET", logURL, nil)
if err != nil {
return
}
req.Header.Set("x-gitpod-owner-token", authToken)
req.Header.Set("Cache", "no-cache")
client := retryablehttp.NewClient()
client.HTTPClient = &http.Client{
Timeout: 2 * time.Second,
}
resp, err := client.StandardClient().Do(req)
if err != nil {
return
}
log.WithField("logURL", logURL).Debug("terminal log response received")
callback([]byte("Connected to log output ...\n"), nil)
_ = resp.Body.Close()
resp, err = http.DefaultClient.Do(req)
if err != nil {
return
}
defer resp.Body.Close()
var line struct {
Result struct {
Data []byte `json:"data"`
} `json:"result"`
}
dec := json.NewDecoder(resp.Body)
for err == nil {
err = dec.Decode(&line)
if errors.Is(err, io.EOF) {
// EOF is not an error in this case
err = nil
break
}
if err != nil {
break
}
callback(line.Result.Data, nil)
}
}
func findTaskLogURL(ctx context.Context, ideURL, authToken string) (taskLogURL string, err error) {
ideURL = strings.TrimSuffix(ideURL, "/")
tasksURL := ideURL + "/_supervisor/v1/status/tasks"
req, err := http.NewRequestWithContext(ctx, "GET", tasksURL, nil)
if err != nil {
return "", err
}
req.Header.Set("x-gitpod-owner-token", authToken)
req.Header.Set("Cache", "no-cache")
client := retryablehttp.NewClient()
client.RetryMax = 10
client.Logger = nil
client.CheckRetry = func(ctx context.Context, resp *http.Response, err error) (bool, error) {
if errors.Is(err, io.EOF) {
// the network is not reliable
return true, nil
}
if err != nil && strings.Contains(err.Error(), "received non-200 status") {
// gRPC-web race in supervisor?
return true, nil
}
if resp != nil && resp.StatusCode == http.StatusNotFound {
// We're too quick to connect - ws-proxy doesn't keep up
return true, nil
}
return retryablehttp.DefaultRetryPolicy(ctx, resp, err)
}
resp, err := client.StandardClient().Do(req)
if err != nil {
return "", xerrors.Errorf("cannot connect to supervisor: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", xerrors.Errorf("received non-200 status from %s: %v", tasksURL, resp.StatusCode)
}
msg, err := ioutil.ReadAll(resp.Body)
if err != nil {
return "", err
}
var respb struct {
Result struct {
Tasks []struct {
Terminal string `json:"terminal"`
} `json:"tasks"`
} `json:"result"`
}
err = json.Unmarshal(msg, &respb)
if err != nil {
return "", xerrors.Errorf("cannot decode supervisor status response: %w", err)
}
if len(respb.Result.Tasks) == 0 {
return "", xerrors.Errorf("build workspace has no tasks")
}
return fmt.Sprintf("%s/_supervisor/v1/terminal/listen/%s", ideURL, respb.Result.Tasks[0].Terminal), nil
}