mirror of
https://github.com/gitpod-io/gitpod.git
synced 2025-12-08 17:36:30 +00:00
408 lines
11 KiB
Go
408 lines
11 KiB
Go
// Copyright (c) 2020 TypeFox GmbH. All rights reserved.
|
|
// Licensed under the Gitpod Enterprise Source Code License,
|
|
// See License.enterprise.txt in the project root folder.
|
|
|
|
package scaler
|
|
|
|
import (
|
|
"context"
|
|
"sort"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/gitpod-io/gitpod/common-go/log"
|
|
"github.com/gitpod-io/gitpod/common-go/util"
|
|
csapi "github.com/gitpod-io/gitpod/content-service/api"
|
|
"github.com/gitpod-io/gitpod/ws-manager/api"
|
|
"github.com/golang/protobuf/ptypes"
|
|
"github.com/google/uuid"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"golang.org/x/xerrors"
|
|
"google.golang.org/grpc"
|
|
"google.golang.org/grpc/codes"
|
|
"google.golang.org/grpc/status"
|
|
)
|
|
|
|
const (
	// DefaultGhostOwner is the UID of the default owner for ghost workspaces.
	// It is used whenever the driver config does not name a ghost owner.
	DefaultGhostOwner = "00000000-0000-0000-0000-000000000000"

	// maxGhostStartTime is how long a ghost workspace we asked ws-manager to start
	// may remain unconfirmed before the driver stops counting it as "starting".
	maxGhostStartTime = 15 * time.Minute
)
|
|
|
|
// WorkspaceManagerPrescaleDriverConfig configures a ws-manager based prescale driver
|
|
type WorkspaceManagerPrescaleDriverConfig struct {
|
|
WorkspaceManagerAddr string `json:"wsmanAddr"`
|
|
GhostOwner string `json:"ghostOwner"`
|
|
WorkspaceImage string `json:"workspaceImage"`
|
|
IDEImage string `json:"ideImage"`
|
|
FeatureFlags []api.WorkspaceFeatureFlag `json:"featureFlags"`
|
|
|
|
MaxGhostWorkspaces int `json:"maxGhostWorkspaces"`
|
|
SchedulerInterval util.Duration `json:"schedulerInterval"`
|
|
|
|
Renewal struct {
|
|
Interval util.Duration `json:"interval"`
|
|
Percentage int `json:"percentage"`
|
|
} `json:"renewal"`
|
|
}
|
|
|
|
// NewWorkspaceManagerPrescaleDriver creates a new WorkspaceManagerPrescale
|
|
func NewWorkspaceManagerPrescaleDriver(config WorkspaceManagerPrescaleDriverConfig, controller Controller) (*WorkspaceManagerPrescaleDriver, error) {
|
|
if config.GhostOwner == "" {
|
|
config.GhostOwner = DefaultGhostOwner
|
|
}
|
|
if config.Renewal.Percentage < 0 && 100 < config.Renewal.Percentage {
|
|
return nil, xerrors.Errorf("renewal.percentage must be between 0 and 100 (inclusive)")
|
|
}
|
|
if config.SchedulerInterval <= 0 {
|
|
return nil, xerrors.Errorf("schedulerInterval must be greater than zero")
|
|
}
|
|
|
|
conn, err := grpc.Dial(config.WorkspaceManagerAddr, grpc.WithInsecure())
|
|
if err != nil {
|
|
return nil, xerrors.Errorf("cannot connect to ws-manager: %w", err)
|
|
}
|
|
return &WorkspaceManagerPrescaleDriver{
|
|
Config: config,
|
|
Client: api.NewWorkspaceManagerClient(conn),
|
|
conn: conn,
|
|
Controller: controller,
|
|
time: realtime,
|
|
stop: make(chan struct{}),
|
|
}, nil
|
|
}
|
|
|
|
// WorkspaceManagerPrescaleDriver implements a prescale driver using ws-manager's ghost pods
|
|
type WorkspaceManagerPrescaleDriver struct {
|
|
Config WorkspaceManagerPrescaleDriverConfig
|
|
|
|
Client api.WorkspaceManagerClient
|
|
conn *grpc.ClientConn
|
|
|
|
Controller Controller
|
|
|
|
time timer
|
|
metrics *metrics
|
|
|
|
stop chan struct{}
|
|
once sync.Once
|
|
}
|
|
|
|
type workspaceStatus struct {
|
|
Count WorkspaceCount
|
|
DeletionCandidates []string
|
|
}
|
|
|
|
// RegisterMetrics registers prometheus metrics for this driver
|
|
func (wspd *WorkspaceManagerPrescaleDriver) RegisterMetrics(reg prometheus.Registerer) error {
|
|
wspd.metrics = newMetrics()
|
|
return wspd.metrics.Register(reg)
|
|
}
|
|
|
|
// Run runs the prescale driver until Stop() is called
|
|
func (wspd *WorkspaceManagerPrescaleDriver) Run() {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
statusChan := make(chan workspaceStatus)
|
|
go func() {
|
|
for {
|
|
err := wspd.maintainWorkspaceStatus(ctx, statusChan)
|
|
if err == context.Canceled {
|
|
return
|
|
}
|
|
if err != nil {
|
|
log.WithError(err).Error("cannot maintain workspace count")
|
|
time.Sleep(1 * time.Second)
|
|
}
|
|
}
|
|
}()
|
|
|
|
var renewal <-chan time.Time
|
|
if wspd.Config.Renewal.Percentage > 0 && wspd.Config.Renewal.Interval > 0 {
|
|
tc, stop := wspd.time.NewTicker(time.Duration(wspd.Config.Renewal.Interval))
|
|
defer stop()
|
|
renewal = tc
|
|
|
|
log.WithField("percentage", wspd.Config.Renewal.Percentage).WithField("interval", wspd.Config.Renewal.Interval.String()).Info("enabled ghost workspace renewal")
|
|
}
|
|
houseKeeping, stopHouseKeeping := wspd.time.NewTicker(1 * time.Minute)
|
|
defer stopHouseKeeping()
|
|
scheduleGhosts, stopSchedulingGhosts := wspd.time.NewTicker(time.Duration(wspd.Config.SchedulerInterval))
|
|
defer stopSchedulingGhosts()
|
|
|
|
var (
|
|
counts = make(chan WorkspaceCount)
|
|
status workspaceStatus
|
|
startingGhosts = make(map[string]time.Time)
|
|
setpoint int
|
|
)
|
|
|
|
cchan := wspd.Controller.Control(ctx, counts)
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case status = <-statusChan:
|
|
for _, id := range status.DeletionCandidates {
|
|
delete(startingGhosts, id)
|
|
}
|
|
status.Count.Ghost += len(startingGhosts)
|
|
counts <- status.Count
|
|
wspd.metrics.OnGhostCountChange(status.Count.Ghost)
|
|
log.WithField("counts", status.Count).Debug("status update")
|
|
case <-houseKeeping:
|
|
for id, t1 := range startingGhosts {
|
|
if time.Since(t1) < maxGhostStartTime {
|
|
continue
|
|
}
|
|
delete(startingGhosts, id)
|
|
status.Count.Ghost--
|
|
|
|
log.WithFields(log.OWI("", "", id)).Warn("ghost took too long to start - or we missed an update")
|
|
}
|
|
case <-renewal:
|
|
d := int(float64(len(status.DeletionCandidates)) * float64(wspd.Config.Renewal.Percentage) * 0.01)
|
|
if d == 0 {
|
|
log.WithField("len(status.DeletionCandidates)", len(status.DeletionCandidates)).Warn("should have renewed ghost workspaces, but found no suitable candidates")
|
|
continue
|
|
}
|
|
if d > len(status.DeletionCandidates) {
|
|
d = len(status.DeletionCandidates)
|
|
}
|
|
log.WithField("count", d).Info("attempting to renew ghost workspaces")
|
|
err := wspd.stopGhostWorkspaces(ctx, status.DeletionCandidates[:d])
|
|
if err != nil {
|
|
log.WithError(err).Error("cannot stop ghost workspaces during renewal")
|
|
continue
|
|
}
|
|
ids, err := wspd.startGhostWorkspaces(ctx, d, status)
|
|
if err != nil {
|
|
log.WithError(err).Error("cannot start ghost workspaces during renewal")
|
|
continue
|
|
}
|
|
for _, id := range ids {
|
|
startingGhosts[id] = time.Now()
|
|
status.Count.Ghost++
|
|
}
|
|
log.WithField("delta", d).Info("renewed ghost workspaces")
|
|
case setpoint = <-cchan:
|
|
// we've already set the new setpoint - wait for scheduleGhosts to act on it.
|
|
wspd.metrics.OnSetpointChange(setpoint)
|
|
case <-scheduleGhosts:
|
|
d := setpoint - status.Count.Ghost
|
|
if d == 0 {
|
|
continue
|
|
}
|
|
|
|
var (
|
|
err error
|
|
ids []string
|
|
)
|
|
if d < 0 {
|
|
d *= -1
|
|
if d > len(status.DeletionCandidates) {
|
|
d = len(status.DeletionCandidates)
|
|
}
|
|
err = wspd.stopGhostWorkspaces(ctx, status.DeletionCandidates[:d])
|
|
}
|
|
if d > 0 {
|
|
ids, err = wspd.startGhostWorkspaces(ctx, d, status)
|
|
}
|
|
if err != nil {
|
|
log.WithError(err).Error("failed to realise ghost workspace delta")
|
|
continue
|
|
}
|
|
|
|
for _, id := range ids {
|
|
startingGhosts[id] = wspd.time.Now()
|
|
status.Count.Ghost++
|
|
}
|
|
log.WithField("setpoint", setpoint).WithField("delta", d).Info("(de)scheduled ghost workspaces")
|
|
}
|
|
}
|
|
}
|
|
|
|
func (wspd *WorkspaceManagerPrescaleDriver) startGhostWorkspaces(ctx context.Context, count int, status workspaceStatus) (ids []string, err error) {
|
|
ctx, cancel := context.WithTimeout(ctx, 15*time.Second)
|
|
defer cancel()
|
|
|
|
ids = make([]string, count)
|
|
for i := 0; i < count; i++ {
|
|
if status.Count.Ghost+i >= wspd.Config.MaxGhostWorkspaces {
|
|
log.WithField("limit", wspd.Config.MaxGhostWorkspaces).Warn("max number of ghost workspace reached")
|
|
return ids, nil
|
|
}
|
|
|
|
instanceUUID, err := uuid.NewRandom()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
instanceID := instanceUUID.String()
|
|
ids[i] = instanceID
|
|
metaUUID, err := uuid.NewRandom()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
metaID := metaUUID.String()
|
|
|
|
_, err = wspd.Client.StartWorkspace(ctx, &api.StartWorkspaceRequest{
|
|
Type: api.WorkspaceType_GHOST,
|
|
Id: instanceID,
|
|
Metadata: &api.WorkspaceMetadata{
|
|
MetaId: metaID,
|
|
Owner: wspd.Config.GhostOwner,
|
|
},
|
|
ServicePrefix: instanceID,
|
|
Spec: &api.StartWorkspaceSpec{
|
|
Admission: api.AdmissionLevel_ADMIT_OWNER_ONLY,
|
|
Timeout: "60m",
|
|
CheckoutLocation: "none",
|
|
FeatureFlags: wspd.Config.FeatureFlags,
|
|
Git: &api.GitSpec{
|
|
Email: "none@gitpod.io",
|
|
Username: "gitpod-ghost",
|
|
},
|
|
IdeImage: wspd.Config.IDEImage,
|
|
Initializer: &csapi.WorkspaceInitializer{
|
|
Spec: &csapi.WorkspaceInitializer_Empty{
|
|
Empty: &csapi.EmptyInitializer{},
|
|
},
|
|
},
|
|
WorkspaceLocation: "none",
|
|
WorkspaceImage: wspd.Config.WorkspaceImage,
|
|
},
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
return ids, nil
|
|
}
|
|
|
|
func (wspd *WorkspaceManagerPrescaleDriver) stopGhostWorkspaces(ctx context.Context, ids []string) (err error) {
|
|
defer func() {
|
|
if err != nil {
|
|
err = xerrors.Errorf("cannot stop ghosts: %w", err)
|
|
}
|
|
}()
|
|
|
|
for _, id := range ids {
|
|
_, err := wspd.Client.StopWorkspace(ctx, &api.StopWorkspaceRequest{Id: id, Policy: api.StopWorkspacePolicy_NORMALLY})
|
|
if status.Code(err) == codes.NotFound {
|
|
continue
|
|
}
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (wspd *WorkspaceManagerPrescaleDriver) maintainWorkspaceStatus(ctx context.Context, counts chan<- workspaceStatus) error {
|
|
type workspaceState struct {
|
|
Started time.Time
|
|
Type api.WorkspaceType
|
|
}
|
|
|
|
wss, err := wspd.Client.GetWorkspaces(ctx, &api.GetWorkspacesRequest{})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
state := make(map[string]workspaceState)
|
|
produceStatus := func() workspaceStatus {
|
|
var res workspaceStatus
|
|
res.DeletionCandidates = make([]string, 0, len(state))
|
|
for id, s := range state {
|
|
switch s.Type {
|
|
case api.WorkspaceType_GHOST:
|
|
res.DeletionCandidates = append(res.DeletionCandidates, id)
|
|
res.Count.Ghost++
|
|
case api.WorkspaceType_REGULAR:
|
|
res.Count.Regular++
|
|
}
|
|
}
|
|
|
|
sort.Slice(res.DeletionCandidates, func(i, j int) bool {
|
|
var (
|
|
ti = state[res.DeletionCandidates[i]].Started
|
|
tj = state[res.DeletionCandidates[j]].Started
|
|
)
|
|
return ti.Before(tj)
|
|
})
|
|
return res
|
|
}
|
|
|
|
for _, s := range wss.Status {
|
|
startedAt, err := ptypes.Timestamp(s.Metadata.StartedAt)
|
|
if err != nil {
|
|
log.WithError(err).WithFields(log.OWI(s.Metadata.Owner, s.Metadata.MetaId, s.Id)).Warn("cannot convert startedAt timestamp")
|
|
startedAt = time.Now()
|
|
}
|
|
state[s.Id] = workspaceState{
|
|
Started: startedAt,
|
|
Type: s.Spec.Type,
|
|
}
|
|
}
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
case counts <- produceStatus():
|
|
}
|
|
|
|
sub, err := wspd.Client.Subscribe(ctx, &api.SubscribeRequest{})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for {
|
|
resp, err := sub.Recv()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
s := resp.GetStatus()
|
|
if s == nil {
|
|
continue
|
|
}
|
|
|
|
_, known := state[s.Id]
|
|
if known && s.Phase == api.WorkspacePhase_STOPPED {
|
|
delete(state, s.Id)
|
|
} else if !known && s.Phase == api.WorkspacePhase_PENDING {
|
|
startedAt, err := ptypes.Timestamp(s.Metadata.StartedAt)
|
|
if err != nil {
|
|
log.WithError(err).WithFields(log.OWI(s.Metadata.Owner, s.Metadata.MetaId, s.Id)).Warn("cannot convert startedAt timestamp")
|
|
startedAt = time.Now()
|
|
}
|
|
state[s.Id] = workspaceState{
|
|
Started: startedAt,
|
|
Type: s.Spec.Type,
|
|
}
|
|
state[s.Id] = workspaceState{
|
|
Started: startedAt,
|
|
Type: s.Spec.Type,
|
|
}
|
|
} else {
|
|
continue
|
|
}
|
|
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
case counts <- produceStatus():
|
|
}
|
|
}
|
|
}
|
|
|
|
// Stop stops the driver
|
|
func (wspd *WorkspaceManagerPrescaleDriver) Stop() {
|
|
wspd.once.Do(func() {
|
|
close(wspd.stop)
|
|
if wspd.conn != nil {
|
|
wspd.conn.Close()
|
|
}
|
|
})
|
|
}
|