From edbdc570c7492b8be7eda2cb5f5afa0915a68f3b Mon Sep 17 00:00:00 2001 From: Michael Lumish Date: Tue, 2 Aug 2022 11:02:02 -0700 Subject: [PATCH 1/5] grpc-js: Add outlier detection tracing and enable it in interop tests --- packages/grpc-js-xds/interop/Dockerfile | 2 +- .../src/load-balancer-outlier-detection.ts | 31 ++++++++++++++++--- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/packages/grpc-js-xds/interop/Dockerfile b/packages/grpc-js-xds/interop/Dockerfile index e8a0a98c..b93e309d 100644 --- a/packages/grpc-js-xds/interop/Dockerfile +++ b/packages/grpc-js-xds/interop/Dockerfile @@ -33,6 +33,6 @@ COPY --from=build /node/src/grpc-node/packages/grpc-js ./packages/grpc-js/ COPY --from=build /node/src/grpc-node/packages/grpc-js-xds ./packages/grpc-js-xds/ ENV GRPC_VERBOSITY="DEBUG" -ENV GRPC_TRACE=xds_client,xds_resolver,cds_balancer,eds_balancer,priority,weighted_target,round_robin,resolving_load_balancer,subchannel,keepalive,dns_resolver,fault_injection,http_filter,csds +ENV GRPC_TRACE=xds_client,xds_resolver,cds_balancer,eds_balancer,priority,weighted_target,round_robin,resolving_load_balancer,subchannel,keepalive,dns_resolver,fault_injection,http_filter,csds,outlier_detection ENTRYPOINT [ "node", "/node/src/grpc-node/packages/grpc-js-xds/build/interop/xds-interop-client" ] diff --git a/packages/grpc-js/src/load-balancer-outlier-detection.ts b/packages/grpc-js/src/load-balancer-outlier-detection.ts index dde618b7..e9793bea 100644 --- a/packages/grpc-js/src/load-balancer-outlier-detection.ts +++ b/packages/grpc-js/src/load-balancer-outlier-detection.ts @@ -18,7 +18,7 @@ import { ChannelOptions, connectivityState, StatusObject } from "."; import { Call } from "./call-stream"; import { ConnectivityState } from "./connectivity-state"; -import { Status } from "./constants"; +import { LogVerbosity, Status } from "./constants"; import { durationToMs, isDuration, msToDuration } from "./duration"; import { ChannelControlHelper, createChildChannelControlHelper, registerLoadBalancerType } from "./experimental"; import { BaseFilter, Filter, FilterFactory } from "./filter"; @@ -28,7 +28,13 @@ import { PickArgs, Picker, PickResult, PickResultType, QueuePicker, UnavailableP import { Subchannel } from "./subchannel"; import { SubchannelAddress, subchannelAddressToString } from "./subchannel-address"; import { BaseSubchannelWrapper, ConnectivityStateListener, SubchannelInterface } from "./subchannel-interface"; +import * as logging from './logging'; +const TRACER_NAME = 'outlier_detection'; + +function trace(text: string): void { + logging.trace(LogVerbosity.DEBUG, TRACER_NAME, text); +} const TYPE_NAME = 'outlier_detection'; @@ -412,6 +418,7 @@ export class OutlierDetectionLoadBalancer implements LoadBalancer { if (!successRateConfig) { return; } + trace('Running success rate check'); // Step 1 const targetRequestVolume = successRateConfig.request_volume; let addresesWithTargetVolume = 0; @@ -424,6 +431,7 @@ export class OutlierDetectionLoadBalancer implements LoadBalancer { successRates.push(successes/(successes + failures)); } } + trace('Found ' + addresesWithTargetVolume + ' success rate candidates; currentEjectionPercent=' + this.getCurrentEjectionPercent() + ' successRates=[' + successRates + ']'); if (addresesWithTargetVolume < successRateConfig.minimum_hosts) { return; } @@ -438,9 +446,10 @@ export class OutlierDetectionLoadBalancer implements LoadBalancer { const successRateVariance = successRateDeviationSum / successRates.length; const successRateStdev = Math.sqrt(successRateVariance); const ejectionThreshold = successRateMean - successRateStdev * (successRateConfig.stdev_factor / 1000); + trace('stdev=' + successRateStdev + ' ejectionThreshold=' + ejectionThreshold); // Step 3 - for (const mapEntry of this.addressMap.values()) { + for (const [address, mapEntry] of this.addressMap.entries()) { // Step 3.i if (this.getCurrentEjectionPercent() > this.latestConfig.getMaxEjectionPercent()) { break; @@ -453,9 +462,12 @@ export class OutlierDetectionLoadBalancer implements LoadBalancer { } // Step 3.iii const successRate = successes / (successes + failures); + trace('Checking candidate ' + address + ' successRate=' + successRate); if (successRate < ejectionThreshold) { const randomNumber = Math.random() * 100; + trace('Candidate ' + address + ' randomNumber=' + randomNumber + ' enforcement_percentage=' + successRateConfig.enforcement_percentage); if (randomNumber < successRateConfig.enforcement_percentage) { + trace('Ejecting candidate ' + address); this.eject(mapEntry, ejectionTimestamp); } } @@ -470,13 +482,14 @@ export class OutlierDetectionLoadBalancer implements LoadBalancer { if (!failurePercentageConfig) { return; } + trace('Running failure percentage check. threshold=' + failurePercentageConfig.threshold + ' request volume threshold=' + failurePercentageConfig.request_volume); // Step 1 if (this.addressMap.size < failurePercentageConfig.minimum_hosts) { return; } // Step 2 - for (const mapEntry of this.addressMap.values()) { + for (const [address, mapEntry] of this.addressMap.entries()) { // Step 2.i if (this.getCurrentEjectionPercent() > this.latestConfig.getMaxEjectionPercent()) { break; @@ -484,6 +497,7 @@ export class OutlierDetectionLoadBalancer implements LoadBalancer { // Step 2.ii const successes = mapEntry.counter.getLastSuccesses(); const failures = mapEntry.counter.getLastFailures(); + trace('Candidate successes=' + successes + ' failures=' + failures); if (successes + failures < failurePercentageConfig.request_volume) { continue; } @@ -491,7 +505,9 @@ export class OutlierDetectionLoadBalancer implements LoadBalancer { const failurePercentage = (failures * 100) / (failures + successes); if (failurePercentage > failurePercentageConfig.threshold) { const randomNumber = Math.random() * 100; + trace('Candidate ' + address + ' randomNumber=' + randomNumber + ' enforcement_percentage=' + failurePercentageConfig.enforcement_percentage); if (randomNumber < failurePercentageConfig.enforcement_percentage) { + trace('Ejecting candidate ' + address); this.eject(mapEntry, ejectionTimestamp); } } @@ -525,6 +541,7 @@ export class OutlierDetectionLoadBalancer implements LoadBalancer { private runChecks() { const ejectionTimestamp = new Date(); + trace('Ejection timer running'); this.switchAllBuckets(); @@ -537,7 +554,7 @@ export class OutlierDetectionLoadBalancer implements LoadBalancer { this.runSuccessRateCheck(ejectionTimestamp); this.runFailurePercentageCheck(ejectionTimestamp); - for (const mapEntry of this.addressMap.values()) { + for (const [address, mapEntry] of this.addressMap.entries()) { if (mapEntry.currentEjectionTimestamp === null) { if (mapEntry.ejectionTimeMultiplier > 0) { mapEntry.ejectionTimeMultiplier -= 1; @@ -548,6 +565,7 @@ export class OutlierDetectionLoadBalancer implements LoadBalancer { const returnTime = new Date(mapEntry.currentEjectionTimestamp.getTime()); returnTime.setMilliseconds(returnTime.getMilliseconds() + Math.min(baseEjectionTimeMs * mapEntry.ejectionTimeMultiplier, Math.max(baseEjectionTimeMs, maxEjectionTimeMs))); if (returnTime < new Date()) { + trace('Unejecting ' + address); this.uneject(mapEntry); } } @@ -564,6 +582,7 @@ export class OutlierDetectionLoadBalancer implements LoadBalancer { } for (const address of subchannelAddresses) { if (!this.addressMap.has(address)) { + trace('Adding map entry for ' + address); this.addressMap.set(address, { counter: new CallCounter(), currentEjectionTimestamp: null, @@ -574,6 +593,7 @@ export class OutlierDetectionLoadBalancer implements LoadBalancer { } for (const key of this.addressMap.keys()) { if (!subchannelAddresses.has(key)) { + trace('Removing map entry for ' + key); this.addressMap.delete(key); } } @@ -585,15 +605,18 @@ export class OutlierDetectionLoadBalancer implements LoadBalancer { if (lbConfig.getSuccessRateEjectionConfig() || lbConfig.getFailurePercentageEjectionConfig()) { if (this.timerStartTime) { + trace('Previous timer existed. Replacing timer'); clearTimeout(this.ejectionTimer); const remainingDelay = lbConfig.getIntervalMs() - ((new Date()).getTime() - this.timerStartTime.getTime()); this.startTimer(remainingDelay); } else { + trace('Starting new timer'); this.timerStartTime = new Date(); this.startTimer(lbConfig.getIntervalMs()); this.switchAllBuckets(); } } else { + trace('Counting disabled. Cancelling timer.'); this.timerStartTime = null; clearTimeout(this.ejectionTimer); } From 9be6c6c5dab57d3f9f5408c5ee0f6d9ad2a99445 Mon Sep 17 00:00:00 2001 From: Michael Lumish Date: Tue, 2 Aug 2022 13:48:16 -0700 Subject: [PATCH 2/5] Update outlier detection behavior for gRFC updates --- packages/grpc-js-xds/src/load-balancer-cds.ts | 5 ++--- packages/grpc-js/src/load-balancer-outlier-detection.ts | 9 ++++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/packages/grpc-js-xds/src/load-balancer-cds.ts b/packages/grpc-js-xds/src/load-balancer-cds.ts index a6dbf42f..4d47b254 100644 --- a/packages/grpc-js-xds/src/load-balancer-cds.ts +++ b/packages/grpc-js-xds/src/load-balancer-cds.ts @@ -79,9 +79,8 @@ function translateOutlierDetectionConfig(outlierDetection: OutlierDetection__Out return undefined; } if (!outlierDetection) { - /* No-op outlier detection config, with max possible interval and no - * ejection criteria configured. */ - return new OutlierDetectionLoadBalancingConfig(~(1<<31), null, null, null, null, null, []); + /* No-op outlier detection config, with all fields unset. */ + return new OutlierDetectionLoadBalancingConfig(null, null, null, null, null, null, []); } let successRateConfig: Partial | null = null; /* Success rate ejection is enabled by default, so we only disable it if diff --git a/packages/grpc-js/src/load-balancer-outlier-detection.ts b/packages/grpc-js/src/load-balancer-outlier-detection.ts index e9793bea..4219e293 100644 --- a/packages/grpc-js/src/load-balancer-outlier-detection.ts +++ b/packages/grpc-js/src/load-balancer-outlier-detection.ts @@ -357,7 +357,10 @@ class OutlierDetectionPicker implements Picker { extraFilterFactories: extraFilterFactories }; } else { - return wrappedPick; + return { + ...wrappedPick, + subchannel: subchannelWrapper.getWrappedSubchannel() + } } } else { return wrappedPick; @@ -619,6 +622,10 @@ export class OutlierDetectionLoadBalancer implements LoadBalancer { trace('Counting disabled. Cancelling timer.'); this.timerStartTime = null; clearTimeout(this.ejectionTimer); + for (const mapEntry of this.addressMap.values()) { + this.uneject(mapEntry); + mapEntry.ejectionTimeMultiplier = 0; + } } this.latestConfig = lbConfig; From 3328798d28f544a752f4328fc670a46009a769ba Mon Sep 17 00:00:00 2001 From: Michael Lumish Date: Thu, 4 Aug 2022 12:54:15 -0700 Subject: [PATCH 3/5] grpc-js: Implement getConnectivityState in subchannel wrapper --- packages/grpc-js/src/load-balancer-outlier-detection.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/packages/grpc-js/src/load-balancer-outlier-detection.ts b/packages/grpc-js/src/load-balancer-outlier-detection.ts index 4219e293..afbeb345 100644 --- a/packages/grpc-js/src/load-balancer-outlier-detection.ts +++ b/packages/grpc-js/src/load-balancer-outlier-detection.ts @@ -215,6 +215,14 @@ class OutlierDetectionSubchannelWrapper extends BaseSubchannelWrapper implements }); } + getConnectivityState(): connectivityState { + if (this.ejected) { + return ConnectivityState.TRANSIENT_FAILURE; + } else { + return this.childSubchannelState; + } + } + /** * Add a listener function to be called whenever the wrapper's * connectivity state changes. From 4cfe75b43a72ca98d7745417be86206d611426ac Mon Sep 17 00:00:00 2001 From: Michael Lumish Date: Thu, 4 Aug 2022 15:56:28 -0700 Subject: [PATCH 4/5] grpc-js: Initialize connectivity state from subchannel in outlier detection subchannel wrapper --- packages/grpc-js/src/load-balancer-outlier-detection.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/grpc-js/src/load-balancer-outlier-detection.ts b/packages/grpc-js/src/load-balancer-outlier-detection.ts index afbeb345..0ef401c9 100644 --- a/packages/grpc-js/src/load-balancer-outlier-detection.ts +++ b/packages/grpc-js/src/load-balancer-outlier-detection.ts @@ -199,12 +199,13 @@ export class OutlierDetectionLoadBalancingConfig implements LoadBalancingConfig } class OutlierDetectionSubchannelWrapper extends BaseSubchannelWrapper implements SubchannelInterface { - private childSubchannelState: ConnectivityState = ConnectivityState.IDLE; + private childSubchannelState: ConnectivityState; private stateListeners: ConnectivityStateListener[] = []; private ejected: boolean = false; private refCount: number = 0; constructor(childSubchannel: SubchannelInterface, private mapEntry?: MapEntry) { super(childSubchannel); + this.childSubchannelState = childSubchannel.getConnectivityState(); childSubchannel.addConnectivityStateListener((subchannel, previousState, newState) => { this.childSubchannelState = newState; if (!this.ejected) { From 36f37cb78fb6fd987d9834d71a797ae8fabe2428 Mon Sep 17 00:00:00 2001 From: Michael Lumish Date: Thu, 4 Aug 2022 16:02:05 -0700 Subject: [PATCH 5/5] grpc-js: Propagate ejection when recreating outlier detection subchannel wrapper --- packages/grpc-js/src/load-balancer-outlier-detection.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/grpc-js/src/load-balancer-outlier-detection.ts b/packages/grpc-js/src/load-balancer-outlier-detection.ts index 0ef401c9..ee400d45 100644 --- a/packages/grpc-js/src/load-balancer-outlier-detection.ts +++ b/packages/grpc-js/src/load-balancer-outlier-detection.ts @@ -391,6 +391,10 @@ export class OutlierDetectionLoadBalancer implements LoadBalancer { const originalSubchannel = channelControlHelper.createSubchannel(subchannelAddress, subchannelArgs); const mapEntry = this.addressMap.get(subchannelAddressToString(subchannelAddress)); const subchannelWrapper = new OutlierDetectionSubchannelWrapper(originalSubchannel, mapEntry); + if (mapEntry?.currentEjectionTimestamp !== null) { + // If the address is ejected, propagate that to the new subchannel wrapper + subchannelWrapper.eject(); + } mapEntry?.subchannelWrappers.push(subchannelWrapper); return subchannelWrapper; },