Skip to content

Commit 32ff775

Browse files
Merge remote-tracking branch 'origin/improvement/ARSN-495-kmip-retry' into w/8.1/improvement/ARSN-495-kmip-retry
2 parents 720ced3 + 0ef69d3 commit 32ff775

File tree

5 files changed

+284
-24
lines changed

5 files changed

+284
-24
lines changed

lib/network/kmip/Client.ts

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,9 @@ function _queryOperationsAndObjects(client: any, logger: werelogs.Logger, cb: an
231231
{ vendorIdentification: client.vendorIdentification,
232232
kmip: kmipLog });
233233
}
234+
// flag handshake as done, so if the connection is closed and reconnected
235+
// another handshake won't be done as we already have the server's info
236+
client.kmip.handshakeDone = true;
234237
return cb();
235238
});
236239
}
@@ -336,7 +339,8 @@ export default class Client implements KMSInterface {
336339
kmipMsg(operation, keyIdentifier,
337340
'Server did not return the expected identifier')
338341
);
339-
logger.error(`KMIP::${operation}`,
342+
// warn level to avoid dumping debug and trace logs on retryable errors
343+
logger.warn(`KMIP::${operation}`,
340344
{ error, uniqueIdentifier, keyIdentifier });
341345
return error;
342346
}
@@ -357,7 +361,8 @@ export default class Client implements KMSInterface {
357361
KMIP.TextString('Unique Identifier', keyIdentifier),
358362
], (error, response) => {
359363
if (error) {
360-
logger.error('KMIP::_activateBucketKey',
364+
// warn level to avoid dumping debug and trace logs on retryable errors
365+
logger.warn('KMIP::_activateBucketKey',
361366
{ error,
362367
serverInformation: this.serverInformation });
363368
return cb(error);
@@ -401,7 +406,8 @@ export default class Client implements KMSInterface {
401406
KMIP.Structure('Template-Attribute', attributes),
402407
], (error, response) => {
403408
if (error) {
404-
logger.error('KMIP::createBucketKey',
409+
// warn level to avoid dumping debug and trace logs on retryable errors
410+
logger.warn('KMIP::createBucketKey',
405411
{ error,
406412
serverInformation: this.serverInformation });
407413
return cb(error);
@@ -416,7 +422,8 @@ export default class Client implements KMSInterface {
416422
kmipMsg('Create', bucketName,
417423
'Server created an object of wrong type')
418424
);
419-
logger.error('KMIP::createBucketKey',
425+
// warn level to avoid dumping debug and trace logs on retryable errors
426+
logger.warn('KMIP::createBucketKey',
420427
{ error, createdObjectType });
421428
return cb(error);
422429
}
@@ -448,7 +455,8 @@ export default class Client implements KMSInterface {
448455
]),
449456
], (error, response) => {
450457
if (error) {
451-
logger.error('KMIP::_revokeBucketKey',
458+
// warn level to avoid dumping debug and trace logs on retryable errors
459+
logger.warn('KMIP::_revokeBucketKey',
452460
{ error,
453461
serverInformation: this.serverInformation });
454462
return cb(error);
@@ -469,7 +477,8 @@ export default class Client implements KMSInterface {
469477
return this._revokeBucketKey(bucketKeyId, logger, err => {
470478
if (err) {
471479
const error = arsenalErrorKMIP(err);
472-
logger.error('KMIP::destroyBucketKey: revocation failed',
480+
// warn level to avoid dumping debug and trace logs on retryable errors
481+
logger.warn('KMIP::destroyBucketKey: revocation failed',
473482
{ error,
474483
serverInformation: this.serverInformation });
475484
return cb(error);
@@ -479,7 +488,8 @@ export default class Client implements KMSInterface {
479488
], (err, response) => {
480489
if (err) {
481490
const error = arsenalErrorKMIP(err);
482-
logger.error('KMIP::destroyBucketKey',
491+
// warn level to avoid dumping debug and trace logs on retryable errors
492+
logger.warn('KMIP::destroyBucketKey',
483493
{ error,
484494
serverInformation: this.serverInformation });
485495
return cb(error);
@@ -521,7 +531,8 @@ export default class Client implements KMSInterface {
521531
KMIP.ByteString('IV/Counter/Nonce', CRYPTOGRAPHIC_DEFAULT_IV),
522532
], (error, response) => {
523533
if (error) {
524-
logger.error('KMIP::cipherDataKey',
534+
// warn level to avoid dumping debug and trace logs on retryable errors
535+
logger.warn('KMIP::cipherDataKey',
525536
{ error,
526537
serverInformation: this.serverInformation });
527538
return cb(error);
@@ -563,7 +574,8 @@ export default class Client implements KMSInterface {
563574
KMIP.ByteString('IV/Counter/Nonce', CRYPTOGRAPHIC_DEFAULT_IV),
564575
], (error, response) => {
565576
if (error) {
566-
logger.error('KMIP::decipherDataKey',
577+
// warn level to avoid dumping debug and trace logs on retryable errors
578+
logger.warn('KMIP::decipherDataKey',
567579
{ error,
568580
serverInformation: this.serverInformation });
569581
return cb(error);
@@ -579,7 +591,7 @@ export default class Client implements KMSInterface {
579591
// the bucket does not have to exist, just passing a common bucket name here
580592
this.createBucketKey('kmip-healthcheck-test-bucket', logger, (err, bucketKeyId) => {
581593
if (err) {
582-
logger.error('KMIP::healthcheck: failure to create a test bucket key', {
594+
logger.warn('KMIP::healthcheck: failure to create a test bucket key', {
583595
error: err, kmip: kmipLog,
584596
});
585597
return cb(err);
@@ -588,7 +600,7 @@ export default class Client implements KMSInterface {
588600
{ kmip: kmipLog });
589601
this.destroyBucketKey(bucketKeyId, logger, err => {
590602
if (err) {
591-
logger.error('KMIP::healthcheck: failure to remove the test bucket key', {
603+
logger.warn('KMIP::healthcheck: failure to remove the test bucket key', {
592604
bucketKeyId,
593605
error: err,
594606
kmip: kmipLog,

lib/network/kmip/ClusterClient.ts

Lines changed: 189 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,44 @@ import TTLVCodec from './codec/ttlv';
55
import TlsTransport from './transport/tls';
66
import KMIPClient, { KmipClientOptions } from './Client';
77
import { KmsBackend, KMSInterface, KmsType } from '../KMSInterface';
8-
import type { Logger } from 'werelogs';
8+
import { Logger } from 'werelogs';
99
import async from 'async';
10+
import { ArsenalError, errorInstances } from '../../errors';
11+
import { kmipMsg } from './errorMapping';
12+
13+
interface UnhealthyClient {
14+
client: KMIPClient,
15+
healthcheckTimeout: ReturnType<typeof setTimeout>,
16+
};
17+
18+
/** Array without last item */
19+
type ArrayPopped<Type extends unknown[]> = Type extends [...infer R, unknown] ? R : never;
20+
/** Array's last item */
21+
type ArrayLast<Type extends unknown[]> = Type extends [...unknown[], infer R] ? R : never
22+
23+
interface actions {
24+
createBucketKey: Parameters<ClusterClient['createBucketKey']>;
25+
destroyBucketKey: Parameters<ClusterClient['destroyBucketKey']>;
26+
cipherDataKey: Parameters<ClusterClient['cipherDataKey']>;
27+
decipherDataKey: Parameters<ClusterClient['decipherDataKey']>;
28+
healthcheck: Parameters<ClusterClient['healthcheck']>;
29+
};
30+
31+
interface ClusterClientOptions {
32+
logger: Logger;
33+
retries?: number;
34+
};
35+
36+
const UNHEALTHY_DURATION = 30_000; // 30s
1037

1138
export default class ClusterClient implements KMSInterface {
39+
/** Healthy clients */
1240
private readonly clients: KMIPClient[];
41+
private readonly unhealthyClients: UnhealthyClient[] = [];
1342
private roundRobinIndex = 0;
1443
public readonly backend: KmsBackend<KmsType.external>;
44+
private readonly logger: Logger;
45+
private readonly retries: number;
1546

1647
/**
1748
* Construct a high level cluster of KMIP drivers suitable for cloudserver
@@ -36,7 +67,7 @@ export default class ClusterClient implements KMSInterface {
3667
* defaults to TlsTransport
3768
*/
3869
constructor(
39-
options: KmipClientOptions,
70+
options: KmipClientOptions & ClusterClientOptions,
4071
CodecClass: any,
4172
TransportClass: any,
4273
) {
@@ -47,6 +78,9 @@ export default class ClusterClient implements KMSInterface {
4778
TransportClass || TlsTransport,
4879
));
4980
this.backend = this.clients[0].backend;
81+
this.logger = options.logger;
82+
// if retries is not configured, we retry as much host there are in the cluster
83+
this.retries = typeof options.retries === 'number' ? options.retries : this.clients.length - 1;
5084
}
5185

5286
next() {
@@ -56,25 +90,174 @@ export default class ClusterClient implements KMSInterface {
5690
return this.clients[this.roundRobinIndex++];
5791
}
5892

93+
checkUnhealthyClient(unhealthyClient: UnhealthyClient) {
94+
const client = unhealthyClient.client;
95+
const healthyIndex = this.clients.indexOf(client);
96+
if (healthyIndex === -1) {
97+
this.clients.push(client);
98+
} else {
99+
// should not happen (S3C-9956)
100+
this.logger.warn('checkUnhealthyClient: already moved in healthy', { unhealthyHost: client.host });
101+
}
102+
103+
const unhealthyIndex = this.unhealthyClients.indexOf(unhealthyClient);
104+
if (unhealthyIndex === -1) {
105+
// should not happen (S3C-9956)
106+
this.logger.warn('checkUnhealthyClient: already moved out of unhealthy', { unhealthyHost: client.host });
107+
} else {
108+
this.unhealthyClients.splice(unhealthyIndex, 1);
109+
}
110+
this.logger.info('kmip host healthy again', {
111+
unhealthyHost: client.host,
112+
unhealthy: this.unhealthyClients.length,
113+
healthy: this.clients.length,
114+
});
115+
}
116+
117+
markUnhealthyClient(clientUsed: KMIPClient, logger: Logger, err: Error) {
118+
logger.info('mark kmip host unhealthy', {
119+
err,
120+
msg: err?.toString?.(),
121+
unhealthyHost: clientUsed.host,
122+
unhealthy: this.unhealthyClients.length,
123+
healthy: this.clients.length,
124+
});
125+
const index = this.clients.indexOf(clientUsed);
126+
if (index === -1) {
127+
// should not happen (S3C-9956)
128+
logger.warn('already marked unhealthy');
129+
return;
130+
}
131+
const spliced = this.clients.splice(index, 1);
132+
const unhealthy = {
133+
client: spliced[0],
134+
healthcheckTimeout: setTimeout(() => this.checkUnhealthyClient(unhealthy), UNHEALTHY_DURATION),
135+
};
136+
this.unhealthyClients.push(unhealthy);
137+
// the current index was removed
138+
// decrease so next roundrobin uses current index again with new client
139+
this.roundRobinIndex--;
140+
}
141+
142+
callback<T extends keyof actions>(
143+
clientUsed: KMIPClient,
144+
funcName: T,
145+
args: ArrayPopped<actions[T]>,
146+
logger: ArrayLast<typeof args>,
147+
originalCallback: ArrayLast<actions[T]>,
148+
) {
149+
let retries = 0;
150+
const newCB = (err: (ArsenalError | Error | null) & { code?: number }, ...rest: any[]) => {
151+
if (!err) {
152+
// @ts-expect-error ts2556 typescript won't accept the spread on args array
153+
return originalCallback(err, ...rest);
154+
}
155+
if (err.code && err.code >= 400 && err.code <= 499) {
156+
// @ts-expect-error ts2556 typescript won't accept the spread on args array
157+
return originalCallback(err, ...rest);
158+
}
159+
160+
if (retries === this.retries) {
161+
logger.warn(`kmip max retries reached: ${retries} / ${this.retries}`);
162+
// @ts-expect-error ts2556 typescript won't accept the spread on args array
163+
return originalCallback(err, ...rest);
164+
}
165+
retries++;
166+
167+
this.markUnhealthyClient(clientUsed, logger, err);
168+
169+
if (this.clients.length === 0) {
170+
logger.warn('kmip cluster has no healthy hosts');
171+
// @ts-expect-error ts2556 typescript won't accept the spread on args array
172+
return originalCallback(err, ...rest);
173+
}
174+
175+
const nextClient = this.next();
176+
// @ts-expect-error ts2556 typescript won't accept the spread on args array
177+
nextClient[funcName](...args, newCB);
178+
return undefined;
179+
};
180+
return newCB;
181+
}
59182

60183
createBucketKey(...args: Parameters<KMSInterface['createBucketKey']>) {
184+
const originalCallback = args.pop() as ArrayLast<typeof args>;
185+
const poppedArgs = args as unknown as ArrayPopped<typeof args>;
186+
const logger = args[args.length - 1] as ArrayLast<typeof poppedArgs>;
187+
188+
if (this.clients.length === 0) {
189+
logger.warn('kmip cluster has no healthy hosts');
190+
return originalCallback(errorInstances.InternalError.customizeDescription(
191+
kmipMsg('Create', args[0], `no healthy host in the cluster`)));
192+
}
193+
61194
const client = this.next();
62-
client.createBucketKey.apply(client, args);
195+
196+
client.createBucketKey(
197+
...poppedArgs,
198+
this.callback(client, 'createBucketKey', poppedArgs, logger, originalCallback),
199+
);
200+
return undefined;
63201
}
64202

65203
destroyBucketKey(...args: Parameters<KMSInterface['destroyBucketKey']>) {
204+
const originalCallback = args.pop() as ArrayLast<typeof args>;
205+
const poppedArgs = args as unknown as ArrayPopped<typeof args>;
206+
const logger = args[args.length - 1] as ArrayLast<typeof poppedArgs>;
207+
208+
if (this.clients.length === 0) {
209+
logger.warn('kmip cluster has no healthy hosts');
210+
return originalCallback(errorInstances.InternalError.customizeDescription(
211+
kmipMsg('Destroy', args[0], `no healthy host in the cluster`)));
212+
}
213+
66214
const client = this.next();
67-
client.destroyBucketKey.apply(client, args);
215+
216+
client.destroyBucketKey(
217+
...poppedArgs,
218+
this.callback(client, 'destroyBucketKey', poppedArgs, logger, originalCallback),
219+
);
220+
return undefined;
68221
}
69222

70223
cipherDataKey(...args: Parameters<KMSInterface['cipherDataKey']>) {
224+
const originalCallback = args.pop() as ArrayLast<typeof args>;
225+
const poppedArgs = args as unknown as ArrayPopped<typeof args>;
226+
const logger = args[args.length - 1] as ArrayLast<typeof poppedArgs>;
227+
228+
if (this.clients.length === 0) {
229+
logger.warn('kmip cluster has no healthy hosts');
230+
return originalCallback(errorInstances.InternalError.customizeDescription(
231+
kmipMsg('Encrypt', args[1], `no healthy host in the cluster`)));
232+
}
233+
71234
const client = this.next();
72-
client.cipherDataKey.apply(client, args);
235+
236+
client.cipherDataKey(
237+
...poppedArgs,
238+
this.callback(client, 'cipherDataKey', poppedArgs, logger, originalCallback),
239+
);
240+
return undefined;
73241
}
74242

75243
decipherDataKey(...args: Parameters<KMSInterface['decipherDataKey']>) {
244+
const originalCallback = args.pop() as ArrayLast<typeof args>;
245+
const poppedArgs = args as unknown as ArrayPopped<typeof args>;
246+
const logger = args[args.length - 1] as ArrayLast<typeof poppedArgs>;
247+
248+
if (this.clients.length === 0) {
249+
logger.warn('kmip cluster has no healthy hosts');
250+
return originalCallback(errorInstances.InternalError.customizeDescription(
251+
kmipMsg('Decrypt', args[1], `no healthy host in the cluster`)));
252+
}
253+
76254
const client = this.next();
77-
client.decipherDataKey.apply(client, args);
255+
256+
client.decipherDataKey(
257+
...poppedArgs,
258+
this.callback(client, 'decipherDataKey', poppedArgs, logger, originalCallback),
259+
);
260+
return undefined;
78261
}
79262

80263
clusterHealthcheck(logger: Logger, cb: (err: Error | null) => void) {

lib/network/kmip/error-comparison.jsonc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,12 @@
3333
"resultReason": "General Failure",
3434
"resultMessage": [
3535
"[NCERRInsufficientPermissions]: ",
36+
// 24h idle token revoke is undocumented, but thales described us this behavior
37+
// we asked them a tweaked lab to test with a 10m session.
3638
// TLS session idle timeout (24h) - needs a new connection or no idle
37-
"[NCERRUnauthorizedAccess]: Token has been revoked"
39+
"[NCERRUnauthorizedAccess]: Token has been revoked",
40+
// write after receiving above token revoked error
41+
"[NCERRUnauthorizedAccess]: Invalid token"
3842
]
3943
}
4044
}

0 commit comments

Comments
 (0)