Skip to content

Commit 38c0457

Browse files
authored
feat: update exit cause handling to return a list of causes in PostCompute and PreCompute services (#658)
1 parent 0a884fe commit 38c0457

File tree

4 files changed: +152 −170 lines changed

src/main/java/com/iexec/worker/compute/post/PostComputeService.java

Lines changed: 12 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -220,11 +220,11 @@ public PostComputeResponse runTeePostCompute(TaskDescription taskDescription,
220220
}
221221
if (finalStatus == DockerRunFinalStatus.FAILED) {
222222
int exitCode = dockerResponse.getContainerExitCode();
223-
ReplicateStatusCause exitCause = getExitCause(chainTaskId, exitCode);
223+
final List<ReplicateStatusCause> exitCauses = getExitCauses(chainTaskId, exitCode);
224224
log.error("Failed to run tee post-compute [chainTaskId:{}, " +
225-
"exitCode:{}, exitCause:{}]", chainTaskId, exitCode, exitCause);
225+
"exitCode:{}, exitCauses:{}]", chainTaskId, exitCode, exitCauses);
226226
return PostComputeResponse.builder()
227-
.exitCauses(List.of(exitCause))
227+
.exitCauses(exitCauses)
228228
.build();
229229
}
230230
return PostComputeResponse.builder()
@@ -233,25 +233,15 @@ public PostComputeResponse runTeePostCompute(TaskDescription taskDescription,
233233
.build();
234234
}
235235

236-
private ReplicateStatusCause getExitCause(String chainTaskId, Integer exitCode) {
237-
ReplicateStatusCause cause = null;
238-
if (exitCode != null && exitCode != 0) {
239-
switch (exitCode) {
240-
case 1:
241-
// Use first cause from bulk processing for now
242-
cause = computeExitCauseService.getExitCausesAndPruneForGivenComputeStage(chainTaskId, ComputeStage.POST, POST_COMPUTE_FAILED_UNKNOWN_ISSUE).get(0);
243-
break;
244-
case 2:
245-
cause = ReplicateStatusCause.POST_COMPUTE_EXIT_REPORTING_FAILED;
246-
break;
247-
case 3:
248-
cause = ReplicateStatusCause.POST_COMPUTE_TASK_ID_MISSING;
249-
break;
250-
default:
251-
break;
252-
}
253-
}
254-
return cause;
236+
private List<ReplicateStatusCause> getExitCauses(final String chainTaskId, final int exitCode) {
237+
return switch (exitCode) {
238+
case 0 -> List.of();
239+
case 1 ->
240+
computeExitCauseService.getExitCausesAndPruneForGivenComputeStage(chainTaskId, ComputeStage.POST, POST_COMPUTE_FAILED_UNKNOWN_ISSUE);
241+
case 2 -> List.of(ReplicateStatusCause.POST_COMPUTE_EXIT_REPORTING_FAILED);
242+
case 3 -> List.of(ReplicateStatusCause.POST_COMPUTE_TASK_ID_MISSING);
243+
default -> List.of(POST_COMPUTE_FAILED_UNKNOWN_ISSUE);
244+
};
255245
}
256246

257247
private String getTaskTeePostComputeContainerName(String chainTaskId) {

src/main/java/com/iexec/worker/compute/pre/PreComputeService.java

Lines changed: 12 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -156,38 +156,28 @@ private List<ReplicateStatusCause> downloadDatasetAndFiles(
156156
Integer exitCode = prepareTeeInputData(taskDescription, secureSession);
157157
if (exitCode == null || exitCode != 0) {
158158
String chainTaskId = taskDescription.getChainTaskId();
159-
ReplicateStatusCause exitCause = getExitCause(chainTaskId, exitCode); // TODO: Handle list of exit causes
159+
final List<ReplicateStatusCause> exitCauses = getExitCauses(chainTaskId, exitCode);
160160
log.error("Failed to prepare TEE input data [chainTaskId:{}, exitCode:{}, exitCauses:{}]",
161-
chainTaskId, exitCode, exitCause);
162-
return List.of(exitCause);
161+
chainTaskId, exitCode, exitCauses);
162+
return exitCauses;
163163
}
164164
} catch (TimeoutException e) {
165165
return List.of(PRE_COMPUTE_TIMEOUT);
166166
}
167167
return List.of();
168168
}
169169

170-
private ReplicateStatusCause getExitCause(String chainTaskId, Integer exitCode) {
171-
ReplicateStatusCause cause = null;
170+
private List<ReplicateStatusCause> getExitCauses(final String chainTaskId, final Integer exitCode) {
172171
if (exitCode == null) {
173-
cause = PRE_COMPUTE_IMAGE_MISSING;
174-
} else {
175-
switch (exitCode) {
176-
case 1:
177-
// Use first cause from bulk processing for now
178-
cause = computeExitCauseService.getExitCausesAndPruneForGivenComputeStage(chainTaskId, ComputeStage.PRE, PRE_COMPUTE_FAILED_UNKNOWN_ISSUE).get(0);
179-
break;
180-
case 2:
181-
cause = ReplicateStatusCause.PRE_COMPUTE_EXIT_REPORTING_FAILED;
182-
break;
183-
case 3:
184-
cause = ReplicateStatusCause.PRE_COMPUTE_TASK_ID_MISSING;
185-
break;
186-
default:
187-
break;
188-
}
172+
return List.of(PRE_COMPUTE_IMAGE_MISSING);
189173
}
190-
return cause;
174+
return switch (exitCode) {
175+
case 1 -> computeExitCauseService.getExitCausesAndPruneForGivenComputeStage(
176+
chainTaskId, ComputeStage.PRE, PRE_COMPUTE_FAILED_UNKNOWN_ISSUE);
177+
case 2 -> List.of(PRE_COMPUTE_EXIT_REPORTING_FAILED);
178+
case 3 -> List.of(PRE_COMPUTE_TASK_ID_MISSING);
179+
default -> List.of(PRE_COMPUTE_FAILED_UNKNOWN_ISSUE);
180+
};
191181
}
192182

193183

src/test/java/com/iexec/worker/compute/post/PostComputeServiceTests.java

Lines changed: 60 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -43,13 +43,15 @@
4343
import lombok.extern.slf4j.Slf4j;
4444
import org.junit.jupiter.api.BeforeEach;
4545
import org.junit.jupiter.api.Test;
46+
import org.junit.jupiter.api.extension.ExtendWith;
4647
import org.junit.jupiter.api.io.TempDir;
4748
import org.junit.jupiter.params.ParameterizedTest;
4849
import org.junit.jupiter.params.provider.MethodSource;
50+
import org.junit.jupiter.params.provider.ValueSource;
4951
import org.mockito.ArgumentCaptor;
5052
import org.mockito.InjectMocks;
5153
import org.mockito.Mock;
52-
import org.mockito.MockitoAnnotations;
54+
import org.mockito.junit.jupiter.MockitoExtension;
5355

5456
import java.io.File;
5557
import java.io.IOException;
@@ -67,17 +69,22 @@
6769
import static org.mockito.Mockito.*;
6870

6971
@Slf4j
72+
@ExtendWith(MockitoExtension.class)
7073
class PostComputeServiceTests {
7174

7275
private static final String CHAIN_TASK_ID = "CHAIN_TASK_ID";
7376
private static final String DATASET_URI = "DATASET_URI";
7477
private static final String WORKER_NAME = "WORKER_NAME";
7578
private static final String TEE_POST_COMPUTE_IMAGE = "TEE_POST_COMPUTE_IMAGE";
76-
private static final long TEE_POST_COMPUTE_HEAP = 1024;
7779
private static final String TEE_POST_COMPUTE_ENTRYPOINT = "postComputeEntrypoint";
7880
private static final TeeSessionGenerationResponse SECURE_SESSION = mock(TeeSessionGenerationResponse.class);
7981
private static final long MAX_EXECUTION_TIME = 1000;
8082

83+
private final TeeAppProperties postComputeProperties = TeeAppProperties.builder()
84+
.image(TEE_POST_COMPUTE_IMAGE)
85+
.entrypoint(TEE_POST_COMPUTE_ENTRYPOINT)
86+
.build();
87+
8188
@TempDir
8289
public File jUnitTemporaryFolder;
8390
private TaskDescription taskDescription = TaskDescription.builder()
@@ -97,10 +104,6 @@ class PostComputeServiceTests {
97104
@Mock
98105
private TeeServicesManager teeServicesManager;
99106
@Mock
100-
private TeeAppProperties preComputeProperties;
101-
@Mock
102-
private TeeAppProperties postComputeProperties;
103-
@Mock
104107
private TeeServicesProperties properties;
105108
@Mock
106109
private DockerClientInstance dockerClientInstanceMock;
@@ -112,19 +115,11 @@ class PostComputeServiceTests {
112115
private TeeServicesPropertiesService teeServicesPropertiesService;
113116
@Mock
114117
private ComputeDurationsService postComputeDurationsService;
115-
116118
@Mock
117119
private TeeService teeMockedService;
118120

119121
@BeforeEach
120122
void beforeEach() {
121-
MockitoAnnotations.openMocks(this);
122-
when(dockerService.getClient()).thenReturn(dockerClientInstanceMock);
123-
when(teeServicesManager.getTeeService(any())).thenReturn(teeMockedService);
124-
when(properties.getPreComputeProperties()).thenReturn(preComputeProperties);
125-
when(properties.getPostComputeProperties()).thenReturn(postComputeProperties);
126-
when(teeServicesPropertiesService.getTeeServicesProperties(CHAIN_TASK_ID)).thenReturn(properties);
127-
128123
output = jUnitTemporaryFolder.getAbsolutePath();
129124
iexecOut = output + IexecFileHelper.SLASH_IEXEC_OUT;
130125
computedJson = iexecOut + IexecFileHelper.SLASH_COMPUTED_JSON;
@@ -209,35 +204,36 @@ void shouldFailResultFilesNameCheckWhenFileNameTooLong() throws IOException {
209204
// endregion
210205

211206
//region runTeePostCompute
207+
void prepareMocksForTeePostCompute(DockerRunResponse dockerRunResponse) {
208+
List<String> env = Arrays.asList("var0", "var1");
209+
when(dockerService.getClient()).thenReturn(dockerClientInstanceMock);
210+
when(teeServicesManager.getTeeService(any())).thenReturn(teeMockedService);
211+
when(teeServicesPropertiesService.getTeeServicesProperties(CHAIN_TASK_ID)).thenReturn(properties);
212+
when(properties.getPostComputeProperties()).thenReturn(postComputeProperties);
213+
when(dockerClientInstanceMock.isImagePresent(TEE_POST_COMPUTE_IMAGE)).thenReturn(true);
214+
when(teeMockedService.buildPostComputeDockerEnv(taskDescription, SECURE_SESSION)).thenReturn(env);
215+
String iexecOutBind = iexecOut + ":" + IexecFileHelper.SLASH_IEXEC_OUT;
216+
when(dockerService.getIexecOutBind(CHAIN_TASK_ID)).thenReturn(iexecOutBind);
217+
when(workerConfigService.getWorkerName()).thenReturn(WORKER_NAME);
218+
when(workerConfigService.getDockerNetworkName()).thenReturn("lasNetworkName");
219+
when(sgxService.getSgxDriverMode()).thenReturn(SgxDriverMode.LEGACY);
220+
when(dockerService.run(any())).thenReturn(dockerRunResponse);
221+
}
222+
212223
@Test
213224
void shouldRunTeePostComputeAndConnectToLasNetwork() {
214-
String lasNetworkName = "networkName";
225+
String lasNetworkName = "lasNetworkName";
215226
taskDescription = TaskDescription.builder()
216227
.chainTaskId(CHAIN_TASK_ID)
217228
.datasetUri(DATASET_URI)
218229
.maxExecutionTime(MAX_EXECUTION_TIME)
219230
.build();
220-
List<String> env = Arrays.asList("var0", "var1");
221-
when(postComputeProperties.getImage()).thenReturn(TEE_POST_COMPUTE_IMAGE);
222-
when(postComputeProperties.getHeapSizeInBytes()).thenReturn(TEE_POST_COMPUTE_HEAP);
223-
when(postComputeProperties.getEntrypoint()).thenReturn(TEE_POST_COMPUTE_ENTRYPOINT);
224-
when(dockerClientInstanceMock.isImagePresent(TEE_POST_COMPUTE_IMAGE))
225-
.thenReturn(true);
226-
when(teeMockedService.buildPostComputeDockerEnv(taskDescription, SECURE_SESSION))
227-
.thenReturn(env);
228-
String iexecOutBind = iexecOut + ":" + IexecFileHelper.SLASH_IEXEC_OUT;
229-
when(dockerService.getIexecOutBind(CHAIN_TASK_ID)).thenReturn(iexecOutBind);
230-
when(workerConfigService.getTaskOutputDir(CHAIN_TASK_ID)).thenReturn(output);
231-
when(workerConfigService.getTaskIexecOutDir(CHAIN_TASK_ID)).thenReturn(iexecOut);
232-
when(workerConfigService.getWorkerName()).thenReturn(WORKER_NAME);
233-
when(workerConfigService.getDockerNetworkName()).thenReturn(lasNetworkName);
234231
DockerRunResponse expectedDockerRunResponse = DockerRunResponse
235232
.builder()
236233
.finalStatus(DockerRunFinalStatus.SUCCESS)
237234
.executionDuration(Duration.ofSeconds(10))
238235
.build();
239-
when(dockerService.run(any())).thenReturn(expectedDockerRunResponse);
240-
when(sgxService.getSgxDriverMode()).thenReturn(SgxDriverMode.LEGACY);
236+
prepareMocksForTeePostCompute(expectedDockerRunResponse);
241237
List<Device> devices = List.of(Device.parse("/dev/isgx"));
242238
when(sgxService.getSgxDevices()).thenReturn(devices);
243239

@@ -251,10 +247,12 @@ void shouldRunTeePostComputeAndConnectToLasNetwork() {
251247
verify(dockerService).run(argumentCaptor.capture());
252248
DockerRunRequest dockerRunRequest =
253249
argumentCaptor.getAllValues().get(0);
250+
String iexecOutBind = iexecOut + ":" + IexecFileHelper.SLASH_IEXEC_OUT;
254251
HostConfig hostConfig = HostConfig.newHostConfig()
255252
.withBinds(Bind.parse(iexecOutBind))
256253
.withDevices(devices)
257254
.withNetworkMode(lasNetworkName);
255+
List<String> env = Arrays.asList("var0", "var1");
258256
assertThat(dockerRunRequest).isEqualTo(
259257
DockerRunRequest.builder()
260258
.hostConfig(hostConfig)
@@ -276,9 +274,9 @@ void shouldNotRunTeePostComputeSinceDockerImageNotFoundLocally() {
276274
.datasetUri(DATASET_URI)
277275
.maxExecutionTime(MAX_EXECUTION_TIME)
278276
.build();
279-
when(postComputeProperties.getImage()).thenReturn(TEE_POST_COMPUTE_IMAGE);
280-
when(postComputeProperties.getHeapSizeInBytes()).thenReturn(TEE_POST_COMPUTE_HEAP);
281-
when(postComputeProperties.getEntrypoint()).thenReturn(TEE_POST_COMPUTE_ENTRYPOINT);
277+
when(dockerService.getClient()).thenReturn(dockerClientInstanceMock);
278+
when(teeServicesPropertiesService.getTeeServicesProperties(CHAIN_TASK_ID)).thenReturn(properties);
279+
when(properties.getPostComputeProperties()).thenReturn(postComputeProperties);
282280
when(dockerClientInstanceMock.isImagePresent(TEE_POST_COMPUTE_IMAGE))
283281
.thenReturn(false);
284282

@@ -297,29 +295,17 @@ void shouldRunTeePostComputeWithFailDockerResponse(Map.Entry<Integer, ReplicateS
297295
.datasetUri(DATASET_URI)
298296
.maxExecutionTime(MAX_EXECUTION_TIME)
299297
.build();
300-
List<String> env = Arrays.asList("var0", "var1");
301-
when(postComputeProperties.getImage()).thenReturn(TEE_POST_COMPUTE_IMAGE);
302-
when(postComputeProperties.getHeapSizeInBytes()).thenReturn(TEE_POST_COMPUTE_HEAP);
303-
when(postComputeProperties.getEntrypoint()).thenReturn(TEE_POST_COMPUTE_ENTRYPOINT);
304-
when(dockerClientInstanceMock.isImagePresent(TEE_POST_COMPUTE_IMAGE))
305-
.thenReturn(true);
306-
when(teeMockedService.buildPostComputeDockerEnv(taskDescription, SECURE_SESSION))
307-
.thenReturn(env);
308-
String iexecOutBind = iexecOut + ":" + IexecFileHelper.SLASH_IEXEC_OUT;
309-
when(dockerService.getIexecOutBind(CHAIN_TASK_ID)).thenReturn(iexecOutBind);
310-
when(workerConfigService.getTaskOutputDir(CHAIN_TASK_ID)).thenReturn(output);
311-
when(workerConfigService.getTaskIexecOutDir(CHAIN_TASK_ID)).thenReturn(iexecOut);
312-
when(workerConfigService.getWorkerName()).thenReturn(WORKER_NAME);
313-
when(workerConfigService.getDockerNetworkName()).thenReturn("lasNetworkName");
314298
DockerRunResponse expectedDockerRunResponse =
315299
DockerRunResponse.builder()
316300
.finalStatus(DockerRunFinalStatus.FAILED)
317301
.containerExitCode(exitCodeKeyToExpectedCauseValue.getKey())
318302
.build();
319-
when(dockerService.run(any())).thenReturn(expectedDockerRunResponse);
320-
when(sgxService.getSgxDriverMode()).thenReturn(SgxDriverMode.LEGACY);
321-
when(computeExitCauseService.getExitCausesAndPruneForGivenComputeStage(CHAIN_TASK_ID, ComputeStage.POST, POST_COMPUTE_FAILED_UNKNOWN_ISSUE))
322-
.thenReturn(List.of(exitCodeKeyToExpectedCauseValue.getValue()));
303+
prepareMocksForTeePostCompute(expectedDockerRunResponse);
304+
// Only stub computeExitCauseService for exitCode == 1
305+
if (exitCodeKeyToExpectedCauseValue.getKey() == 1) {
306+
when(computeExitCauseService.getExitCausesAndPruneForGivenComputeStage(CHAIN_TASK_ID, ComputeStage.POST, POST_COMPUTE_FAILED_UNKNOWN_ISSUE))
307+
.thenReturn(List.of(exitCodeKeyToExpectedCauseValue.getValue()));
308+
}
323309

324310
PostComputeResponse postComputeResponse =
325311
postComputeService.runTeePostCompute(taskDescription, SECURE_SESSION);
@@ -345,24 +331,11 @@ void shouldNotRunTeePostComputeSinceTimeout() {
345331
.datasetUri(DATASET_URI)
346332
.maxExecutionTime(MAX_EXECUTION_TIME)
347333
.build();
348-
List<String> env = Arrays.asList("var0", "var1");
349-
when(postComputeProperties.getImage()).thenReturn(TEE_POST_COMPUTE_IMAGE);
350-
when(postComputeProperties.getHeapSizeInBytes()).thenReturn(TEE_POST_COMPUTE_HEAP);
351-
when(postComputeProperties.getEntrypoint()).thenReturn(TEE_POST_COMPUTE_ENTRYPOINT);
352-
when(dockerClientInstanceMock.isImagePresent(TEE_POST_COMPUTE_IMAGE))
353-
.thenReturn(true);
354-
when(teeMockedService.buildPostComputeDockerEnv(taskDescription, SECURE_SESSION))
355-
.thenReturn(env);
356-
when(dockerService.getIexecOutBind(CHAIN_TASK_ID)).thenReturn("/iexec_out:/iexec_out");
357-
when(workerConfigService.getTaskOutputDir(CHAIN_TASK_ID)).thenReturn(output);
358-
when(workerConfigService.getWorkerName()).thenReturn(WORKER_NAME);
359-
when(workerConfigService.getDockerNetworkName()).thenReturn("lasNetworkName");
360334
DockerRunResponse expectedDockerRunResponse =
361335
DockerRunResponse.builder()
362336
.finalStatus(DockerRunFinalStatus.TIMEOUT)
363337
.build();
364-
when(dockerService.run(any())).thenReturn(expectedDockerRunResponse);
365-
when(sgxService.getSgxDriverMode()).thenReturn(SgxDriverMode.LEGACY);
338+
prepareMocksForTeePostCompute(expectedDockerRunResponse);
366339

367340
PostComputeResponse postComputeResponse =
368341
postComputeService.runTeePostCompute(taskDescription, SECURE_SESSION);
@@ -372,5 +345,26 @@ void shouldNotRunTeePostComputeSinceTimeout() {
372345
.containsExactly(ReplicateStatusCause.POST_COMPUTE_TIMEOUT);
373346
verify(dockerService).run(any());
374347
}
348+
349+
// region getExitCauses
350+
@ParameterizedTest
351+
@ValueSource(ints = {4, 5, 10, 42, 127, 255})
352+
void shouldReturnUnknownIssueForUnmappedExitCodes(int exitCode) {
353+
taskDescription = TaskDescription.builder()
354+
.chainTaskId(CHAIN_TASK_ID)
355+
.datasetUri(DATASET_URI)
356+
.maxExecutionTime(MAX_EXECUTION_TIME)
357+
.build();
358+
final DockerRunResponse dockerResponse = DockerRunResponse.builder()
359+
.finalStatus(DockerRunFinalStatus.FAILED)
360+
.containerExitCode(exitCode)
361+
.build();
362+
prepareMocksForTeePostCompute(dockerResponse);
363+
final PostComputeResponse response = postComputeService.runTeePostCompute(taskDescription, SECURE_SESSION);
364+
assertThat(response.isSuccessful()).isFalse();
365+
assertThat(response.getExitCauses())
366+
.hasSize(1)
367+
.containsExactly(POST_COMPUTE_FAILED_UNKNOWN_ISSUE);
368+
}
375369
//endregion
376370
}

0 commit comments

Comments
 (0)