Skip to content

Commit ff831a1

Browse files
authored
Initial setup for evaluation Github Action (#5)
* Initial setup
* Re-run 'npm run bundle'
* Linter and test fixes
* Re-run 'npm run bundle'
1 parent e174f9d commit ff831a1

File tree

8 files changed: +295 additions, -151 deletions (lines changed)

.github/workflows/ci.yml

Lines changed: 0 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -43,22 +43,3 @@ jobs:
4343
- name: Test
4444
id: npm-ci-test
4545
run: npm run ci-test
46-
47-
test-action:
48-
name: GitHub Actions Test
49-
runs-on: ubuntu-latest
50-
51-
steps:
52-
- name: Checkout
53-
id: checkout
54-
uses: actions/checkout@v4
55-
56-
- name: Test Local Action
57-
id: test-action
58-
uses: ./
59-
with:
60-
milliseconds: 2000
61-
62-
- name: Print Output
63-
id: output
64-
run: echo "${{ steps.test-action.outputs.time }}"

__tests__/main.test.ts

Lines changed: 122 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -1,89 +1,152 @@
11
/**
22
* Unit tests for the action's main functionality, src/main.ts
3-
*
4-
* These should be run as if the action was called from a workflow.
5-
* Specifically, the inputs listed in `action.yml` should be set as environment
6-
* variables following the pattern `INPUT_<INPUT_NAME>`.
73
*/
84

95
import * as core from '@actions/core'
10-
import * as main from '../src/main'
6+
import { run } from '../src/main'
117

12-
// Mock the action's main function
13-
const runMock = jest.spyOn(main, 'run')
8+
// Mocking the @actions/core module
9+
jest.mock('@actions/core')
1410

15-
// Other utilities
16-
const timeRegex = /^\d{2}:\d{2}:\d{2}/
11+
const mockGet = jest.fn()
1712

18-
// Mock the GitHub Actions core library
19-
let debugMock: jest.SpiedFunction<typeof core.debug>
20-
let errorMock: jest.SpiedFunction<typeof core.error>
21-
let getInputMock: jest.SpiedFunction<typeof core.getInput>
22-
let setFailedMock: jest.SpiedFunction<typeof core.setFailed>
23-
let setOutputMock: jest.SpiedFunction<typeof core.setOutput>
13+
jest.mock('@actions/http-client', () => {
14+
return {
15+
HttpClient: jest.fn().mockImplementation(() => {
16+
return { get: mockGet }
17+
})
18+
}
19+
})
20+
21+
describe('GitHub Actions Script', () => {
22+
let getInputMock: jest.Mock
23+
let setOutputMock: jest.Mock
24+
let setFailedMock: jest.Mock
25+
26+
const mockApiResponse = {
27+
status: 'completed',
28+
success: true,
29+
passed: ['datapoint1', 'datapoint2'],
30+
failed: [],
31+
metrics: {
32+
aggregation_function: 'average',
33+
details: []
34+
},
35+
datapoints: []
36+
}
2437

25-
describe('action', () => {
2638
beforeEach(() => {
39+
// Reset mocks before each test
2740
jest.clearAllMocks()
2841

29-
debugMock = jest.spyOn(core, 'debug').mockImplementation()
30-
errorMock = jest.spyOn(core, 'error').mockImplementation()
31-
getInputMock = jest.spyOn(core, 'getInput').mockImplementation()
32-
setFailedMock = jest.spyOn(core, 'setFailed').mockImplementation()
33-
setOutputMock = jest.spyOn(core, 'setOutput').mockImplementation()
34-
})
35-
36-
it('sets the time output', async () => {
37-
// Set the action's inputs as return values from core.getInput()
38-
getInputMock.mockImplementation(name => {
42+
// Mock core.getInput to return test values
43+
getInputMock = core.getInput as jest.Mock
44+
getInputMock.mockImplementation((name: string) => {
3945
switch (name) {
40-
case 'milliseconds':
41-
return '500'
46+
case 'runId':
47+
return 'mockRunId'
48+
case 'projectId':
49+
return 'mockProjectId'
50+
case 'aggregateFunction':
51+
return 'average'
52+
case 'apiUrl':
53+
return 'https://api.example.com'
4254
default:
4355
return ''
4456
}
4557
})
4658

47-
await main.run()
48-
expect(runMock).toHaveReturned()
59+
// Mock core.setOutput and core.setFailed
60+
setOutputMock = core.setOutput as jest.Mock
61+
setFailedMock = core.setFailed as jest.Mock
62+
63+
// Mock the HttpClient's get method
64+
mockGet.mockResolvedValue({
65+
message: { statusCode: 200 },
66+
readBody: jest.fn().mockResolvedValue(JSON.stringify(mockApiResponse))
67+
})
4968

50-
// Verify that all of the core library functions were called correctly
51-
expect(debugMock).toHaveBeenNthCalledWith(1, 'Waiting 500 milliseconds ...')
52-
expect(debugMock).toHaveBeenNthCalledWith(
53-
2,
54-
expect.stringMatching(timeRegex)
69+
// Set up process.env for API key
70+
process.env.HH_API_KEY = 'mockApiKey'
71+
})
72+
73+
it('should set outputs correctly on a successful API call', async () => {
74+
await run()
75+
76+
// Ensure core.getInput was called with expected arguments
77+
expect(getInputMock).toHaveBeenCalledWith('runId', { required: true })
78+
expect(getInputMock).toHaveBeenCalledWith('projectId', { required: true })
79+
expect(getInputMock).toHaveBeenCalledWith('aggregateFunction')
80+
expect(getInputMock).toHaveBeenCalledWith('apiUrl')
81+
82+
// Ensure the HTTP request was made with the correct URL and headers
83+
expect(mockGet).toHaveBeenCalledWith(
84+
'https://api.example.com/eval/mockRunId/result?projectId=mockProjectId&aggregateFunction=average',
85+
{
86+
Authorization: 'Bearer mockApiKey',
87+
'Content-Type': 'application/json'
88+
}
5589
)
56-
expect(debugMock).toHaveBeenNthCalledWith(
57-
3,
58-
expect.stringMatching(timeRegex)
90+
91+
// Ensure core.setOutput was called with the correct values from the mock response
92+
expect(setOutputMock).toHaveBeenCalledWith('status', 'completed')
93+
expect(setOutputMock).toHaveBeenCalledWith('success', true)
94+
expect(setOutputMock).toHaveBeenCalledWith('passed', [
95+
'datapoint1',
96+
'datapoint2'
97+
])
98+
expect(setOutputMock).toHaveBeenCalledWith('failed', [])
99+
expect(setOutputMock).toHaveBeenCalledWith(
100+
'metrics',
101+
mockApiResponse.metrics
59102
)
60-
expect(setOutputMock).toHaveBeenNthCalledWith(
61-
1,
62-
'time',
63-
expect.stringMatching(timeRegex)
103+
expect(setOutputMock).toHaveBeenCalledWith(
104+
'datapoints',
105+
mockApiResponse.datapoints
64106
)
65-
expect(errorMock).not.toHaveBeenCalled()
66107
})
67108

68-
it('sets a failed status', async () => {
69-
// Set the action's inputs as return values from core.getInput()
70-
getInputMock.mockImplementation(name => {
71-
switch (name) {
72-
case 'milliseconds':
73-
return 'this is not a number'
74-
default:
75-
return ''
76-
}
109+
it('should fail the workflow if the API key is missing', async () => {
110+
delete process.env.HH_API_KEY // Simulate missing API key
111+
112+
await run()
113+
114+
// Ensure core.setFailed was called with the appropriate error message
115+
expect(setFailedMock).toHaveBeenCalledWith(
116+
'API key is missing. Make sure HH_API_KEY is set in the environment.'
117+
)
118+
})
119+
120+
it('should fail the workflow if the API request returns a non-200 status', async () => {
121+
// Mock the HTTP client to return a non-200 status
122+
mockGet.mockResolvedValue({
123+
message: { statusCode: 500 },
124+
readBody: jest
125+
.fn()
126+
.mockResolvedValue(JSON.stringify({ error: 'Server error' }))
127+
})
128+
129+
await run()
130+
131+
// Ensure core.setFailed was called with the appropriate error message
132+
expect(setFailedMock).toHaveBeenCalledWith(
133+
'API request failed with status code 500'
134+
)
135+
})
136+
137+
it('should handle and report an error from the API response', async () => {
138+
// Mock the HTTP client to return invalid JSON
139+
mockGet.mockResolvedValue({
140+
message: { statusCode: 200 },
141+
readBody: jest.fn().mockResolvedValue('Invalid JSON')
77142
})
78143

79-
await main.run()
80-
expect(runMock).toHaveReturned()
144+
await run()
81145

82-
// Verify that all of the core library functions were called correctly
83-
expect(setFailedMock).toHaveBeenNthCalledWith(
84-
1,
85-
'milliseconds not a number'
146+
// Ensure core.setFailed was called due to JSON parsing error
147+
expect(setFailedMock).toHaveBeenCalled()
148+
expect(setFailedMock).toHaveBeenCalledWith(
149+
expect.stringContaining('Unexpected token')
86150
)
87-
expect(errorMock).not.toHaveBeenCalled()
88151
})
89152
})

action.yml

Lines changed: 35 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,45 @@
1-
name: 'The name of your action here'
2-
description: 'Provide a description here'
3-
author: 'Your name or organization here'
1+
name: 'HoneyHive Evaluation'
2+
description: 'Evaluations for LLM applications'
3+
author: 'HoneyHive'
44

5-
# Add your action's branding here. This will appear on the GitHub Marketplace.
65
branding:
7-
icon: 'heart'
8-
color: 'red'
6+
icon: 'check-square'
7+
color: 'yellow'
98

10-
# Define your inputs here.
119
inputs:
12-
milliseconds:
13-
description: 'Your input description here'
10+
runId:
11+
description: 'The ID of the run to evaluate'
12+
required: true
13+
projectId:
14+
description: 'The project ID associated with the evaluation'
15+
required: true
16+
aggregateFunction:
17+
description:
18+
'The function used for aggregating the evaluation results (e.g., average,
19+
min, max, etc.)'
20+
required: false
21+
default: 'average'
22+
apiUrl:
23+
description: 'The base URL of the HoneyHive API'
1424
required: true
15-
default: '1000'
1625

17-
# Define your outputs here.
1826
outputs:
19-
time:
20-
description: 'Your output description here'
27+
status:
28+
description: 'The status of the evaluation run (e.g., pending or completed)'
29+
success:
30+
description: 'Whether all datapoints have passed (true or false)'
31+
passed:
32+
description: 'List of passed datapoint_ids or session_ids'
33+
failed:
34+
description: 'List of failed datapoint_ids or session_ids'
35+
metrics:
36+
description:
37+
'Metric level aggregates and details of passed/failed status for each
38+
metric'
39+
datapoints:
40+
description:
41+
'Details of each datapoint with associated session_id, pass status, and
42+
metric-level pass/fail status'
2143

2244
runs:
2345
using: node20

0 commit comments

Comments
 (0)