Skip to content
This repository was archived by the owner on Sep 9, 2025. It is now read-only.

Commit 5d8d28a

Browse files
comparing using both answers
Signed-off-by: greg pereira <[email protected]>
1 parent 03f05a3 commit 5d8d28a

File tree

2 files changed

+97
-35
lines changed

2 files changed

+97
-35
lines changed

worker/cmd/generate.go

Lines changed: 87 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -216,38 +216,100 @@ var generateCmd = &cobra.Command{
216216
},
217217
}
218218

219-
func (w *Worker) runPrecheckScoring(precheckPRAnswers []string, precheckPRQuestions []string, lab string, outputDir string) error {
220-
if len(precheckPRAnswers) != len(precheckPRQuestions) {
221-
errMsg := "PR questions and BAM answers returned a different number of entries, something went wrong."
219+
func (w *Worker) runPrecheckScoring(precheckPRAnswers []string, precheckEndpointAnswers []string, precheckPRQuestions []string, lab string, outputDir string, preCheckScoringModelName string) error {
220+
if len(precheckPRAnswers) != len(precheckEndpointAnswers) {
221+
errMsg := "PR questions a Endpoint answers returned a different number of entries, something went wrong."
222222
w.logger.Error(errMsg)
223223
return fmt.Errorf(errMsg)
224224
}
225-
// 3. format new request via CLI
226-
// 4. Send request
227-
// 5. recieve data back
228-
// 6. write output to the same outDir as precheck
229-
// 7. Modify generate functions to include this new special file
225+
226+
workDir := "."
227+
if WorkDir != "" {
228+
workDir = WorkDir
229+
}
230+
chatlogDir := path.Join(workDir, "data", "chatlogs")
231+
combinedYAMLScoringPath := path.Join(outputDir, "combined_chatlog_scoring.yaml")
232+
233+
type QuestionScore struct {
234+
Question string
235+
HumanAnswer string
236+
EndpointAnswer string
237+
Score string
238+
}
239+
240+
type QuestionScoreReport struct {
241+
RunTime string
242+
QuestionScores []QuestionScore
243+
}
244+
245+
yamlData := QuestionScoreReport{}
230246
for i := 0; i < len(precheckPRAnswers); i++ {
231247
err, promptTemplate := generatePrecheckScoringPrompt(precheckPRAnswers[i], precheckPRQuestions[i])
232248
if err != nil {
233249
w.logger.Errorf("Failed to generate a prompt for precheck scorring: %v", err)
234250
return err
235251
}
236-
fmt.Print(promptTemplate) // ignoring errors for now
237-
// SOME REQUEST TO SOME PART OF THE BAM ENDPOINT USING THE TEMPLATE
238252

253+
commandStr := fmt.Sprintf("chat --quick-question %s", promptTemplate)
254+
if TlsInsecure {
255+
commandStr += " --tls-insecure"
256+
}
257+
if PreCheckScoringEndpointURL != localEndpoint && preCheckScoringModelName != "unknown" {
258+
commandStr += fmt.Sprintf(" --endpoint-url %s --model %s", PreCheckEndpointURL, preCheckScoringModelName)
259+
}
260+
cmdArgs := strings.Fields(commandStr)
261+
cmd := exec.Command(lab, cmdArgs...)
262+
// Register the command for reporting/logging
263+
w.cmdRun = cmd.String()
264+
w.logger.Infof("Running the precheck scoring command: %s", cmd.String())
265+
266+
cmd.Dir = workDir
267+
cmd.Env = os.Environ()
268+
var out bytes.Buffer
269+
var errOut bytes.Buffer
270+
cmd.Stdout = &out
271+
cmd.Stderr = &errOut
272+
err = cmd.Run()
273+
if err != nil {
274+
w.logger.Errorf("Precheck scoring command failed with error: %v; stderr: %s", err, errOut.String())
275+
continue
276+
}
277+
278+
questionScore := QuestionScore{
279+
Question: precheckPRQuestions[i],
280+
HumanAnswer: precheckPRAnswers[i],
281+
EndpointAnswer: precheckEndpointAnswers[i],
282+
Score: out.String(),
283+
}
284+
yamlData.QuestionScores = append(yamlData.QuestionScores, questionScore)
285+
286+
}
287+
288+
yamlData.RunTime = time.Now().Format("2006-01-02T15_04_05")
289+
290+
scoringYaml, err := yaml.Marshal(yamlData)
291+
if err != nil {
292+
w.logger.Errorf("Could not marshal scoring data to YAML: %v", err)
293+
return err
239294
}
295+
296+
err = os.WriteFile(path.Join(chatlogDir, combinedYAMLScoringPath), scoringYaml, 0644)
297+
if err != nil {
298+
w.logger.Errorf("Could not write chatlog to file: %v", err)
299+
return err
300+
}
301+
240302
return nil
241303
}
242304

243305
// runPrecheck runs lab chat against git diffed yaml files
244-
func (w *Worker) runPrecheck(lab, outputDir, modelName string) (error, []string, []string) {
306+
func (w *Worker) runPrecheck(lab, outputDir, modelName string) (error, []string, []string, []string) {
245307
workDir := "."
246308
if WorkDir != "" {
247309
workDir = WorkDir
248310
}
249311
precheckPRAnswers := []string{}
250-
// precheckEndpointAnswers := []string{}
312+
precheckEndpointAnswers := []string{}
251313
precheckPRQuestions := []string{}
252314
chatlogDir := path.Join(workDir, "data", "chatlogs")
253315
combinedYAMLPath := path.Join(outputDir, "combined_chatlogs.yaml")
@@ -329,19 +391,19 @@ func (w *Worker) runPrecheck(lab, outputDir, modelName string) (error, []string,
329391
stdout, err := cmd.StdoutPipe()
330392
if err != nil {
331393
w.logger.Errorf("Could not get stdout pipe: %v", err)
332-
return err, []string{}, []string{}
394+
return err, []string{}, []string{}, []string{}
333395
}
334396

335397
w.logger.Debug("Running ilab diff")
336398
if err := cmd.Start(); err != nil {
337399
w.logger.Errorf("Could not start command(%s %s): %v", cmd.Path, strings.Join(cmd.Args, " "), err)
338-
return err, []string{}, []string{}
400+
return err, []string{}, []string{}, []string{}
339401
}
340402

341403
output, err := io.ReadAll(stdout)
342404
if err != nil {
343405
w.logger.Errorf("Could not read stdout: %v", err)
344-
return err, []string{}, []string{}
406+
return err, []string{}, []string{}, []string{}
345407
}
346408
outputStr := string(output)
347409
w.logger.Debugf("Output: %s", outputStr)
@@ -359,7 +421,7 @@ func (w *Worker) runPrecheck(lab, outputDir, modelName string) (error, []string,
359421
if yamlFileCount == 0 {
360422
errMsg := "No modified YAML files detected in the PR for precheck"
361423
w.logger.Error(errMsg)
362-
return fmt.Errorf(errMsg), []string{}, []string{}
424+
return fmt.Errorf(errMsg), []string{}, []string{}, []string{}
363425
}
364426

365427
// Proceed with YAML files processing if they exist
@@ -372,14 +434,14 @@ func (w *Worker) runPrecheck(lab, outputDir, modelName string) (error, []string,
372434
f, err := os.Open(filePath)
373435
if err != nil {
374436
w.logger.Errorf("Could not open taxonomy file: %v", err)
375-
return err, []string{}, []string{}
437+
return err, []string{}, []string{}, []string{}
376438
}
377439
defer f.Close()
378440

379441
content, err := io.ReadAll(f)
380442
if err != nil {
381443
w.logger.Error(err)
382-
return err, []string{}, []string{}
444+
return err, []string{}, []string{}, []string{}
383445
}
384446

385447
var data map[string]interface{}
@@ -388,7 +450,7 @@ func (w *Worker) runPrecheck(lab, outputDir, modelName string) (error, []string,
388450
// Odds are, the PR was not yaml-linted since it's invalid YAML failing unmarshalling
389451
err = fmt.Errorf("the original taxonomy YAML likely did not pass yaml-linting, here is the unmarshalling error: %v", err)
390452
w.logger.Error(err)
391-
return err, []string{}, []string{}
453+
return err, []string{}, []string{}, []string{}
392454
}
393455

394456
// Check if "seed_examples" exists and is a list
@@ -397,7 +459,7 @@ func (w *Worker) runPrecheck(lab, outputDir, modelName string) (error, []string,
397459
if !ok {
398460
err = fmt.Errorf("seed_examples not found or not a list")
399461
w.logger.Error(err)
400-
return err, []string{}, []string{}
462+
return err, []string{}, []string{}, []string{}
401463
}
402464

403465
for _, item := range seedExamples {
@@ -457,7 +519,7 @@ func (w *Worker) runPrecheck(lab, outputDir, modelName string) (error, []string,
457519
"output": out.String(),
458520
}
459521

460-
// precheckEndpointAnswers = append(precheckEndpointAnswers, out.String())
522+
precheckEndpointAnswers = append(precheckEndpointAnswers, out.String())
461523
precheckPRQuestions = append(precheckPRQuestions, originalQuestion)
462524

463525
if hasContext {
@@ -492,8 +554,8 @@ func (w *Worker) runPrecheck(lab, outputDir, modelName string) (error, []string,
492554
time.Sleep(1 * time.Second)
493555
}
494556
}
495-
// return nil, precheckPRAnswers, precheckEndpointAnswers
496-
return nil, precheckPRAnswers, precheckPRQuestions
557+
return nil, precheckPRAnswers, precheckEndpointAnswers, precheckPRQuestions
558+
// return nil, precheckPRAnswers, precheckPRQuestions
497559
}
498560

499561
// processJob processes a given job, all jobs start here
@@ -615,13 +677,13 @@ func (w *Worker) processJob() {
615677
case jobPreCheck:
616678
// @instructlab-bot precheck
617679
// Runs precheck on a backend node
618-
err, precheckPRAnswers, precheckEndpointAnswers := w.runPrecheck(lab, outputDir, modelName)
680+
err, precheckPRAnswers, precheckEndpointAnswers, precheckPRQuestions := w.runPrecheck(lab, outputDir, modelName)
619681
if err != nil {
620682
sugar.Errorf("Could not run precheck: %v", err)
621683
w.reportJobError(err)
622684
return
623685
}
624-
err = w.runPrecheckScoring(precheckPRAnswers, precheckEndpointAnswers, lab, outputDir)
686+
err = w.runPrecheckScoring(precheckPRAnswers, precheckEndpointAnswers, precheckPRQuestions, lab, outputDir, modelName)
625687
if err != nil {
626688
sugar.Errorf("Could not run scoring on result of precheck: %v", err)
627689
w.reportJobError(err)

worker/cmd/templates.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -266,11 +266,11 @@ func generateFormattedYAML(ctx context.Context, outputDir, filename string, svc
266266
return s3Key
267267
}
268268

269-
func generatePrecheckScoringPrompt(precheckPRAnswer string, precheckPRQuestion string) (error, string) {
269+
func generatePrecheckScoringPrompt(precheckPRAnswer string, precheckEndpointAnswer string) (error, string) {
270270
promptTemplate := `
271271
Please act as an impartial judge and evaluate the quality of the answer provided by an AI assistant
272272
to the questions displayed below. Evaluate whether or not the answer is a good example of how AI
273-
Assistant should respond to the user’s instruction. Please assign a score using the following 3-point
273+
Assistant as compared to a correct, human provided answer. Please assign a score using the following 3-point
274274
scale:
275275
1: It means the answer is incorrect, irrelevant, unsafe or provides incomplete and garbage information.
276276
For instance, the answer may be factually wrong, off-topic, or filled with irrelevant content that
@@ -285,10 +285,10 @@ func generatePrecheckScoringPrompt(precheckPRAnswer string, precheckPRQuestion s
285285
Begin your evaluation by providing a short explanation. Be as objective as possible. After providing
286286
your explanation, you must rate the answer on a scale of 1 to 3 as mentioned above. Please use the
287287
following example as a reference for your evaluation.
288-
% Input Question:
289-
{{ .Question }}
290-
% Model Output:
291-
{{ .Answer }}
288+
% Human answer:
289+
{{ .HumanAnswer }}
290+
% Model answer:
291+
{{ .ModelAnswer }}
292292
`
293293

294294
tmpl, err := template.New("modelScoring").Parse(promptTemplate)
@@ -297,11 +297,11 @@ func generatePrecheckScoringPrompt(precheckPRAnswer string, precheckPRQuestion s
297297
}
298298

299299
data := struct {
300-
Question string
301-
Answer string
300+
HumanAnswer string
301+
ModelAnswer string
302302
}{
303-
Question: precheckPRQuestion,
304-
Answer: precheckPRAnswer,
303+
HumanAnswer: precheckPRAnswer,
304+
ModelAnswer: precheckEndpointAnswer,
305305
}
306306
var buf bytes.Buffer
307307
err = tmpl.Execute(&buf, data)

0 commit comments

Comments
 (0)