diff --git a/tests/lmeval/configs/w4a4_nvfp4.yaml b/tests/lmeval/configs/w4a4_nvfp4.yaml new file mode 100644 index 000000000..a44c99894 --- /dev/null +++ b/tests/lmeval/configs/w4a4_nvfp4.yaml @@ -0,0 +1,10 @@ +cadence: "weekly" +model: meta-llama/Llama-3.1-8B-Instruct +scheme: NVFP4 +dataset_id: HuggingFaceH4/ultrachat_200k +dataset_split: train_sft +num_calibration_samples: 20 +lmeval: + metrics: + exact_match,flexible-extract: 0.70 + exact_match,strict-match: 0.65 diff --git a/tests/lmeval/test_lmeval.py b/tests/lmeval/test_lmeval.py index 51aa50665..1f36a8c50 100644 --- a/tests/lmeval/test_lmeval.py +++ b/tests/lmeval/test_lmeval.py @@ -90,7 +90,7 @@ def set_up(self, test_data_file: str): logger.info("========== RUNNING ==============") logger.info(self.scheme) - self.num_calibration_samples = 512 + self.num_calibration_samples = eval_config.get("num_calibration_samples", 512) self.max_seq_length = 2048 def test_lm_eval(self, test_data_file: str):