Skip to content

Commit d0de78e

Browse files
authored
fix: ensure sentence_transformers_similarity score is a Python float, not a NumPy float (#9665)
* fix: ensure sentence_transformers_similarity score is a float to prevent serialization issues
* solve PR comments
1 parent f2012a4 commit d0de78e

File tree

3 files changed

+33
-1
lines changed

3 files changed

+33
-1
lines changed

haystack/components/rankers/sentence_transformers_similarity.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ def run(
276276
ranked_docs = []
277277
for el in ranking_result:
278278
index = el["corpus_id"]
279-
score = el["score"]
279+
score = float(el["score"])
280280
document = copy(documents[index])
281281
document.score = score
282282
ranked_docs.append(document)
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
fixes:
3+
- |
4+
Ensure that the `score` field in `SentenceTransformersSimilarityRanker` is
5+
returned as a Python `float` instead of `numpy.float32`. This prevents potential
6+
serialization issues in downstream integrations.

test/components/rankers/test_sentence_transformers_similarity.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from unittest.mock import MagicMock, patch
66

7+
import numpy as np
78
import pytest
89
import torch
910

@@ -350,6 +351,24 @@ def test_score_threshold(self):
350351
out = ranker.run(query="test", documents=documents)
351352
assert len(out["documents"]) == 1
352353

354+
def test_scores_cast_to_python_float_when_numpy_scalars_returned(self):
355+
mock_cross_encoder = MagicMock()
356+
ranker = SentenceTransformersSimilarityRanker(model="model")
357+
ranker._cross_encoder = mock_cross_encoder
358+
359+
# Simulate backend returning numpy scalar types
360+
mock_cross_encoder.rank.return_value = [
361+
{"score": np.float32(0.123), "corpus_id": 0},
362+
{"score": np.float64(0.456), "corpus_id": 1},
363+
]
364+
365+
documents = [Document(content="doc 0"), Document(content="doc 1")]
366+
out = ranker.run(query="test", documents=documents)
367+
368+
assert len(out["documents"]) == 2
369+
for d in out["documents"]:
370+
assert isinstance(d.score, float)
371+
353372
@pytest.mark.integration
354373
@pytest.mark.slow
355374
def test_run(self):
@@ -373,6 +392,9 @@ def test_run(self):
373392
assert docs_after[1].score == pytest.approx(sorted_scores[1], abs=1e-6)
374393
assert docs_after[2].score == pytest.approx(sorted_scores[2], abs=1e-6)
375394

395+
for doc in docs_after:
396+
assert isinstance(doc.score, float)
397+
376398
@pytest.mark.integration
377399
@pytest.mark.slow
378400
def test_run_top_k(self):
@@ -393,6 +415,9 @@ def test_run_top_k(self):
393415
sorted_scores = sorted([doc.score for doc in docs_after], reverse=True)
394416
assert [doc.score for doc in docs_after] == sorted_scores
395417

418+
for doc in docs_after:
419+
assert isinstance(doc.score, float)
420+
396421
@pytest.mark.integration
397422
@pytest.mark.slow
398423
def test_run_single_document(self):
@@ -403,3 +428,4 @@ def test_run_single_document(self):
403428
docs_after = output["documents"]
404429

405430
assert len(docs_after) == 1
431+
assert isinstance(docs_after[0].score, float)

0 commit comments

Comments
 (0)