Skip to content

Commit d06fbbf

Browse files
committed
add renders
1 parent 8230899 commit d06fbbf

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

59 files changed

+966
-54
lines changed

benchmarks/metadata_extraction/benchmark.py

Lines changed: 141 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -142,98 +142,164 @@ def score_benchmark(self, all_scores):
142142
def score_request_answer(self,
143143
image_name: str,
144144
response: dict,
145-
ground_truth: dict) -> dict:
145+
ground_truth: dict,
146+
inferred_from_function=False,
147+
inferred_from_correspondence=False) -> dict:
146148
""" Score the answer.
147149
148150
:param image_name: the name of the image
149151
:param response: the response
150152
:param ground_truth: the ground truth
153+
:param inferred_from_function: whether to filter by persons inferred from function, defaults to False
154+
:param inferred_from_correspondence: whether to filter by persons inferred from correspondence, defaults to False
151155
"""
152156

153-
logging.debug(f"image_name: {image_name}")
154-
logging.debug(f"response: {response}")
155-
logging.debug(f"ground_truth: {ground_truth}")
156-
157157
data = self.prepare_scoring_data(response)
158158

159-
try:
160-
raw_response_letter = data["metadata"]
161-
raw_response_letter["document_number"] = image_name
162-
response_letter = Letter(**raw_response_letter)
163-
except ValueError:
164-
logging.error(f"Error parsing response for {image_name}")
165-
166-
try:
167-
ground_truth["document_number"] = image_name
168-
ground_truth_letter = Letter(**ground_truth)
169-
except ValueError:
170-
logging.error(f"ValueError parsing ground_truth for {image_name}")
171-
except TypeError:
172-
logging.error(f"TypeError parsing ground_truth for {image_name}")
173-
174-
logging.debug(f"response_letter: {response_letter}")
175-
logging.debug(f"ground_truth_letter: {ground_truth_letter}")
159+
response_letter = self.initialize_letter(raw_letter=data["metadata"],
160+
image_name=image_name)
161+
ground_truth_letter = self.initialize_letter(raw_letter=ground_truth,
162+
image_name=image_name)
176163

177164
score = self.score_send_date(ground_truth_letter=ground_truth_letter,
178165
predicted_letter=response_letter)
179166

180167
try:
181168
persons = json.load(open(os.path.join(self.benchmark_dir, "ground_truths", "persons.json")))
182-
except FileNotFoundError:
183-
logging.error("Persons ground truth not found.")
169+
except FileNotFoundError as e:
170+
logging.error(f"{e}: Persons ground truth not found!")
184171

185172
score = score | self.score_persons(sender_or_receiver="sender",
186173
ground_truth_letter=ground_truth_letter,
187174
predicted_letter=response_letter,
188175
persons=persons,
189-
inferred_from_function=False,
190-
inferred_from_correspondence=False)
176+
inferred_from_function=inferred_from_function,
177+
inferred_from_correspondence=inferred_from_correspondence)
191178

192179
score = score | self.score_persons(sender_or_receiver="receiver",
193180
ground_truth_letter=ground_truth_letter,
194181
predicted_letter=response_letter,
195182
persons=persons,
196-
inferred_from_function=False,
197-
inferred_from_correspondence=False)
183+
inferred_from_function=inferred_from_function,
184+
inferred_from_correspondence=inferred_from_correspondence)
198185
return score
199186

200187
def create_request_render(self,
201188
image_name: str,
202189
result: dict,
203190
score: dict,
204-
ground_truth) -> str:
191+
ground_truth: dict,
192+
inferred_from_function=False,
193+
inferred_from_correspondence=False) -> str:
194+
""" Create a render for the request.
195+
196+
:param image_name: the name of the image
197+
:param result: the result
198+
:param score: the score
199+
:param ground_truth: the ground truth
200+
:param inferred_from_function: whether to filter by persons inferred from function, defaults to False
201+
:param inferred_from_correspondence: whether to filter by persons inferred from correspondence, defaults to False
202+
"""
205203

206204
data = self.prepare_scoring_data(result)
207205

208-
try:
209-
raw_response_letter = data["metadata"]
210-
raw_response_letter["document_number"] = image_name
211-
response_letter = Letter(**raw_response_letter)
212-
except ValueError:
213-
logging.error(f"Error parsing response for {image_name}")
206+
response_letter = self.initialize_letter(raw_letter=data["metadata"],
207+
image_name=image_name)
208+
ground_truth_letter = self.initialize_letter(raw_letter=ground_truth,
209+
image_name=image_name)
214210

215-
try:
216-
ground_truth["document_number"] = image_name
217-
ground_truth_letter = Letter(**ground_truth)
218-
except ValueError:
219-
logging.error(f"ValueError parsing ground_truth for {image_name}")
220-
except TypeError:
221-
logging.error(f"TypeError parsing ground_truth for {image_name}")
211+
ground_truth_sender_persons = self.select_persons(sender_or_receiver="sender",
212+
ground_truth_letter=ground_truth_letter,
213+
inferred_from_function=inferred_from_function,
214+
inferred_from_correspondence=inferred_from_correspondence)
215+
ground_truth_receiver_persons = self.select_persons(sender_or_receiver="receiver",
216+
ground_truth_letter=ground_truth_letter,
217+
inferred_from_function=inferred_from_function,
218+
inferred_from_correspondence=inferred_from_correspondence)
219+
220+
ground_truth_persons = ground_truth_sender_persons + ground_truth_receiver_persons
221+
ground_truth_persons = [person for person in ground_truth_persons if person.name != "None"]
222222

223223
try:
224224
persons = json.load(open(os.path.join(self.benchmark_dir, "ground_truths", "persons.json")))
225+
for person in ground_truth_persons:
226+
for key in persons:
227+
if person.name == key["name"]:
228+
person.alternate_names = key["alternateName"]
229+
break
225230
except FileNotFoundError:
226231
logging.error("Persons ground truth not found.")
227232

228-
logging.info(f"prediction: {response_letter}")
229-
logging.info(f"gt: {ground_truth_letter}")
233+
scoring_table = "| Metric | Ground Truth | Prediction | TP | FP | FN |\n"
234+
scoring_table += "|------------------|--------------|------------|----|----|----|\n"
235+
scoring_table += f"| `send_date` | {ground_truth_letter.send_date} | {response_letter.send_date} | {score['send_date_tp']} | {score['send_date_fp']} | {score['send_date_fn']} |\n"
236+
scoring_table += f"| `sender_persons` | {self.make_render_person(persons=ground_truth_sender_persons)} | {self.make_render_person(persons=response_letter.sender_persons)} | {score['sender_persons_tp']} | {score['sender_persons_fp']} | {score['sender_persons_fn']} |\n"
237+
scoring_table += f"| `receiver_persons` | {self.make_render_person(persons=ground_truth_receiver_persons)} | {self.make_render_person(persons=response_letter.receiver_persons)} | {score['receiver_persons_tp']} | {score['receiver_persons_fp']} | {score['receiver_persons_fn']} |\n"
238+
239+
persons_table = "| Name | Alternate Names |\n"
240+
persons_table += "| --- | --- |\n"
241+
242+
logging.info(f"ground_truth_persons: {ground_truth_persons}")
243+
for person in ground_truth_persons:
244+
try:
245+
person.alternate_names.sort()
246+
alt_names = "<br>".join(person.alternate_names)
247+
except (TypeError, AttributeError):
248+
alt_names = "None"
249+
persons_table += f"| {person.name} | {alt_names} |\n"
250+
251+
render = (
252+
f"### Result for {response_letter.document_number}\n"
253+
f"{scoring_table}\n"
254+
f"{persons_table}\n"
255+
f"`inferred_from_function`: {inferred_from_function}\n\n"
256+
f"`inferred_from_correspondence`: {inferred_from_correspondence}\n"
257+
)
258+
259+
return render
260+
261+
@staticmethod
262+
def initialize_letter(raw_letter: dict,
263+
image_name: str) -> Letter:
264+
""" Initialize a Letter object from a dictionary.
265+
266+
:param raw_letter: the raw letter data
267+
:param image_name: the name of the image
268+
"""
269+
270+
try:
271+
raw_letter["document_number"] = image_name
272+
return Letter(**raw_letter)
273+
except (ValueError, TypeError) as e:
274+
logging.error(f"{e} parsing {raw_letter} for {image_name}!")
275+
276+
@staticmethod
277+
def make_render_person(persons: list[Person]) -> str | None:
278+
""" Render a list of persons as a string.
279+
280+
:param persons: the list of persons
281+
"""
230282

231-
return ""
283+
rendered_persons = []
284+
try:
285+
for person in persons:
286+
if person.name == "None":
287+
return None
288+
rendered_persons.append(person.name)
289+
if len(rendered_persons) == 0:
290+
return None
291+
return "<br>".join(rendered_persons)
292+
except TypeError:
293+
return None
232294

233295
@staticmethod
234296
def score_send_date(ground_truth_letter: Letter,
235297
predicted_letter: Letter) -> dict[str, int]:
236-
""" Score 'send_date'. """
298+
""" Score 'send_date'.
299+
300+
:param ground_truth_letter: the ground truth letter
301+
:param predicted_letter: the predicted letter
302+
"""
237303

238304
predicted_date = predicted_letter.send_date
239305
ground_truth_date = ground_truth_letter.send_date
@@ -257,6 +323,31 @@ def score_send_date(ground_truth_letter: Letter,
257323
"send_date_fp": len(predicted_date - ground_truth_date),
258324
"send_date_fn": len(ground_truth_date - predicted_date)}
259325

326+
@staticmethod
327+
def select_persons(sender_or_receiver: Literal["sender", "receiver"],
328+
ground_truth_letter: Letter,
329+
inferred_from_function: bool = False,
330+
inferred_from_correspondence: bool = False
331+
):
332+
""" Select 'sender_persons' or 'receiver_persons' from the ground truth and filter by inference method.
333+
334+
:param sender_or_receiver: whether to select sender or receiver persons
335+
:param ground_truth_letter: the ground truth letter
336+
:param inferred_from_function: whether to filter by persons inferred from function, defaults to False
337+
:param inferred_from_correspondence: whether to filter by persons inferred from correspondence, defaults to False
338+
"""
339+
340+
ground_truth_persons = []
341+
for person in ground_truth_letter.__getattribute__(f"{sender_or_receiver}_persons"):
342+
if inferred_from_function is False and person.inferred_from_function is True:
343+
continue
344+
elif inferred_from_correspondence is False and person.inferred_from_correspondence is True:
345+
continue
346+
else:
347+
ground_truth_persons.append(person)
348+
349+
return ground_truth_persons
350+
260351
def score_persons(self,
261352
sender_or_receiver: Literal["sender", "receiver"],
262353
ground_truth_letter: Letter,
@@ -275,14 +366,10 @@ def score_persons(self,
275366
"""
276367

277368
# select ground truth persons:
278-
ground_truth_persons = []
279-
for person in ground_truth_letter.__getattribute__(f"{sender_or_receiver}_persons"):
280-
if inferred_from_function is False and person.inferred_from_function is True:
281-
continue
282-
elif inferred_from_correspondence is False and person.inferred_from_correspondence is True:
283-
continue
284-
else:
285-
ground_truth_persons.append(person)
369+
ground_truth_persons = self.select_persons(sender_or_receiver=sender_or_receiver,
370+
ground_truth_letter=ground_truth_letter,
371+
inferred_from_function=inferred_from_function,
372+
inferred_from_correspondence=inferred_from_correspondence)
286373

287374
# select predicted persons:
288375
predicted_persons = []

benchmarks/metadata_extraction/person.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,13 @@ class Person:
1010
"""Representation of a person.
1111
1212
:param name: name of the person
13+
:param alternate_names: alternate names of the person
1314
:param inferred_from_function: whether the person was inferred from a job function
1415
:param inferred_from_correspondence: whether the person was inferred from the correspondence history
1516
"""
1617

1718
name: str
19+
alternate_names: list = None
1820
inferred_from_function: bool = False
1921
inferred_from_correspondence: bool = False
2022

renders/2025-03-04/T10/letter01.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
### Result for letter01
2+
| Metric | Ground Truth | Prediction | TP | FP | FN |
3+
|------------------|--------------|------------|----|----|----|
4+
| `send_date` | 1926-02-16 | 1926-02-16 | 1 | 0 | 0 |
5+
| `sender_persons` | Groschupf-Jaeger, Louis<br>Ritter-Dreier, Fritz | Basler Rheinschiffahrt-Aktiengesellschaft | 0 | 1 | 2 |
6+
| `receiver_persons` | Christ-Wackernagel, Paul | Herrn Christ | 1 | 0 | 0 |
7+
8+
| Name | Alternate Names |
9+
| --- | --- |
10+
| Groschupf-Jaeger, Louis | Groschopf<br>Groschupf<br>Herr Groschupf<br>Herrn Groschupf |
11+
| Ritter-Dreier, Fritz | Fritz Ritter<br>Herr Ritter<br>Herrn Fritz Ritter<br>J.A. Ritter<br>J.A.Ritter<br>Ritter |
12+
| Christ-Wackernagel, Paul | Christ<br>Christ-Wackernagel<br>Herr Christ<br>Herr P. Christ<br>Herr P. Christ - Wackernagel<br>Herr Vice- präsident Christ<br>Herren Christ<br>Herrn Christ<br>Herrn P. Christ<br>Herrn P. Christ - Wackernagel<br>Herrn P. Christ-Wackernagel<br>Herrn Paul Christ<br>Herrn Vizepräsidenten Christ<br>P. Christ-Wackernagel<br>Paul Christ<br>Paul Christ-Wackernagel |
13+
14+
`inferred_from_function`: False
15+
16+
`inferred_from_correspondence`: False

renders/2025-03-04/T10/letter02.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
### Result for letter02
2+
| Metric | Ground Truth | Prediction | TP | FP | FN |
3+
|------------------|--------------|------------|----|----|----|
4+
| `send_date` | 1926-03-04 | 1926-03-04 | 1 | 0 | 0 |
5+
| `sender_persons` | Ritter-Wehrle, Oskar | Basler Rheinschiffahrt-Aktiengesellschaft | 0 | 1 | 1 |
6+
| `receiver_persons` | Christ-Wackernagel, Paul | Herrn P. Christ | 1 | 0 | 0 |
7+
8+
| Name | Alternate Names |
9+
| --- | --- |
10+
| Ritter-Wehrle, Oskar | <br>Herren Direktor Ritter<br>O. Ritter |
11+
| Christ-Wackernagel, Paul | Christ<br>Christ-Wackernagel<br>Herr Christ<br>Herr P. Christ<br>Herr P. Christ - Wackernagel<br>Herr Vice- präsident Christ<br>Herren Christ<br>Herrn Christ<br>Herrn P. Christ<br>Herrn P. Christ - Wackernagel<br>Herrn P. Christ-Wackernagel<br>Herrn Paul Christ<br>Herrn Vizepräsidenten Christ<br>P. Christ-Wackernagel<br>Paul Christ<br>Paul Christ-Wackernagel |
12+
13+
`inferred_from_function`: False
14+
15+
`inferred_from_correspondence`: False

renders/2025-03-04/T10/letter03.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
### Result for letter03
2+
| Metric | Ground Truth | Prediction | TP | FP | FN |
3+
|------------------|--------------|------------|----|----|----|
4+
| `send_date` | 1926-03-24 | 1926-03-24 | 1 | 0 | 0 |
5+
| `sender_persons` | Ritter-Dreier, Fritz<br>Kachelhofer-Gerber, Frederick Charles | Basler Rheinschiffahrt-Aktiengesellschaft | 0 | 1 | 2 |
6+
| `receiver_persons` | Christ-Wackernagel, Paul | Herrn P. Christ-Weckermagel | 0 | 1 | 1 |
7+
8+
| Name | Alternate Names |
9+
| --- | --- |
10+
| Ritter-Dreier, Fritz | Fritz Ritter<br>Herr Ritter<br>Herrn Fritz Ritter<br>J.A. Ritter<br>J.A.Ritter<br>Ritter |
11+
| Kachelhofer-Gerber, Frederick Charles | None |
12+
| Christ-Wackernagel, Paul | Christ<br>Christ-Wackernagel<br>Herr Christ<br>Herr P. Christ<br>Herr P. Christ - Wackernagel<br>Herr Vice- präsident Christ<br>Herren Christ<br>Herrn Christ<br>Herrn P. Christ<br>Herrn P. Christ - Wackernagel<br>Herrn P. Christ-Wackernagel<br>Herrn Paul Christ<br>Herrn Vizepräsidenten Christ<br>P. Christ-Wackernagel<br>Paul Christ<br>Paul Christ-Wackernagel |
13+
14+
`inferred_from_function`: False
15+
16+
`inferred_from_correspondence`: False

renders/2025-03-04/T10/letter04.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
### Result for letter04
2+
| Metric | Ground Truth | Prediction | TP | FP | FN |
3+
|------------------|--------------|------------|----|----|----|
4+
| `send_date` | 1926-03-26 | 1926-03-26 | 1 | 0 | 0 |
5+
| `sender_persons` | Krasting, Wilhelm | W. W. Moser | 0 | 1 | 1 |
6+
| `receiver_persons` | Christ-Wackernagel, Paul | Herrn Christ | 1 | 0 | 0 |
7+
8+
| Name | Alternate Names |
9+
| --- | --- |
10+
| Krasting, Wilhelm | Dr. Krasting<br>Dr. W. Krasting<br>Herr Dr. Krasting<br>Herrn Dr. Krasting<br>Herrn Dr. W.Krasting |
11+
| Christ-Wackernagel, Paul | Christ<br>Christ-Wackernagel<br>Herr Christ<br>Herr P. Christ<br>Herr P. Christ - Wackernagel<br>Herr Vice- präsident Christ<br>Herren Christ<br>Herrn Christ<br>Herrn P. Christ<br>Herrn P. Christ - Wackernagel<br>Herrn P. Christ-Wackernagel<br>Herrn Paul Christ<br>Herrn Vizepräsidenten Christ<br>P. Christ-Wackernagel<br>Paul Christ<br>Paul Christ-Wackernagel |
12+
13+
`inferred_from_function`: False
14+
15+
`inferred_from_correspondence`: False

renders/2025-03-04/T10/letter05.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
### Result for letter05
2+
| Metric | Ground Truth | Prediction | TP | FP | FN |
3+
|------------------|--------------|------------|----|----|----|
4+
| `send_date` | 1926-03-29 | 1926-03-29 | 1 | 0 | 0 |
5+
| `sender_persons` | None | null | 0 | 0 | 0 |
6+
| `receiver_persons` | Krasting, Wilhelm | Dr. W. Krasting | 1 | 0 | 0 |
7+
8+
| Name | Alternate Names |
9+
| --- | --- |
10+
| Krasting, Wilhelm | Dr. Krasting<br>Dr. W. Krasting<br>Herr Dr. Krasting<br>Herrn Dr. Krasting<br>Herrn Dr. W.Krasting |
11+
12+
`inferred_from_function`: False
13+
14+
`inferred_from_correspondence`: False

renders/2025-03-04/T10/letter06.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
### Result for letter06
2+
| Metric | Ground Truth | Prediction | TP | FP | FN |
3+
|------------------|--------------|------------|----|----|----|
4+
| `send_date` | 1926-03-29 | 1926-03-29 | 1 | 0 | 0 |
5+
| `sender_persons` | None | None | 0 | 0 | 0 |
6+
| `receiver_persons` | None | Basler Rheinschiffahrt-Aktiengesellschaft | 0 | 1 | 0 |
7+
8+
| Name | Alternate Names |
9+
| --- | --- |
10+
11+
`inferred_from_function`: False
12+
13+
`inferred_from_correspondence`: False

0 commit comments

Comments
 (0)