-
Notifications
You must be signed in to change notification settings - Fork 2.4k
feat: simplify extraction pipeline and add batch entity summarization #1224
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5081d02
973a9a1
54fb84f
f9600a8
e68ca6d
6a033e2
df2a5f6
6452ee8
ad9d82e
7132b51
7310c78
045bce7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -113,30 +113,59 @@ def _get_model_for_size(self, model_size: ModelSize) -> str: | |
| else: | ||
| return self.model or DEFAULT_MODEL | ||
|
|
||
| def _handle_structured_response(self, response: Any) -> dict[str, Any]: | ||
| """Handle structured response parsing and validation.""" | ||
| def _handle_structured_response( | ||
| self, response: Any | ||
| ) -> tuple[dict[str, Any], int, int]: | ||
| """Handle structured response parsing and validation. | ||
|
|
||
| Returns: | ||
| tuple: (parsed_response, input_tokens, output_tokens) | ||
| """ | ||
| response_object = response.output_text | ||
|
|
||
| # Extract token usage | ||
| input_tokens = 0 | ||
| output_tokens = 0 | ||
| if hasattr(response, 'usage') and response.usage: | ||
| input_tokens = getattr(response.usage, 'input_tokens', 0) or 0 | ||
| output_tokens = getattr(response.usage, 'output_tokens', 0) or 0 | ||
|
|
||
| if response_object: | ||
| return json.loads(response_object) | ||
| elif response_object.refusal: | ||
| raise RefusalError(response_object.refusal) | ||
| return json.loads(response_object), input_tokens, output_tokens | ||
| elif hasattr(response, 'refusal') and response.refusal: | ||
| raise RefusalError(response.refusal) | ||
| else: | ||
| raise Exception(f'Invalid response from LLM: {response_object.model_dump()}') | ||
| raise Exception(f'Invalid response from LLM: {response}') | ||
|
|
||
| def _handle_json_response(self, response: Any) -> tuple[dict[str, Any], int, int]: | ||
| """Handle JSON response parsing. | ||
|
|
||
| def _handle_json_response(self, response: Any) -> dict[str, Any]: | ||
| """Handle JSON response parsing.""" | ||
| Returns: | ||
| tuple: (parsed_response, input_tokens, output_tokens) | ||
| """ | ||
| result = response.choices[0].message.content or '{}' | ||
| return json.loads(result) | ||
|
|
||
| # Extract token usage | ||
| input_tokens = 0 | ||
| output_tokens = 0 | ||
| if hasattr(response, 'usage') and response.usage: | ||
| input_tokens = getattr(response.usage, 'prompt_tokens', 0) or 0 | ||
| output_tokens = getattr(response.usage, 'completion_tokens', 0) or 0 | ||
|
|
||
| return json.loads(result), input_tokens, output_tokens | ||
|
|
||
| async def _generate_response( | ||
| self, | ||
| messages: list[Message], | ||
| response_model: type[BaseModel] | None = None, | ||
| max_tokens: int = DEFAULT_MAX_TOKENS, | ||
| model_size: ModelSize = ModelSize.medium, | ||
| ) -> dict[str, Any]: | ||
| """Generate a response using the appropriate client implementation.""" | ||
| ) -> tuple[dict[str, Any], int, int]: | ||
| """Generate a response using the appropriate client implementation. | ||
|
|
||
| Returns: | ||
| tuple: (response_dict, input_tokens, output_tokens) | ||
| """ | ||
| openai_messages = self._convert_messages_to_openai_format(messages) | ||
| model = self._get_model_for_size(model_size) | ||
|
|
||
|
|
@@ -210,12 +239,20 @@ async def generate_response( | |
|
|
||
| retry_count = 0 | ||
| last_error = None | ||
| total_input_tokens = 0 | ||
| total_output_tokens = 0 | ||
|
|
||
| while retry_count <= self.MAX_RETRIES: | ||
| try: | ||
| response = await self._generate_response( | ||
| response, input_tokens, output_tokens = await self._generate_response( | ||
| messages, response_model, max_tokens, model_size | ||
| ) | ||
| total_input_tokens += input_tokens | ||
| total_output_tokens += output_tokens | ||
|
|
||
| # Record token usage | ||
| self.token_tracker.record(prompt_name, total_input_tokens, total_output_tokens) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Token usage is recorded even when there's an exception during retry attempts. The `token_tracker.record` call runs inside the retry loop before the response is returned, so a failed attempt's partial usage can be recorded more than once. Consider moving the `token_tracker.record` call so it executes only once, after a successful response. |
||
|
|
||
| return response | ||
| except (RateLimitError, RefusalError): | ||
| # These errors should not trigger retries | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.