|
37 | 37 | from http import HTTPStatus |
38 | 38 |
|
39 | 39 |
|
40 | | -from typing import TYPE_CHECKING, List, Dict, Tuple |
| 40 | +from typing import TYPE_CHECKING, List, Dict, Tuple, Optional |
41 | 41 |
|
42 | 42 | if TYPE_CHECKING: |
43 | 43 | from ravendb.documents.session import SessionInfo |
@@ -633,7 +633,13 @@ def _send_request_to_server( |
633 | 633 | self._throw_failed_to_contact_all_nodes(command, request) |
634 | 634 |
|
635 | 635 | return None |
636 | | - except IOError as e: |
| 636 | + except (requests.RequestException, OSError) as e: |
| 637 | + # RDBC-948: https://issues.hibernatingrhinos.com/issue/RDBC-948/Python-client-connection-failover-breaks-with-unknown-DNS-name-or-server-is-down. |
| 638 | + # Handle failover on network errors from both requests and the OS: |
| 639 | + # - RequestException covers requests' network stack (connect, TLS, proxies, etc.). |
| 640 | + # - OSError covers socket-level issues like DNS getaddrinfo on some platforms. |
| 641 | + # Different OS/resolvers surface the same fault differently; catching both mirrors the C# client |
| 642 | + # (HttpRequestException/SocketException) and makes failover reliable. |
637 | 643 | if not should_retry: |
638 | 644 | raise |
639 | 645 |
|
@@ -805,7 +811,8 @@ def _throw_failed_to_contact_all_nodes(self, command: RavenCommand, request: req |
805 | 811 | ) |
806 | 812 |
|
807 | 813 | if len(command.failed_nodes) == 1: |
808 | | - raise command.failed_nodes.popitem() |
| 814 | + # raise the single recorded exception |
| 815 | + raise next(iter(command.failed_nodes.values())) |
809 | 816 |
|
810 | 817 | message = ( |
811 | 818 | f"Tried to send {command._result_class.__name__} request via {request.method}" |
@@ -1159,81 +1166,87 @@ def __handle_server_down( |
1159 | 1166 | if command.failed_nodes is None: |
1160 | 1167 | command.failed_nodes = {} |
1161 | 1168 |
|
1162 | | - return ( |
1163 | | - False # todo: command.failed_nodes[chosen_node] = self.__read_exception_from_server(request, response, e) |
1164 | | - ) |
| 1169 | + # record the failure for this node |
| 1170 | + if not command.is_failed_with_node(chosen_node): |
| 1171 | + command.failed_nodes[chosen_node] = self.__read_exception_from_server(request, response, e) |
1165 | 1172 |
|
| 1173 | + # If the node is not part of the topology, we can't failover using selector. |
1166 | 1174 | if node_index is None: |
1167 | | - # We executed request over a node not in the topology. This means no failover... |
1168 | 1175 | return False |
1169 | 1176 |
|
| 1177 | + # If we don't have a selector yet, we also cannot failover. |
1170 | 1178 | if self._node_selector is None: |
1171 | | - # todo: spawnHealthChecks(chosenNode, nodeIndex) |
1172 | 1179 | return False |
1173 | 1180 |
|
1174 | | - # As the server is down, we discard the server version to ensure we update when it goes up. |
| 1181 | + # As the server is down, discard server version to ensure it updates when back up. |
1175 | 1182 | chosen_node.discard_server_version() |
1176 | 1183 |
|
| 1184 | + # Mark the node as failed to move selection forward |
1177 | 1185 | self._node_selector.on_failed_request(node_index) |
1178 | 1186 |
|
| 1187 | + # For broadcastable commands, attempt to broadcast instead of single-node retry. |
1179 | 1188 | if self.should_broadcast(command): |
1180 | 1189 | command.result = self.__broadcast(command, session_info) |
1181 | 1190 | return True |
1182 | 1191 |
|
1183 | | - # todo: self.spawn_health_checks(chosen_node, node_index) |
1184 | | - |
| 1192 | + # Choose the next preferred node and retry |
1185 | 1193 | index_node_and_etag = self._node_selector.get_preferred_node_with_topology() |
| 1194 | + |
| 1195 | + # If topology changed since we started, clear failed nodes record to allow retries |
1186 | 1196 | if command.failover_topology_etag != self.topology_etag: |
1187 | 1197 | command.failed_nodes.clear() |
1188 | 1198 | command.failover_topology_etag = self.topology_etag |
1189 | 1199 |
|
| 1200 | + # Avoid infinite loop if the next node is already marked as failed |
1190 | 1201 | if index_node_and_etag.current_node in command.failed_nodes: |
1191 | 1202 | return False |
1192 | 1203 |
|
1193 | | - self.__on_failed_request_invoke(url, e, request, response) |
| 1204 | + # Notify listeners about the failed request with full details |
| 1205 | + self.__on_failed_request_invoke_details(url, e, request, response) |
1194 | 1206 |
|
| 1207 | + # Retry the command on the next node |
1195 | 1208 | self.execute( |
1196 | 1209 | index_node_and_etag.current_node, index_node_and_etag.current_index, command, should_retry, session_info |
1197 | 1210 | ) |
1198 | 1211 |
|
1199 | 1212 | return True |
1200 | 1213 |
|
@staticmethod
def __read_exception_from_server(
    request: requests.Request, response: requests.Response, e: Optional[Exception]
) -> Exception:
    """Build the exception to record for a request that failed against a node.

    When the response carries a body, the body is decoded as UTF-8 JSON into an
    ``ExceptionDispatcher.ExceptionSchema`` and passed to ``ExceptionDispatcher.get``
    together with the response status code. If that fails for any reason, a schema
    describing an unparsable response (with whatever text was decoded) is used instead.
    Without a usable body, the schema is synthesized from ``e``.

    :param request: the request that was sent; may be ``None``, in which case an
        empty URL is reported.
    :param response: the response received, or ``None`` when the server could not
        be reached.
    :param e: the exception caught while sending, or ``None``.
    :return: whatever ``ExceptionDispatcher.get`` returns for the resulting schema.
    """
    url = request.url if request is not None else ""

    # Prefer the server-provided error when available.
    # NOTE: compare with None explicitly. requests.Response defines __bool__ as `ok`,
    # so an error response (status >= 400) is falsy and a plain truthiness test
    # would skip exactly the responses this method exists to interpret.
    if response is not None and response.content:
        raw = None
        try:
            raw = response.content.decode("utf-8")

            # json invokes object_hook for every decoded JSON object, nested ones included.
            def _decode(d: dict) -> ExceptionDispatcher.ExceptionSchema:
                return ExceptionDispatcher.ExceptionSchema(
                    d.get("url"), d.get("class"), d.get("message"), d.get("error")
                )

            return ExceptionDispatcher.get(json.loads(raw, object_hook=_decode), response.status_code, e)
        except Exception:
            # Deliberately broad: a body that cannot be decoded, parsed or dispatched
            # must still yield an exception for the failed node. `raw` stays None
            # when the UTF-8 decode itself failed.
            schema = ExceptionDispatcher.ExceptionSchema(
                url,
                "Unparsable Server Response",
                "Unrecognized response from server",
                raw,
            )
            return ExceptionDispatcher.get(schema, response.status_code, e)

    # Fallback when there is no usable response body.
    has_cause = e is not None
    cls = type(e).__name__ if has_cause else "RequestFailed"
    msg = str(e) if has_cause else "Request failed"
    details = f"An exception occurred while contacting {url}."
    if has_cause:
        details += f"{os.linesep}{msg}"

    schema = ExceptionDispatcher.ExceptionSchema(url, cls, msg, details)
    # Keep the real status code of a body-less error response; only a missing
    # response is reported as SERVICE_UNAVAILABLE.
    status = response.status_code if response is not None else HTTPStatus.SERVICE_UNAVAILABLE
    return ExceptionDispatcher.get(schema, status, e if has_cause else RuntimeError(msg))
1237 | 1250 |
|
1238 | 1251 | class IndexAndResponse: |
1239 | 1252 | def __init__(self, index: int, response: requests.Response): |
|
0 commit comments