diff --git a/.github/workflows/pytests.yml b/.github/workflows/pytests.yml index 35d6ef8..bbd1f8d 100644 --- a/.github/workflows/pytests.yml +++ b/.github/workflows/pytests.yml @@ -2,7 +2,7 @@ name: Pytest on: push: - branches: [ main ] + branches: [ main, ete4 ] pull_request: branches: [ main ] diff --git a/har2tree/har2tree.py b/har2tree/har2tree.py index a875f56..5649dc6 100644 --- a/har2tree/har2tree.py +++ b/har2tree/har2tree.py @@ -313,20 +313,20 @@ def __init__(self, har_path: Path, capture_uuid: str): # Generate cookies lookup tables # All the initial cookies sent with the initial request given to splash self.initial_cookies: dict[str, dict[str, Any]] = {} - if hasattr(self._nodes_list[0], 'cookies_sent'): + if 'cookies_sent' in self._nodes_list[0].props: self.initial_cookies = {key: cookie for key, cookie in self._nodes_list[0].cookies_sent.items()} # Dictionary of all cookies received during the capture self.cookies_received: dict[str, list[tuple[str, URLNode, bool]]] = defaultdict(list) for n in self._nodes_list: - if hasattr(n, 'cookies_received'): + if 'cookies_received' in n.props: for domain, c_received, is_3rd_party in n.cookies_received: self.cookies_received[c_received].append((domain, n, is_3rd_party)) # Dictionary of all cookies sent during the capture self.cookies_sent: dict[str, list[URLNode]] = defaultdict(list) for n in self._nodes_list: - if hasattr(n, 'cookies_sent'): + if 'cookies_sent' in n.props: for c_sent in n.cookies_sent.keys(): self.cookies_sent[c_sent].append(n) @@ -342,7 +342,7 @@ def __init__(self, har_path: Path, capture_uuid: str): self.locally_created_not_sent: dict[str, dict[str, Any]] = self.locally_created.copy() # Cross reference the source of the cookie for n in self._nodes_list: - if hasattr(n, 'cookies_sent'): + if 'cookies_sent' in n.props: for c_sent in n.cookies_sent: # Remove cookie from list if sent during the capture. self.locally_created_not_sent.pop(c_sent, None) @@ -359,7 +359,7 @@ def __init__(self, har_path: Path, capture_uuid: str): # Add context if urls are found in external_ressources for n in self._nodes_list: - if hasattr(n, 'external_ressources'): + if 'external_ressources' in n.props: for type_ressource, urls in n.external_ressources.items(): for url in urls: if url not in self.all_url_requests: @@ -369,39 +369,39 @@ def __init__(self, har_path: Path, capture_uuid: str): # If the body of the response was empty, skip. continue if type_ressource == 'img': - node.add_feature('image', True) + node.add_prop('image', True) if type_ressource == 'script': - node.add_feature('js', True) + node.add_prop('js', True) if type_ressource == 'video': - node.add_feature('video', True) + node.add_prop('video', True) if type_ressource == 'audio': - node.add_feature('audio', True) + node.add_prop('audio', True) if type_ressource == 'iframe': - node.add_feature('iframe', True) + node.add_prop('iframe', True) if type_ressource == 'embed': # FIXME other icon? - node.add_feature('octet_stream', True) + node.add_prop('octet_stream', True) if type_ressource == 'source': # FIXME: Can be audio, video, or picture - node.add_feature('octet_stream', True) + node.add_prop('octet_stream', True) # NOTE: the URL is probably not a CSS # if type_ressource == 'link': # FIXME: Probably a css? 
- # node.add_feature('css', True) + # node.add_prop('css', True) if type_ressource == 'object': # FIXME: Same as embed, but more things - node.add_feature('octet_stream', True) + node.add_prop('octet_stream', True) self.url_tree = self._nodes_list.pop(0) @property def initial_referer(self) -> str | None: '''The referer passed to the first URL in the tree''' - if hasattr(self.url_tree, 'referer'): + if 'referer' in self.url_tree.props: return self.url_tree.referer return None @@ -442,7 +442,7 @@ def stats(self) -> dict[str, Any]: @property def redirects(self) -> list[str]: """List of redirects for this tree""" - return [a.name for a in reversed(self.rendered_node.get_ancestors())] + [self.rendered_node.name] + return [a.name for a in reversed(list(self.rendered_node.ancestors()))] + [self.rendered_node.name] @property def root_referer(self) -> str | None: @@ -472,7 +472,7 @@ def build_all_hashes(self, algorithm: str='sha1') -> dict[str, list[URLNode]]: h = hashlib.new(algorithm) h.update(urlnode.body.getbuffer()) to_return[h.hexdigest()].append(urlnode) - if hasattr(urlnode, 'embedded_ressources'): + if 'embedded_ressources' in urlnode.props: for _mimetype, blobs in urlnode.embedded_ressources.items(): for blob in blobs: h = hashlib.new(algorithm) @@ -515,22 +515,22 @@ def _load_url_entries(self) -> None: n = URLNode(capture_uuid=self.har.capture_uuid, name=unquote_plus(url_entry['request']['url'])) n.load_har_entry(url_entry, list(self.all_url_requests.keys())) - if hasattr(n, 'redirect_url'): + if 'redirect_url' in n.props: self.all_redirects.append(n.redirect_url) - if hasattr(n, 'initiator_url'): + if 'initiator_url' in n.props: # The HAR file was created by chrome/chromium and we got the _initiator key self.all_initiator_url[n.initiator_url].append(n.name) if url_entry['startedDateTime'] in self.har.pages_start_times: for page in self.har.pages_start_times[url_entry['startedDateTime']]: - if hasattr(n, 'pageref') and page['id'] == n.pageref: + if 'pageref' in n.props and page['id'] == n.pageref: # This node is the root entry of a page. Can be used as a fallback when we build the tree self.pages_root[n.pageref] = n.uuid break # NOTE 2021-05-28: Ignore referer for first entry - if hasattr(n, 'referer') and i > 0: + if 'referer' in n.props and i > 0: # NOTE 2021-05-14: referer to self are a real thing: url -> POST to self if n.name != n.referer or ('method' in n.request and n.request['method'] == 'POST'): self.all_referer[n.referer].append(n.name) @@ -544,27 +544,26 @@ def _load_url_entries(self) -> None: for page in pages: if page['id'] not in self.pages_root: for node in self._nodes_list: - if not hasattr(node, 'pageref'): + if 'pageref' not in node.props: # 2022-11-19: No pageref for this node in the HAR file, # this is weird but we need it as a fallback. 
- node.add_feature('pageref', page['id']) + node.add_prop('pageref', page['id']) if node.pageref == page['id']: self.pages_root[node.pageref] = node.uuid break def get_host_node_by_uuid(self, uuid: str) -> HostNode: """Returns the node with this UUID from the HostNode tree""" - return self.hostname_tree.search_nodes(uuid=uuid)[0] + return self.hostname_tree.get_first_by_feature('uuid', uuid, expect_missing=False) def get_url_node_by_uuid(self, uuid: str) -> URLNode: """Returns the node with this UUID from the URLNode tree""" - return self.url_tree.search_nodes(uuid=uuid)[0] + return self.url_tree.get_first_by_feature('uuid', uuid, expect_missing=False) @property def rendered_node(self) -> URLNode: - node = self.url_tree.search_nodes(name=self.har.final_redirect) - if node: - return node[0] + if node := self.url_tree.get_first_by_feature('name', self.har.final_redirect, expect_missing=True): + return node browser_errors = ['chrome-error', 'about:blank'] if self.har.final_redirect and not any(self.har.final_redirect.startswith(r) for r in browser_errors): @@ -574,7 +573,7 @@ def rendered_node(self) -> URLNode: pass # Just try to get the best guess: first node after JS/HTTP redirects curnode = self.url_tree - while hasattr(curnode, 'redirect') and curnode.redirect: + while 'redirect' in curnode.props and curnode.redirect: for child in curnode.children: if child.name == curnode.redirect_url: curnode = child @@ -614,7 +613,7 @@ def make_hostname_tree(self, root_nodes_url: URLNode | list[URLNode], root_node_ child_node_hostname.add_url(child_node_url) - if not child_node_url.is_leaf(): + if not child_node_url.is_leaf: sub_roots[child_node_hostname].append(child_node_url) for child_node_hostname, child_nodes_url in sub_roots.items(): @@ -654,13 +653,13 @@ def make_tree(self) -> URLNode: @trace_make_subtree_fallback def _make_subtree_fallback(self, node: URLNode, dev_debug: bool=False) -> None: - if hasattr(node, 'referer'): + if 'referer' in node.props: # 2022-04-28: the node has a referer, but for some reason, it could't be attached to the tree # Probable reason: the referer is a part of the URL (hostname) # FIXME: this is a very dirty fix, but I'm not sure we can do it any better if (referer_hostname := urlparse(node.referer).hostname): # the referer has a hostname - if (nodes_with_hostname := self.url_tree.search_nodes(hostname=referer_hostname)): + if nodes_with_hostname := list(self.url_tree.search_nodes(hostname=referer_hostname)): # the hostname has at least a node in the tree for node_with_hostname in nodes_with_hostname: if not node_with_hostname.empty_response: @@ -685,14 +684,14 @@ def _make_subtree_fallback(self, node: URLNode, dev_debug: bool=False) -> None: if dev_debug: self.logger.warning(f'Failed to attach URLNode in the normal process, attaching node to page {node.pageref} - Node: {page_root_node.uuid} - {page_root_node.name}.') self._make_subtree(page_root_node, [node]) - elif self.url_tree.search_nodes(name=self.har.final_redirect): + elif final_redirect := self.url_tree.get_first_by_feature('name', self.har.final_redirect, expect_missing=True): # Generally, when we have a bunch of redirects, they do not branch out before the final landing page # *but* it is not always the case: some intermediary redirects will have calls to 3rd party pages. # Hopefully, this last case was taken care of in the branch above. # In this branch, we get the landing page after the redirects (if any), and attach the node to it. 
if dev_debug: self.logger.warning(f'Failed to attach URLNode in the normal process, attaching node to final redirect: {self.har.final_redirect}.') - self._make_subtree(self.url_tree.search_nodes(name=self.har.final_redirect)[0], [node]) + self._make_subtree(final_redirect, [node]) elif 'pages' in self.har.har['log']: # No luck, the node is root for this pageref, let's attach it to the prior page in the list, or the very first node (tree root) page_before = self.har.har['log']['pages'][0] @@ -745,7 +744,7 @@ def _make_subtree(self, root: URLNode, nodes_to_attach: list[URLNode] | None=Non for unode in unodes: # NOTE: as we're calling the method recursively, a node containing URLs in its external_ressources will attach # the the subnodes to itself, even if the subnodes have a different referer. It will often be correct, but not always. - if hasattr(unode, 'redirect') and not hasattr(unode, 'redirect_to_nothing'): + if 'redirect' in unode.props and 'redirect_to_nothing' not in unode.props: # If the subnode has a redirect URL set, we get all the requests matching this URL # One may think the entry related to this redirect URL has a referer to the parent. One would be wrong. # URL 1 has a referer, and redirects to URL 2. URL 2 has the same referer as URL 1. @@ -787,7 +786,9 @@ def _make_subtree(self, root: URLNode, nodes_to_attach: list[URLNode] | None=Non # The URL (unode.name) is in the list of known urls initiating calls for u in self.all_initiator_url[unode.name]: matching_urls = [url_node for url_node in self.all_url_requests[u] - if url_node in self._nodes_list and hasattr(url_node, 'initiator_url') and url_node.initiator_url == unode.name] + if url_node in self._nodes_list + and 'initiator_url' in url_node.props + and url_node.initiator_url == unode.name] self._nodes_list = [node for node in self._nodes_list if node not in matching_urls] if dev_debug: self.logger.warning(f'Found via initiator from {unode.name} to {matching_urls}.') @@ -810,7 +811,7 @@ def _make_subtree(self, root: URLNode, nodes_to_attach: list[URLNode] | None=Non self._nodes_list = [node for node in self._nodes_list if node != url_node] self._make_subtree(unode, [url_node]) - if hasattr(unode, 'external_ressources'): + if 'external_ressources' in unode.props: # the url loads external things, and some of them have no referer.... 
for external_tag, links in unode.external_ressources.items(): for link in links: diff --git a/har2tree/nodes.py b/har2tree/nodes.py index 4df313a..5e27252 100644 --- a/har2tree/nodes.py +++ b/har2tree/nodes.py @@ -8,8 +8,9 @@ import ipaddress import json import logging -import uuid import re +import uuid +import warnings from base64 import b64decode from datetime import datetime, timedelta @@ -17,13 +18,13 @@ from hashlib import sha256 from io import BytesIO from pathlib import Path -from typing import Any +from typing import Any, overload, Literal from collections.abc import MutableMapping from urllib.parse import unquote_plus, urlparse, urljoin import filetype # type: ignore from bs4 import BeautifulSoup -from ete3 import TreeNode # type: ignore +from ete4 import Tree # type: ignore from publicsuffixlist import PublicSuffixList # type: ignore from w3lib.html import strip_html5_whitespace from w3lib.url import canonicalize_url, safe_url_string @@ -39,24 +40,61 @@ def get_public_suffix_list() -> PublicSuffixList: return PublicSuffixList() -class HarTreeNode(TreeNode): # type: ignore[misc] +class HarTreeNode(Tree): # type: ignore[misc] - def __init__(self, capture_uuid: str, **kwargs: Any): + def __init__(self, capture_uuid: str, name: str | None=None): """Node dumpable in json to display with d3js""" - super().__init__(**kwargs) + super().__init__() logger = logging.getLogger(f'{__name__}.{self.__class__.__name__}') self.logger = Har2TreeLogAdapter(logger, {'uuid': capture_uuid}) - self.add_feature('uuid', str(uuid.uuid4())) - self.features_to_skip = {'dist', 'support'} + self.add_prop('uuid', str(uuid.uuid4())) + if name: + self.add_prop('name', name) + self.features_to_skip: set[str] = set() + + def add_feature(self, feature_name: str, feature_value: Any) -> None: + warnings.warn("Deprecated in ete4, use add_prop instead", DeprecationWarning) + self.add_prop(feature_name, feature_value) + + def __getattr__(self, attribute: str) -> Any: + """Ete3 was storing the properties as attributes in the node, ete4 has them in a dict. + This method allows to simulate the ete3 behavior, but the properties are still stored in the dict. + """ + if attribute in self.props: + warnings.warn("Deprecated in ete4, use get_prop instead", DeprecationWarning) + return self.props[attribute] + return super().__getattr__(attribute) + + @property + def features(self) -> set[str]: + """Deprecated, use props instead""" + warnings.warn("Deprecated in ete4, use props instead", DeprecationWarning) + return set(self.props.keys()) + + @overload + def get_first_by_feature(self, feature_name: str, value: str, /, *, expect_missing: Literal[True]=True) -> HarTreeNode | None: + ... + + @overload + def get_first_by_feature(self, feature_name: str, value: str, /, *, expect_missing: Literal[False]) -> HarTreeNode: + ... + + def get_first_by_feature(self, feature_name: str, value: str, /, *, expect_missing: bool=False) -> HarTreeNode | None: + try: + return next(self.search_nodes(**{feature_name: value})) + except StopIteration: + if expect_missing: + return None + raise Har2TreeError(f'Unable to find feature "{feature_name}": "{value}"') def to_dict(self) -> MutableMapping[str, Any]: """Make a dict that can then be dumped in json. 
""" to_return = {'uuid': self.uuid, 'children': []} - for feature in self.features: + for feature in self.props: if feature in self.features_to_skip: continue - to_return[feature] = getattr(self, feature) + to_return[feature] = self.props[feature] for child in self.children: to_return['children'].append(child) @@ -69,12 +107,11 @@ def to_json(self) -> str: class URLNode(HarTreeNode): - start_time: datetime - def __init__(self, capture_uuid: str, **kwargs: Any): + def __init__(self, capture_uuid: str, name: str): """Node of the URL Tree""" - super().__init__(capture_uuid=capture_uuid, **kwargs) + super().__init__(capture_uuid=capture_uuid, name=name) # Do not add the body in the json dump self.features_to_skip.add('body') self.features_to_skip.add('url_split') @@ -89,31 +126,31 @@ def _compute_domhash(self) -> str: def add_rendered_features(self, all_requests: list[str], rendered_html: BytesIO | None=None, downloaded_file: tuple[str, BytesIO | None] | None=None) -> None: if rendered_html: - self.add_feature('rendered_html', rendered_html) + self.add_prop('rendered_html', rendered_html) rendered_external, rendered_embedded = find_external_ressources(self.mimetype, self.rendered_html.getvalue(), self.name, all_requests) if hasattr(self, 'external_ressources'): # for the external ressources, the keys are always the same self.external_ressources: dict[str, list[str]] = {initiator_type: urls + rendered_external[initiator_type] for initiator_type, urls in self.external_ressources.items()} else: - self.add_feature('external_ressources', rendered_external) + self.add_prop('external_ressources', rendered_external) - if hasattr(self, 'embedded_ressources'): + if 'embedded_ressources' in self.props: # for the embedded ressources, the keys are the mimetypes, they may not overlap mimetypes = list(self.embedded_ressources.keys()) + list(rendered_embedded.keys()) self.embedded_ressources: dict[str, list[tuple[str, BytesIO]]] = {mimetype: self.embedded_ressources.get(mimetype, []) + rendered_embedded.get(mimetype, []) for mimetype in mimetypes} else: - self.add_feature('embedded_ressources', rendered_embedded) + self.add_prop('embedded_ressources', rendered_embedded) if identifiers := find_identifiers(self.rendered_soup): - self.add_feature('identifiers', identifiers) + self.add_prop('identifiers', identifiers) if domhash := self._compute_domhash(): - self.add_feature('domhash', domhash) + self.add_prop('domhash', domhash) if downloaded_file: downloaded_filename, downloaded_file_data = downloaded_file - self.add_feature('downloaded_file', downloaded_file_data) - self.add_feature('downloaded_filename', downloaded_filename) + self.add_prop('downloaded_file', downloaded_file_data) + self.add_prop('downloaded_filename', downloaded_filename) def _dirty_safe_b64decode(self, to_decode: str | bytes) -> bytes: if isinstance(to_decode, str): @@ -136,80 +173,80 @@ def load_har_entry(self, har_entry: MutableMapping[str, Any], all_requests: list if not self.name: # We're in the actual root node # NOTE: by the HAR specs: "Absolute URL of the request (fragments are not included)." 
- self.add_feature('name', unquote_plus(har_entry['request']['url'])) + self.add_prop('name', unquote_plus(har_entry['request']['url'])) splitted_url = urlparse(self.name) if splitted_url.scheme == 'blob': # this is a new weird feature, but it seems to be usable as a URL, so let's do that - self.add_feature('url_split', urlparse(splitted_url.path)) + self.add_prop('url_split', urlparse(splitted_url.path)) elif splitted_url.scheme == 'file': # file on disk, we do not have a proper URL - self.add_feature('file_on_disk', True) - self.add_feature('url_split', urlparse(splitted_url.path)) + self.add_prop('file_on_disk', True) + self.add_prop('url_split', urlparse(splitted_url.path)) else: - self.add_feature('url_split', splitted_url) + self.add_prop('url_split', splitted_url) # If the URL contains a fragment (i.e. something after a #), it is stripped in the referer. # So we need an alternative URL to do a lookup against - self.add_feature('alternative_url_for_referer', self.name.split('#')[0]) + self.add_prop('alternative_url_for_referer', self.name.split('#')[0]) if '.' in har_entry['startedDateTime']: - self.add_feature('start_time', datetime.strptime(har_entry['startedDateTime'], '%Y-%m-%dT%H:%M:%S.%f%z')) + self.add_prop('start_time', datetime.strptime(har_entry['startedDateTime'], '%Y-%m-%dT%H:%M:%S.%f%z')) else: - self.add_feature('start_time', datetime.strptime(har_entry['startedDateTime'], '%Y-%m-%dT%H:%M:%S%z')) + self.add_prop('start_time', datetime.strptime(har_entry['startedDateTime'], '%Y-%m-%dT%H:%M:%S%z')) if 'pageref' in har_entry: - self.add_feature('pageref', har_entry['pageref']) + self.add_prop('pageref', har_entry['pageref']) - self.add_feature('time', timedelta(milliseconds=har_entry['time'])) - self.add_feature('time_content_received', self.start_time + self.time) # Instant the response is fully received (and the processing of the content by the browser can start) + self.add_prop('time', timedelta(milliseconds=har_entry['time'])) + self.add_prop('time_content_received', self.start_time + self.time) # Instant the response is fully received (and the processing of the content by the browser can start) - if hasattr(self, 'file_on_disk'): + if 'file_on_disk' in self.props: # TODO: Do something better? 
hostname is the feature name used for the aggregated tree # so we need that unless we want to change the JS - self.add_feature('hostname', str(Path(self.url_split.path).parent)) + self.add_prop('hostname', str(Path(self.url_split.path).parent)) else: if self.url_split.hostname: - self.add_feature('hostname', self.url_split.hostname) + self.add_prop('hostname', self.url_split.hostname) else: - self.add_feature('hostname', self.name) + self.add_prop('hostname', self.name) if not self.hostname: self.logger.warning(f'Something is broken in that node: {har_entry}') try: ipaddress.ip_address(self.hostname) - self.add_feature('hostname_is_ip', True) + self.add_prop('hostname_is_ip', True) except ValueError: # Not an IP pass - if not hasattr(self, 'hostname_is_ip'): + if 'hostname_is_ip' not in self.props or not self.hostname_is_ip: try: # attempt to decode if the hostname is idna encoded idna_decoded = self.hostname.encode().decode('idna') if idna_decoded != self.hostname: - self.add_feature('idna', idna_decoded) + self.add_prop('idna', idna_decoded) except UnicodeError: pass - if not hasattr(self, 'hostname_is_ip') and not hasattr(self, 'file_on_disk'): + if 'hostname_is_ip' not in self.props and 'file_on_disk' not in self.props: tld = get_public_suffix_list().publicsuffix(self.hostname) if tld: - self.add_feature('known_tld', tld) + self.add_prop('known_tld', tld) else: self.logger.debug(f'No TLD/domain broken {self.name}') - self.add_feature('request', har_entry['request']) + self.add_prop('request', har_entry['request']) # Try to get a referer from the headers for h in self.request['headers']: if h['name'].lower() == 'referer': - self.add_feature('referer', unquote_plus(h['value'])) + self.add_prop('referer', unquote_plus(h['value'])) if h['name'].lower() == 'user-agent': - self.add_feature('user_agent', h['value']) + self.add_prop('user_agent', h['value']) - if 'user_agent' not in self.features: - self.add_feature('user_agent', '') + if 'user_agent' not in self.props: + self.add_prop('user_agent', '') if 'method' in self.request and self.request['method'] == 'POST' and 'postData' in self.request: # If the content is empty, we don't care @@ -294,22 +331,22 @@ def load_har_entry(self, har_entry: MutableMapping[str, Any], all_requests: list decoded_posted_data = decoded_posted_data.decode('utf-8') except Exception: pass - self.add_feature('posted_data', decoded_posted_data) + self.add_prop('posted_data', decoded_posted_data) - self.add_feature('response', har_entry['response']) + self.add_prop('response', har_entry['response']) try: - self.add_feature('hhhash', make_hhhash(self.response)) + self.add_prop('hhhash', make_hhhash(self.response)) except HHHashNote as e: self.logger.debug(e) except HHHashError as e: self.logger.warning(e) - self.add_feature('response_cookie', self.response['cookies']) + self.add_prop('response_cookie', self.response['cookies']) if self.response_cookie: - self.add_feature('set_third_party_cookies', False) + self.add_prop('set_third_party_cookies', False) # https://developer.mozilla.org/en-US/docs/Web/HTTP/headers/Set-Cookie # Cookie name must not contain "=", so we can use it safely - self.add_feature('cookies_received', []) + self.add_prop('cookies_received', []) for cookie in self.response_cookie: is_3rd_party = False # If the domain is set, the cookie will be sent in any request to that domain, and any related subdomains @@ -322,58 +359,58 @@ def load_har_entry(self, har_entry: MutableMapping[str, Any], all_requests: list else: cookie_domain = self.hostname if not 
self.hostname.endswith(cookie_domain): - self.add_feature('set_third_party_cookies', True) + self.add_prop('set_third_party_cookies', True) is_3rd_party = True self.cookies_received.append((cookie_domain, f'{cookie["name"]}={cookie["value"]}', is_3rd_party)) - self.add_feature('request_cookie', har_entry['request']['cookies']) + self.add_prop('request_cookie', har_entry['request']['cookies']) if self.request_cookie: # https://developer.mozilla.org/en-US/docs/Web/HTTP/headers/Set-Cookie # Cookie name must not contain "=", so we can use it safely # The content of this feature is initialized in Har2Tree.__init__ # And it contains a reference to the URL Node the cookies comes from initially # (the cookie was in the response of that request) - self.add_feature('cookies_sent', {}) + self.add_prop('cookies_sent', {}) for cookie in self.request_cookie: self.cookies_sent[f'{cookie["name"]}={cookie["value"]}'] = [] if not self.response['content'].get('text') or self.response['content']['text'] == '': # If the content of the response is empty, skip. - self.add_feature('empty_response', True) - self.add_feature('mimetype', 'inode/x-empty') + self.add_prop('empty_response', True) + self.add_prop('mimetype', 'inode/x-empty') else: - self.add_feature('empty_response', False) + self.add_prop('empty_response', False) if self.response['content'].get('encoding') == 'base64': try: - self.add_feature('body', BytesIO(self._dirty_safe_b64decode(self.response['content']['text']))) + self.add_prop('body', BytesIO(self._dirty_safe_b64decode(self.response['content']['text']))) except binascii.Error: - self.add_feature('body', BytesIO(self.response['content']['text'].encode())) + self.add_prop('body', BytesIO(self.response['content']['text'].encode())) else: - self.add_feature('body', BytesIO(self.response['content']['text'].encode())) + self.add_prop('body', BytesIO(self.response['content']['text'].encode())) - self.add_feature('body_hash', hashlib.sha512(self.body.getvalue()).hexdigest()) + self.add_prop('body_hash', hashlib.sha512(self.body.getvalue()).hexdigest()) if self.response['content']['mimeType']: mt = self.response['content']['mimeType'].lower() if mt not in ["application/octet-stream", "x-unknown"]: - self.add_feature('mimetype', mt) + self.add_prop('mimetype', mt) - if not hasattr(self, 'mimetype'): + if 'mimetype' not in self.props: # try to guess something better if kind := filetype.guess(self.body.getvalue()): - self.add_feature('mimetype', kind.mime) + self.add_prop('mimetype', kind.mime) - if not hasattr(self, 'mimetype'): - self.add_feature('mimetype', '') + if 'mimetype' not in self.props: + self.add_prop('mimetype', '') external_ressources, embedded_ressources = find_external_ressources(self.mimetype, self.body.getvalue(), self.name, all_requests) - self.add_feature('external_ressources', external_ressources) - self.add_feature('embedded_ressources', embedded_ressources) + self.add_prop('external_ressources', external_ressources) + self.add_prop('embedded_ressources', embedded_ressources) filename = Path(self.url_split.path).name if filename: - self.add_feature('filename', filename) + self.add_prop('filename', filename) else: - self.add_feature('filename', 'file.bin') + self.add_prop('filename', 'file.bin') # Common JS redirect we can catch easily # NOTE: it is extremely fragile and doesn't work very often but is kinda better than nothing. 
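Note (illustrative, not part of the patch): a minimal sketch of the ete4 API surface these hunks migrate to, using only calls already visible in this diff — node features become entries in node.props (written with add_prop() and tested with membership instead of hasattr()), search_nodes() returns a generator, and is_leaf / ancestors() replace ete3's is_leaf() / get_ancestors(). The get_first_by_feature() helper added to HarTreeNode wraps the next(search_nodes(...)) pattern shown here.

from ete4 import Tree

node = Tree()
node.add_prop('hostname', 'example.com')    # ete3: node.add_feature('hostname', ...)
node.add_prop('redirect', True)

if 'redirect' in node.props:                # ete3: hasattr(node, 'redirect')
    print(node.props['hostname'])

# search_nodes() yields matches lazily in ete4; callers take next()/list() explicitly.
first_match = next(node.search_nodes(hostname='example.com'), None)
all_matches = list(node.search_nodes(hostname='example.com'))

# is_leaf is a property and ancestors() an iterator in ete4
# (vs. is_leaf() / get_ancestors() in ete3).
print(node.is_leaf, list(node.ancestors()))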
@@ -384,14 +421,14 @@ def load_har_entry(self, har_entry: MutableMapping[str, Any], all_requests: list # TODO: new type, redirect_js or something like that redirect_url = rebuild_url(self.name, m[9].decode(), all_requests) if redirect_url in all_requests: - self.add_feature('redirect', True) - self.add_feature('redirect_url', redirect_url) + self.add_prop('redirect', True) + self.add_prop('redirect_url', redirect_url) if 'meta_refresh' in self.external_ressources and self.external_ressources.get('meta_refresh'): if self.external_ressources['meta_refresh'][0] in all_requests: # TODO: new type, redirect_html or something like that - self.add_feature('redirect', True) - self.add_feature('redirect_url', self.external_ressources['meta_refresh'][0]) + self.add_prop('redirect', True) + self.add_prop('redirect_url', self.external_ressources['meta_refresh'][0]) # NOTE: Chrome/Chromium/Playwright only feature if har_entry.get('serverIPAddress'): @@ -400,18 +437,18 @@ def load_har_entry(self, har_entry: MutableMapping[str, Any], all_requests: list _ipaddress = har_entry['serverIPAddress'][1:-1] else: _ipaddress = har_entry['serverIPAddress'] - self.add_feature('ip_address', ipaddress.ip_address(_ipaddress)) + self.add_prop('ip_address', ipaddress.ip_address(_ipaddress)) # NOTE: Chrome/Chromium only feature if '_initiator' in har_entry: if har_entry['_initiator']['type'] == 'other': pass elif har_entry['_initiator']['type'] == 'parser' and har_entry['_initiator']['url']: - self.add_feature('initiator_url', unquote_plus(har_entry['_initiator']['url'])) + self.add_prop('initiator_url', unquote_plus(har_entry['_initiator']['url'])) elif har_entry['_initiator']['type'] == 'script': url_stack = self._find_initiator_in_stack(har_entry['_initiator']['stack']) if url_stack: - self.add_feature('initiator_url', url_stack) + self.add_prop('initiator_url', url_stack) elif har_entry['_initiator']['type'] == 'redirect': # FIXME: Need usecase raise Exception(f'Got a redirect! - {har_entry}') @@ -425,20 +462,20 @@ def load_har_entry(self, har_entry: MutableMapping[str, Any], all_requests: list har_entry['_securityDetails']['validFrom'] = datetime.fromtimestamp(har_entry['_securityDetails']['validFrom']) if 'validTo' in har_entry['_securityDetails']: har_entry['_securityDetails']['validTo'] = datetime.fromtimestamp(har_entry['_securityDetails']['validTo']) - self.add_feature('security_details', har_entry['_securityDetails']) + self.add_prop('security_details', har_entry['_securityDetails']) if self.response['redirectURL']: - self.add_feature('redirect', True) + self.add_prop('redirect', True) redirect_url = self.response['redirectURL'] # Rebuild the redirect URL so it matches the entry that sould be in all_requests redirect_url = rebuild_url(self.name, redirect_url, all_requests) # At this point, we should have a URL available in all_requests... if redirect_url in all_requests: - self.add_feature('redirect_url', redirect_url) + self.add_prop('redirect_url', redirect_url) else: # ..... Or not. 
Unable to find a URL for this redirect - self.add_feature('redirect_to_nothing', True) - self.add_feature('redirect_url', self.response['redirectURL']) + self.add_prop('redirect_to_nothing', True) + self.add_prop('redirect_url', self.response['redirectURL']) self.logger.warning('Unable to find that URL: {original_url} - {original_redirect} - {modified_redirect}'.format( original_url=self.name, original_redirect=self.response['redirectURL'], @@ -455,9 +492,9 @@ def _find_initiator_in_stack(self, stack: MutableMapping[str, Any]) -> str | Non @property def resources_hashes(self) -> set[str]: all_ressources_hashes = set() - if 'body_hash' in self.features: + if 'body_hash' in self.props: all_ressources_hashes.add(self.body_hash) - if 'embedded_ressources' in self.features: + if 'embedded_ressources' in self.props: for _mimetype, blobs in self.embedded_ressources.items(): all_ressources_hashes.update([h for h, b in blobs]) return all_ressources_hashes @@ -477,7 +514,7 @@ def _sanitize(maybe_url: str) -> str | None: return None return href - if not hasattr(self, 'rendered_html') or not self.rendered_html: + if 'rendered_html' not in self.props or not self.rendered_html: raise Har2TreeError('Not the node of a page rendered, invalid request.') urls: set[str] = set() @@ -513,16 +550,16 @@ def rendered_soup(self) -> BeautifulSoup: class HostNode(HarTreeNode): - def __init__(self, capture_uuid: str, **kwargs: Any): + def __init__(self, capture_uuid: str, name: str | None =None): """Node of the Hostname Tree""" - super().__init__(capture_uuid=capture_uuid, **kwargs) + super().__init__(capture_uuid=capture_uuid, name=name) # Do not add the URLs in the json dump self.features_to_skip.add('urls') - self.add_feature('urls', []) - self.add_feature('http_content', False) - self.add_feature('https_content', False) - self.add_feature('contains_rendered_urlnode', False) + self.add_prop('urls', []) + self.add_prop('http_content', False) + self.add_prop('https_content', False) + self.add_prop('contains_rendered_urlnode', False) self.cookies_sent: set[str] = set() self.cookies_received: set[tuple[str, str, bool]] = set() @@ -566,34 +603,34 @@ def third_party_cookies_received(self) -> int: def add_url(self, url: URLNode) -> None: """Add a URL node to the Host node, initialize/update the features""" if not self.name: - self.add_feature('name', url.hostname) - if hasattr(url, 'idna'): - self.add_feature('idna', url.idna) + self.add_prop('name', url.hostname) + if 'idna' in url.props: + self.add_prop('idna', url.idna) - if hasattr(url, 'hostname_is_ip') and url.hostname_is_ip: - self.add_feature('hostname_is_ip', True) + if 'hostname_is_ip' in url.props and url.hostname_is_ip: + self.add_prop('hostname_is_ip', True) self.urls.append(url) # Add to URLNode a reference to the HostNode UUID - url.add_feature('hostnode_uuid', self.uuid) + url.add_prop('hostnode_uuid', self.uuid) - if hasattr(url, 'rendered_html') or hasattr(url, 'downloaded_filename'): - self.contains_rendered_urlnode = True - if hasattr(url, 'downloaded_filename'): - self.add_feature('downloaded_filename', url.downloaded_filename) + if 'rendered_html' in url.props or 'downloaded_filename' in url.props: + self.add_prop('contains_rendered_urlnode', True) + if 'downloaded_filename' in url.props: + self.add_prop('downloaded_filename', url.downloaded_filename) - if hasattr(url, 'cookies_sent'): + if 'cookies_sent' in url.props: # Keep a set of cookies sent: different URLs will send the same cookie self.cookies_sent.update(set(url.cookies_sent.keys())) - if 
hasattr(url, 'cookies_received'): + if 'cookies_received' in url.props: # Keep a set of cookies received: different URLs will receive the same cookie self.cookies_received.update({(domain, cookie, is_3rd_party) for domain, cookie, is_3rd_party in url.cookies_received}) if url.name.startswith('http://'): - self.http_content = True + self.add_prop('http_content', True) elif url.name.startswith('https://'): - self.https_content = True + self.add_prop('https_content', True) def harnode_json_default(obj: HarTreeNode) -> MutableMapping[str, Any]: diff --git a/poetry.lock b/poetry.lock index acab55f..b9d642f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -54,14 +54,160 @@ charset-normalizer = ["charset-normalizer"] html5lib = ["html5lib"] lxml = ["lxml"] +[[package]] +name = "bottle" +version = "0.13.4" +description = "Fast and simple WSGI-framework for small web-applications." +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "bottle-0.13.4-py2.py3-none-any.whl", hash = "sha256:045684fbd2764eac9cdeb824861d1551d113e8b683d8d26e296898d3dd99a12e"}, + {file = "bottle-0.13.4.tar.gz", hash = "sha256:787e78327e12b227938de02248333d788cfe45987edca735f8f88e03472c3f47"}, +] + +[[package]] +name = "brotli" +version = "1.1.0" +description = "Python bindings for the Brotli compression library" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "Brotli-1.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e1140c64812cb9b06c922e77f1c26a75ec5e3f0fb2bf92cc8c58720dec276752"}, + {file = "Brotli-1.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c8fd5270e906eef71d4a8d19b7c6a43760c6abcfcc10c9101d14eb2357418de9"}, + {file = "Brotli-1.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ae56aca0402a0f9a3431cddda62ad71666ca9d4dc3a10a142b9dce2e3c0cda3"}, + {file = "Brotli-1.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:43ce1b9935bfa1ede40028054d7f48b5469cd02733a365eec8a329ffd342915d"}, + {file = "Brotli-1.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:7c4855522edb2e6ae7fdb58e07c3ba9111e7621a8956f481c68d5d979c93032e"}, + {file = "Brotli-1.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:38025d9f30cf4634f8309c6874ef871b841eb3c347e90b0851f63d1ded5212da"}, + {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e6a904cb26bfefc2f0a6f240bdf5233be78cd2488900a2f846f3c3ac8489ab80"}, + {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a37b8f0391212d29b3a91a799c8e4a2855e0576911cdfb2515487e30e322253d"}, + {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e84799f09591700a4154154cab9787452925578841a94321d5ee8fb9a9a328f0"}, + {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f66b5337fa213f1da0d9000bc8dc0cb5b896b726eefd9c6046f699b169c41b9e"}, + {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5dab0844f2cf82be357a0eb11a9087f70c5430b2c241493fc122bb6f2bb0917c"}, + {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e4fe605b917c70283db7dfe5ada75e04561479075761a0b3866c081d035b01c1"}, + {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:1e9a65b5736232e7a7f91ff3d02277f11d339bf34099a56cdab6a8b3410a02b2"}, + {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:58d4b711689366d4a03ac7957ab8c28890415e267f9b6589969e74b6e42225ec"}, + {file = "Brotli-1.1.0-cp310-cp310-win32.whl", hash = "sha256:be36e3d172dc816333f33520154d708a2657ea63762ec16b62ece02ab5e4daf2"}, + {file = "Brotli-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:0c6244521dda65ea562d5a69b9a26120769b7a9fb3db2fe9545935ed6735b128"}, + {file = "Brotli-1.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a3daabb76a78f829cafc365531c972016e4aa8d5b4bf60660ad8ecee19df7ccc"}, + {file = "Brotli-1.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c8146669223164fc87a7e3de9f81e9423c67a79d6b3447994dfb9c95da16e2d6"}, + {file = "Brotli-1.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30924eb4c57903d5a7526b08ef4a584acc22ab1ffa085faceb521521d2de32dd"}, + {file = "Brotli-1.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ceb64bbc6eac5a140ca649003756940f8d6a7c444a68af170b3187623b43bebf"}, + {file = "Brotli-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a469274ad18dc0e4d316eefa616d1d0c2ff9da369af19fa6f3daa4f09671fd61"}, + {file = "Brotli-1.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:524f35912131cc2cabb00edfd8d573b07f2d9f21fa824bd3fb19725a9cf06327"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5b3cc074004d968722f51e550b41a27be656ec48f8afaeeb45ebf65b561481dd"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:19c116e796420b0cee3da1ccec3b764ed2952ccfcc298b55a10e5610ad7885f9"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:510b5b1bfbe20e1a7b3baf5fed9e9451873559a976c1a78eebaa3b86c57b4265"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a1fd8a29719ccce974d523580987b7f8229aeace506952fa9ce1d53a033873c8"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c247dd99d39e0338a604f8c2b3bc7061d5c2e9e2ac7ba9cc1be5a69cb6cd832f"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1b2c248cd517c222d89e74669a4adfa5577e06ab68771a529060cf5a156e9757"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:2a24c50840d89ded6c9a8fdc7b6ed3692ed4e86f1c4a4a938e1e92def92933e0"}, + {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f31859074d57b4639318523d6ffdca586ace54271a73ad23ad021acd807eb14b"}, + {file = "Brotli-1.1.0-cp311-cp311-win32.whl", hash = "sha256:39da8adedf6942d76dc3e46653e52df937a3c4d6d18fdc94a7c29d263b1f5b50"}, + {file = "Brotli-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:aac0411d20e345dc0920bdec5548e438e999ff68d77564d5e9463a7ca9d3e7b1"}, + {file = "Brotli-1.1.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:32d95b80260d79926f5fab3c41701dbb818fde1c9da590e77e571eefd14abe28"}, + {file = "Brotli-1.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b760c65308ff1e462f65d69c12e4ae085cff3b332d894637f6273a12a482d09f"}, + {file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:316cc9b17edf613ac76b1f1f305d2a748f1b976b033b049a6ecdfd5612c70409"}, + {file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:caf9ee9a5775f3111642d33b86237b05808dafcd6268faa492250e9b78046eb2"}, + {file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70051525001750221daa10907c77830bc889cb6d865cc0b813d9db7fefc21451"}, + {file = 
"Brotli-1.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7f4bf76817c14aa98cc6697ac02f3972cb8c3da93e9ef16b9c66573a68014f91"}, + {file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0c5516f0aed654134a2fc936325cc2e642f8a0e096d075209672eb321cff408"}, + {file = "Brotli-1.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6c3020404e0b5eefd7c9485ccf8393cfb75ec38ce75586e046573c9dc29967a0"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4ed11165dd45ce798d99a136808a794a748d5dc38511303239d4e2363c0695dc"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4093c631e96fdd49e0377a9c167bfd75b6d0bad2ace734c6eb20b348bc3ea180"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7e4c4629ddad63006efa0ef968c8e4751c5868ff0b1c5c40f76524e894c50248"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:861bf317735688269936f755fa136a99d1ed526883859f86e41a5d43c61d8966"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:87a3044c3a35055527ac75e419dfa9f4f3667a1e887ee80360589eb8c90aabb9"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c5529b34c1c9d937168297f2c1fde7ebe9ebdd5e121297ff9c043bdb2ae3d6fb"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ca63e1890ede90b2e4454f9a65135a4d387a4585ff8282bb72964fab893f2111"}, + {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e79e6520141d792237c70bcd7a3b122d00f2613769ae0cb61c52e89fd3443839"}, + {file = "Brotli-1.1.0-cp312-cp312-win32.whl", hash = "sha256:5f4d5ea15c9382135076d2fb28dde923352fe02951e66935a9efaac8f10e81b0"}, + {file = "Brotli-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:906bc3a79de8c4ae5b86d3d75a8b77e44404b0f4261714306e3ad248d8ab0951"}, + {file = "Brotli-1.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8bf32b98b75c13ec7cf774164172683d6e7891088f6316e54425fde1efc276d5"}, + {file = "Brotli-1.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7bc37c4d6b87fb1017ea28c9508b36bbcb0c3d18b4260fcdf08b200c74a6aee8"}, + {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c0ef38c7a7014ffac184db9e04debe495d317cc9c6fb10071f7fefd93100a4f"}, + {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91d7cc2a76b5567591d12c01f019dd7afce6ba8cba6571187e21e2fc418ae648"}, + {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a93dde851926f4f2678e704fadeb39e16c35d8baebd5252c9fd94ce8ce68c4a0"}, + {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f0db75f47be8b8abc8d9e31bc7aad0547ca26f24a54e6fd10231d623f183d089"}, + {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6967ced6730aed543b8673008b5a391c3b1076d834ca438bbd70635c73775368"}, + {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7eedaa5d036d9336c95915035fb57422054014ebdeb6f3b42eac809928e40d0c"}, + {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d487f5432bf35b60ed625d7e1b448e2dc855422e87469e3f450aa5552b0eb284"}, + {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832436e59afb93e1836081a20f324cb185836c617659b07b129141a8426973c7"}, + {file 
= "Brotli-1.1.0-cp313-cp313-win32.whl", hash = "sha256:43395e90523f9c23a3d5bdf004733246fba087f2948f87ab28015f12359ca6a0"}, + {file = "Brotli-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:9011560a466d2eb3f5a6e4929cf4a09be405c64154e12df0dd72713f6500e32b"}, + {file = "Brotli-1.1.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a090ca607cbb6a34b0391776f0cb48062081f5f60ddcce5d11838e67a01928d1"}, + {file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2de9d02f5bda03d27ede52e8cfe7b865b066fa49258cbab568720aa5be80a47d"}, + {file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2333e30a5e00fe0fe55903c8832e08ee9c3b1382aacf4db26664a16528d51b4b"}, + {file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4d4a848d1837973bf0f4b5e54e3bec977d99be36a7895c61abb659301b02c112"}, + {file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:fdc3ff3bfccdc6b9cc7c342c03aa2400683f0cb891d46e94b64a197910dc4064"}, + {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:5eeb539606f18a0b232d4ba45adccde4125592f3f636a6182b4a8a436548b914"}, + {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:fd5f17ff8f14003595ab414e45fce13d073e0762394f957182e69035c9f3d7c2"}, + {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:069a121ac97412d1fe506da790b3e69f52254b9df4eb665cd42460c837193354"}, + {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:e93dfc1a1165e385cc8239fab7c036fb2cd8093728cbd85097b284d7b99249a2"}, + {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_aarch64.whl", hash = "sha256:aea440a510e14e818e67bfc4027880e2fb500c2ccb20ab21c7a7c8b5b4703d75"}, + {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_i686.whl", hash = "sha256:6974f52a02321b36847cd19d1b8e381bf39939c21efd6ee2fc13a28b0d99348c"}, + {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_ppc64le.whl", hash = "sha256:a7e53012d2853a07a4a79c00643832161a910674a893d296c9f1259859a289d2"}, + {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:d7702622a8b40c49bffb46e1e3ba2e81268d5c04a34f460978c6b5517a34dd52"}, + {file = "Brotli-1.1.0-cp36-cp36m-win32.whl", hash = "sha256:a599669fd7c47233438a56936988a2478685e74854088ef5293802123b5b2460"}, + {file = "Brotli-1.1.0-cp36-cp36m-win_amd64.whl", hash = "sha256:d143fd47fad1db3d7c27a1b1d66162e855b5d50a89666af46e1679c496e8e579"}, + {file = "Brotli-1.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:11d00ed0a83fa22d29bc6b64ef636c4552ebafcef57154b4ddd132f5638fbd1c"}, + {file = "Brotli-1.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f733d788519c7e3e71f0855c96618720f5d3d60c3cb829d8bbb722dddce37985"}, + {file = "Brotli-1.1.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:929811df5462e182b13920da56c6e0284af407d1de637d8e536c5cd00a7daf60"}, + {file = "Brotli-1.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0b63b949ff929fbc2d6d3ce0e924c9b93c9785d877a21a1b678877ffbbc4423a"}, + {file = "Brotli-1.1.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:d192f0f30804e55db0d0e0a35d83a9fead0e9a359a9ed0285dbacea60cc10a84"}, + {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f296c40e23065d0d6650c4aefe7470d2a25fffda489bcc3eb66083f3ac9f6643"}, + {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = 
"sha256:919e32f147ae93a09fe064d77d5ebf4e35502a8df75c29fb05788528e330fe74"}, + {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:23032ae55523cc7bccb4f6a0bf368cd25ad9bcdcc1990b64a647e7bbcce9cb5b"}, + {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:224e57f6eac61cc449f498cc5f0e1725ba2071a3d4f48d5d9dffba42db196438"}, + {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:cb1dac1770878ade83f2ccdf7d25e494f05c9165f5246b46a621cc849341dc01"}, + {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:3ee8a80d67a4334482d9712b8e83ca6b1d9bc7e351931252ebef5d8f7335a547"}, + {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:5e55da2c8724191e5b557f8e18943b1b4839b8efc3ef60d65985bcf6f587dd38"}, + {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:d342778ef319e1026af243ed0a07c97acf3bad33b9f29e7ae6a1f68fd083e90c"}, + {file = "Brotli-1.1.0-cp37-cp37m-win32.whl", hash = "sha256:587ca6d3cef6e4e868102672d3bd9dc9698c309ba56d41c2b9c85bbb903cdb95"}, + {file = "Brotli-1.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:2954c1c23f81c2eaf0b0717d9380bd348578a94161a65b3a2afc62c86467dd68"}, + {file = "Brotli-1.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:efa8b278894b14d6da122a72fefcebc28445f2d3f880ac59d46c90f4c13be9a3"}, + {file = "Brotli-1.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:03d20af184290887bdea3f0f78c4f737d126c74dc2f3ccadf07e54ceca3bf208"}, + {file = "Brotli-1.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6172447e1b368dcbc458925e5ddaf9113477b0ed542df258d84fa28fc45ceea7"}, + {file = "Brotli-1.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a743e5a28af5f70f9c080380a5f908d4d21d40e8f0e0c8901604d15cfa9ba751"}, + {file = "Brotli-1.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0541e747cce78e24ea12d69176f6a7ddb690e62c425e01d31cc065e69ce55b48"}, + {file = "Brotli-1.1.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:cdbc1fc1bc0bff1cef838eafe581b55bfbffaed4ed0318b724d0b71d4d377619"}, + {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:890b5a14ce214389b2cc36ce82f3093f96f4cc730c1cffdbefff77a7c71f2a97"}, + {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ab4fbee0b2d9098c74f3057b2bc055a8bd92ccf02f65944a241b4349229185a"}, + {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:141bd4d93984070e097521ed07e2575b46f817d08f9fa42b16b9b5f27b5ac088"}, + {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fce1473f3ccc4187f75b4690cfc922628aed4d3dd013d047f95a9b3919a86596"}, + {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d2b35ca2c7f81d173d2fadc2f4f31e88cc5f7a39ae5b6db5513cf3383b0e0ec7"}, + {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:af6fa6817889314555aede9a919612b23739395ce767fe7fcbea9a80bf140fe5"}, + {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:2feb1d960f760a575dbc5ab3b1c00504b24caaf6986e2dc2b01c09c87866a943"}, + {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4410f84b33374409552ac9b6903507cdb31cd30d2501fc5ca13d18f73548444a"}, + {file = "Brotli-1.1.0-cp38-cp38-win32.whl", hash = "sha256:db85ecf4e609a48f4b29055f1e144231b90edc90af7481aa731ba2d059226b1b"}, + {file = "Brotli-1.1.0-cp38-cp38-win_amd64.whl", hash = 
"sha256:3d7954194c36e304e1523f55d7042c59dc53ec20dd4e9ea9d151f1b62b4415c0"}, + {file = "Brotli-1.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5fb2ce4b8045c78ebbc7b8f3c15062e435d47e7393cc57c25115cfd49883747a"}, + {file = "Brotli-1.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7905193081db9bfa73b1219140b3d315831cbff0d8941f22da695832f0dd188f"}, + {file = "Brotli-1.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a77def80806c421b4b0af06f45d65a136e7ac0bdca3c09d9e2ea4e515367c7e9"}, + {file = "Brotli-1.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8dadd1314583ec0bf2d1379f7008ad627cd6336625d6679cf2f8e67081b83acf"}, + {file = "Brotli-1.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:901032ff242d479a0efa956d853d16875d42157f98951c0230f69e69f9c09bac"}, + {file = "Brotli-1.1.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:22fc2a8549ffe699bfba2256ab2ed0421a7b8fadff114a3d201794e45a9ff578"}, + {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ae15b066e5ad21366600ebec29a7ccbc86812ed267e4b28e860b8ca16a2bc474"}, + {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:949f3b7c29912693cee0afcf09acd6ebc04c57af949d9bf77d6101ebb61e388c"}, + {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:89f4988c7203739d48c6f806f1e87a1d96e0806d44f0fba61dba81392c9e474d"}, + {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:de6551e370ef19f8de1807d0a9aa2cdfdce2e85ce88b122fe9f6b2b076837e59"}, + {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0737ddb3068957cf1b054899b0883830bb1fec522ec76b1098f9b6e0f02d9419"}, + {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:4f3607b129417e111e30637af1b56f24f7a49e64763253bbc275c75fa887d4b2"}, + {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:6c6e0c425f22c1c719c42670d561ad682f7bfeeef918edea971a79ac5252437f"}, + {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:494994f807ba0b92092a163a0a283961369a65f6cbe01e8891132b7a320e61eb"}, + {file = "Brotli-1.1.0-cp39-cp39-win32.whl", hash = "sha256:f0d8a7a6b5983c2496e364b969f0e526647a06b075d034f3297dc66f3b360c64"}, + {file = "Brotli-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:cdad5b9014d83ca68c25d2e9444e28e967ef16e80f6b436918c700c117a85467"}, + {file = "Brotli-1.1.0.tar.gz", hash = "sha256:81de08ac11bcb85841e440c13611c00b67d3bf82698314928d0b676362546724"}, +] + [[package]] name = "certifi" version = "2025.7.14" description = "Python package for providing Mozilla's CA Bundle." 
-optional = true +optional = false python-versions = ">=3.7" groups = ["main"] -markers = "python_version >= \"3.11\" and extra == \"docs\"" files = [ {file = "certifi-2025.7.14-py3-none-any.whl", hash = "sha256:6b31f564a415d79ee77df69d757bb49a5bb53bd9f756cbbe24394ffd6fc1f4b2"}, {file = "certifi-2025.7.14.tar.gz", hash = "sha256:8ea99dbdfaaf2ba2f9bac77b9249ef62ec5218e7c2b2e903378ed5fccf765995"}, @@ -279,16 +425,30 @@ files = [ ] [[package]] -name = "ete3" -version = "3.1.3" +name = "ete4" +version = "4.3.0" description = "A Python Environment for (phylogenetic) Tree Exploration" optional = false -python-versions = "*" +python-versions = ">=3.7" groups = ["main"] files = [ - {file = "ete3-3.1.3.tar.gz", hash = "sha256:06a3b7fa8ed90187b076a8dbbe5b1b62acee94201d3c6e822f55f449601ef6f2"}, + {file = "ete4-4.3.0.tar.gz", hash = "sha256:c063588a9c77aa16a3de93de4d2016afae0236d4daf325d6290ed668d15a4b86"}, ] +[package.dependencies] +bottle = "*" +brotli = "*" +numpy = "*" +requests = "*" +scipy = "*" + +[package.extras] +doc = ["sphinx"] +render-sm = ["selenium"] +test = ["pytest (>=6.0)"] +treediff = ["lap"] +treeview = ["pyqt6"] + [[package]] name = "exceptiongroup" version = "1.3.0" @@ -324,10 +484,9 @@ files = [ name = "idna" version = "3.10" description = "Internationalized Domain Names in Applications (IDNA)" -optional = true +optional = false python-versions = ">=3.6" groups = ["main"] -markers = "python_version >= \"3.11\" and extra == \"docs\"" files = [ {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, @@ -928,10 +1087,9 @@ testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] name = "requests" version = "2.32.4" description = "Python HTTP for Humans." 
-optional = true +optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version >= \"3.11\" and extra == \"docs\"" files = [ {file = "requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c"}, {file = "requests-2.32.4.tar.gz", hash = "sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422"}, @@ -964,6 +1122,171 @@ files = [ lint = ["mypy (==1.15.0)", "pyright (==1.1.394)", "ruff (==0.9.7)"] test = ["pytest (>=8)"] +[[package]] +name = "scipy" +version = "1.13.1" +description = "Fundamental algorithms for scientific computing in Python" +optional = false +python-versions = ">=3.9" +groups = ["main"] +markers = "python_version == \"3.9\"" +files = [ + {file = "scipy-1.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:20335853b85e9a49ff7572ab453794298bcf0354d8068c5f6775a0eabf350aca"}, + {file = "scipy-1.13.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d605e9c23906d1994f55ace80e0125c587f96c020037ea6aa98d01b4bd2e222f"}, + {file = "scipy-1.13.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cfa31f1def5c819b19ecc3a8b52d28ffdcc7ed52bb20c9a7589669dd3c250989"}, + {file = "scipy-1.13.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26264b282b9da0952a024ae34710c2aff7d27480ee91a2e82b7b7073c24722f"}, + {file = "scipy-1.13.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:eccfa1906eacc02de42d70ef4aecea45415f5be17e72b61bafcfd329bdc52e94"}, + {file = "scipy-1.13.1-cp310-cp310-win_amd64.whl", hash = "sha256:2831f0dc9c5ea9edd6e51e6e769b655f08ec6db6e2e10f86ef39bd32eb11da54"}, + {file = "scipy-1.13.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:27e52b09c0d3a1d5b63e1105f24177e544a222b43611aaf5bc44d4a0979e32f9"}, + {file = "scipy-1.13.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:54f430b00f0133e2224c3ba42b805bfd0086fe488835effa33fa291561932326"}, + {file = "scipy-1.13.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e89369d27f9e7b0884ae559a3a956e77c02114cc60a6058b4e5011572eea9299"}, + {file = "scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a78b4b3345f1b6f68a763c6e25c0c9a23a9fd0f39f5f3d200efe8feda560a5fa"}, + {file = "scipy-1.13.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:45484bee6d65633752c490404513b9ef02475b4284c4cfab0ef946def50b3f59"}, + {file = "scipy-1.13.1-cp311-cp311-win_amd64.whl", hash = "sha256:5713f62f781eebd8d597eb3f88b8bf9274e79eeabf63afb4a737abc6c84ad37b"}, + {file = "scipy-1.13.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5d72782f39716b2b3509cd7c33cdc08c96f2f4d2b06d51e52fb45a19ca0c86a1"}, + {file = "scipy-1.13.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:017367484ce5498445aade74b1d5ab377acdc65e27095155e448c88497755a5d"}, + {file = "scipy-1.13.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:949ae67db5fa78a86e8fa644b9a6b07252f449dcf74247108c50e1d20d2b4627"}, + {file = "scipy-1.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de3ade0e53bc1f21358aa74ff4830235d716211d7d077e340c7349bc3542e884"}, + {file = "scipy-1.13.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2ac65fb503dad64218c228e2dc2d0a0193f7904747db43014645ae139c8fad16"}, + {file = "scipy-1.13.1-cp312-cp312-win_amd64.whl", hash = "sha256:cdd7dacfb95fea358916410ec61bbc20440f7860333aee6d882bb8046264e949"}, + {file = "scipy-1.13.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:436bbb42a94a8aeef855d755ce5a465479c721e9d684de76bf61a62e7c2b81d5"}, + {file = "scipy-1.13.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:8335549ebbca860c52bf3d02f80784e91a004b71b059e3eea9678ba994796a24"}, + {file = "scipy-1.13.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d533654b7d221a6a97304ab63c41c96473ff04459e404b83275b60aa8f4b7004"}, + {file = "scipy-1.13.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:637e98dcf185ba7f8e663e122ebf908c4702420477ae52a04f9908707456ba4d"}, + {file = "scipy-1.13.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a014c2b3697bde71724244f63de2476925596c24285c7a637364761f8710891c"}, + {file = "scipy-1.13.1-cp39-cp39-win_amd64.whl", hash = "sha256:392e4ec766654852c25ebad4f64e4e584cf19820b980bc04960bca0b0cd6eaa2"}, + {file = "scipy-1.13.1.tar.gz", hash = "sha256:095a87a0312b08dfd6a6155cbbd310a8c51800fc931b8c0b84003014b874ed3c"}, +] + +[package.dependencies] +numpy = ">=1.22.4,<2.3" + +[package.extras] +dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", "pycodestyle", "pydevtool", "rich-click", "ruff", "types-psutil", "typing_extensions"] +doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.12.0)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0)", "sphinx-design (>=0.4.0)"] +test = ["array-api-strict", "asv", "gmpy2", "hypothesis (>=6.30)", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + +[[package]] +name = "scipy" +version = "1.15.3" +description = "Fundamental algorithms for scientific computing in Python" +optional = false +python-versions = ">=3.10" +groups = ["main"] +markers = "python_version == \"3.10\"" +files = [ + {file = "scipy-1.15.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a345928c86d535060c9c2b25e71e87c39ab2f22fc96e9636bd74d1dbf9de448c"}, + {file = "scipy-1.15.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:ad3432cb0f9ed87477a8d97f03b763fd1d57709f1bbde3c9369b1dff5503b253"}, + {file = "scipy-1.15.3-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:aef683a9ae6eb00728a542b796f52a5477b78252edede72b8327a886ab63293f"}, + {file = "scipy-1.15.3-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:1c832e1bd78dea67d5c16f786681b28dd695a8cb1fb90af2e27580d3d0967e92"}, + {file = "scipy-1.15.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:263961f658ce2165bbd7b99fa5135195c3a12d9bef045345016b8b50c315cb82"}, + {file = "scipy-1.15.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e2abc762b0811e09a0d3258abee2d98e0c703eee49464ce0069590846f31d40"}, + {file = "scipy-1.15.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ed7284b21a7a0c8f1b6e5977ac05396c0d008b89e05498c8b7e8f4a1423bba0e"}, + {file = "scipy-1.15.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5380741e53df2c566f4d234b100a484b420af85deb39ea35a1cc1be84ff53a5c"}, + {file = "scipy-1.15.3-cp310-cp310-win_amd64.whl", hash = "sha256:9d61e97b186a57350f6d6fd72640f9e99d5a4a2b8fbf4b9ee9a841eab327dc13"}, + {file = "scipy-1.15.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:993439ce220d25e3696d1b23b233dd010169b62f6456488567e830654ee37a6b"}, + {file = "scipy-1.15.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:34716e281f181a02341ddeaad584205bd2fd3c242063bd3423d61ac259ca7eba"}, + {file = "scipy-1.15.3-cp311-cp311-macosx_14_0_arm64.whl", hash = 
"sha256:3b0334816afb8b91dab859281b1b9786934392aa3d527cd847e41bb6f45bee65"}, + {file = "scipy-1.15.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:6db907c7368e3092e24919b5e31c76998b0ce1684d51a90943cb0ed1b4ffd6c1"}, + {file = "scipy-1.15.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:721d6b4ef5dc82ca8968c25b111e307083d7ca9091bc38163fb89243e85e3889"}, + {file = "scipy-1.15.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39cb9c62e471b1bb3750066ecc3a3f3052b37751c7c3dfd0fd7e48900ed52982"}, + {file = "scipy-1.15.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:795c46999bae845966368a3c013e0e00947932d68e235702b5c3f6ea799aa8c9"}, + {file = "scipy-1.15.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:18aaacb735ab38b38db42cb01f6b92a2d0d4b6aabefeb07f02849e47f8fb3594"}, + {file = "scipy-1.15.3-cp311-cp311-win_amd64.whl", hash = "sha256:ae48a786a28412d744c62fd7816a4118ef97e5be0bee968ce8f0a2fba7acf3bb"}, + {file = "scipy-1.15.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6ac6310fdbfb7aa6612408bd2f07295bcbd3fda00d2d702178434751fe48e019"}, + {file = "scipy-1.15.3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:185cd3d6d05ca4b44a8f1595af87f9c372bb6acf9c808e99aa3e9aa03bd98cf6"}, + {file = "scipy-1.15.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:05dc6abcd105e1a29f95eada46d4a3f251743cfd7d3ae8ddb4088047f24ea477"}, + {file = "scipy-1.15.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:06efcba926324df1696931a57a176c80848ccd67ce6ad020c810736bfd58eb1c"}, + {file = "scipy-1.15.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05045d8b9bfd807ee1b9f38761993297b10b245f012b11b13b91ba8945f7e45"}, + {file = "scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:271e3713e645149ea5ea3e97b57fdab61ce61333f97cfae392c28ba786f9bb49"}, + {file = "scipy-1.15.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6cfd56fc1a8e53f6e89ba3a7a7251f7396412d655bca2aa5611c8ec9a6784a1e"}, + {file = "scipy-1.15.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0ff17c0bb1cb32952c09217d8d1eed9b53d1463e5f1dd6052c7857f83127d539"}, + {file = "scipy-1.15.3-cp312-cp312-win_amd64.whl", hash = "sha256:52092bc0472cfd17df49ff17e70624345efece4e1a12b23783a1ac59a1b728ed"}, + {file = "scipy-1.15.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c620736bcc334782e24d173c0fdbb7590a0a436d2fdf39310a8902505008759"}, + {file = "scipy-1.15.3-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:7e11270a000969409d37ed399585ee530b9ef6aa99d50c019de4cb01e8e54e62"}, + {file = "scipy-1.15.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:8c9ed3ba2c8a2ce098163a9bdb26f891746d02136995df25227a20e71c396ebb"}, + {file = "scipy-1.15.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0bdd905264c0c9cfa74a4772cdb2070171790381a5c4d312c973382fc6eaf730"}, + {file = "scipy-1.15.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79167bba085c31f38603e11a267d862957cbb3ce018d8b38f79ac043bc92d825"}, + {file = "scipy-1.15.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9deabd6d547aee2c9a81dee6cc96c6d7e9a9b1953f74850c179f91fdc729cb7"}, + {file = "scipy-1.15.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dde4fc32993071ac0c7dd2d82569e544f0bdaff66269cb475e0f369adad13f11"}, + {file = "scipy-1.15.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f77f853d584e72e874d87357ad70f44b437331507d1c311457bed8ed2b956126"}, + {file = 
"scipy-1.15.3-cp313-cp313-win_amd64.whl", hash = "sha256:b90ab29d0c37ec9bf55424c064312930ca5f4bde15ee8619ee44e69319aab163"}, + {file = "scipy-1.15.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3ac07623267feb3ae308487c260ac684b32ea35fd81e12845039952f558047b8"}, + {file = "scipy-1.15.3-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6487aa99c2a3d509a5227d9a5e889ff05830a06b2ce08ec30df6d79db5fcd5c5"}, + {file = "scipy-1.15.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:50f9e62461c95d933d5c5ef4a1f2ebf9a2b4e83b0db374cb3f1de104d935922e"}, + {file = "scipy-1.15.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:14ed70039d182f411ffc74789a16df3835e05dc469b898233a245cdfd7f162cb"}, + {file = "scipy-1.15.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a769105537aa07a69468a0eefcd121be52006db61cdd8cac8a0e68980bbb723"}, + {file = "scipy-1.15.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9db984639887e3dffb3928d118145ffe40eff2fa40cb241a306ec57c219ebbbb"}, + {file = "scipy-1.15.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:40e54d5c7e7ebf1aa596c374c49fa3135f04648a0caabcb66c52884b943f02b4"}, + {file = "scipy-1.15.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5e721fed53187e71d0ccf382b6bf977644c533e506c4d33c3fb24de89f5c3ed5"}, + {file = "scipy-1.15.3-cp313-cp313t-win_amd64.whl", hash = "sha256:76ad1fb5f8752eabf0fa02e4cc0336b4e8f021e2d5f061ed37d6d264db35e3ca"}, + {file = "scipy-1.15.3.tar.gz", hash = "sha256:eae3cf522bc7df64b42cad3925c876e1b0b6c35c1337c93e12c0f366f55b0eaf"}, +] + +[package.dependencies] +numpy = ">=1.23.5,<2.5" + +[package.extras] +dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodestyle", "pydevtool", "rich-click", "ruff (>=0.0.292)", "types-psutil", "typing_extensions"] +doc = ["intersphinx_registry", "jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.19.1)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<8.0.0)", "sphinx-copybutton", "sphinx-design (>=0.4.0)"] +test = ["Cython", "array-api-strict (>=2.0,<2.1.1)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja ; sys_platform != \"emscripten\"", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + +[[package]] +name = "scipy" +version = "1.16.0" +description = "Fundamental algorithms for scientific computing in Python" +optional = false +python-versions = ">=3.11" +groups = ["main"] +markers = "python_version >= \"3.11\"" +files = [ + {file = "scipy-1.16.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:deec06d831b8f6b5fb0b652433be6a09db29e996368ce5911faf673e78d20085"}, + {file = "scipy-1.16.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d30c0fe579bb901c61ab4bb7f3eeb7281f0d4c4a7b52dbf563c89da4fd2949be"}, + {file = "scipy-1.16.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:b2243561b45257f7391d0f49972fca90d46b79b8dbcb9b2cb0f9df928d370ad4"}, + {file = "scipy-1.16.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:e6d7dfc148135e9712d87c5f7e4f2ddc1304d1582cb3a7d698bbadedb61c7afd"}, + {file = "scipy-1.16.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:90452f6a9f3fe5a2cf3748e7be14f9cc7d9b124dce19667b54f5b429d680d539"}, + {file = "scipy-1.16.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a2f0bf2f58031c8701a8b601df41701d2a7be17c7ffac0a4816aeba89c4cdac8"}, + {file = 
"scipy-1.16.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6c4abb4c11fc0b857474241b812ce69ffa6464b4bd8f4ecb786cf240367a36a7"}, + {file = "scipy-1.16.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b370f8f6ac6ef99815b0d5c9f02e7ade77b33007d74802efc8316c8db98fd11e"}, + {file = "scipy-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:a16ba90847249bedce8aa404a83fb8334b825ec4a8e742ce6012a7a5e639f95c"}, + {file = "scipy-1.16.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:7eb6bd33cef4afb9fa5f1fb25df8feeb1e52d94f21a44f1d17805b41b1da3180"}, + {file = "scipy-1.16.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:1dbc8fdba23e4d80394ddfab7a56808e3e6489176d559c6c71935b11a2d59db1"}, + {file = "scipy-1.16.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:7dcf42c380e1e3737b343dec21095c9a9ad3f9cbe06f9c05830b44b1786c9e90"}, + {file = "scipy-1.16.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:26ec28675f4a9d41587266084c626b02899db373717d9312fa96ab17ca1ae94d"}, + {file = "scipy-1.16.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:952358b7e58bd3197cfbd2f2f2ba829f258404bdf5db59514b515a8fe7a36c52"}, + {file = "scipy-1.16.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:03931b4e870c6fef5b5c0970d52c9f6ddd8c8d3e934a98f09308377eba6f3824"}, + {file = "scipy-1.16.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:512c4f4f85912767c351a0306824ccca6fd91307a9f4318efe8fdbd9d30562ef"}, + {file = "scipy-1.16.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e69f798847e9add03d512eaf5081a9a5c9a98757d12e52e6186ed9681247a1ac"}, + {file = "scipy-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:adf9b1999323ba335adc5d1dc7add4781cb5a4b0ef1e98b79768c05c796c4e49"}, + {file = "scipy-1.16.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:e9f414cbe9ca289a73e0cc92e33a6a791469b6619c240aa32ee18abdce8ab451"}, + {file = "scipy-1.16.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:bbba55fb97ba3cdef9b1ee973f06b09d518c0c7c66a009c729c7d1592be1935e"}, + {file = "scipy-1.16.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:58e0d4354eacb6004e7aa1cd350e5514bd0270acaa8d5b36c0627bb3bb486974"}, + {file = "scipy-1.16.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:75b2094ec975c80efc273567436e16bb794660509c12c6a31eb5c195cbf4b6dc"}, + {file = "scipy-1.16.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6b65d232157a380fdd11a560e7e21cde34fdb69d65c09cb87f6cc024ee376351"}, + {file = "scipy-1.16.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d8747f7736accd39289943f7fe53a8333be7f15a82eea08e4afe47d79568c32"}, + {file = "scipy-1.16.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eb9f147a1b8529bb7fec2a85cf4cf42bdfadf9e83535c309a11fdae598c88e8b"}, + {file = "scipy-1.16.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d2b83c37edbfa837a8923d19c749c1935ad3d41cf196006a24ed44dba2ec4358"}, + {file = "scipy-1.16.0-cp313-cp313-win_amd64.whl", hash = "sha256:79a3c13d43c95aa80b87328a46031cf52508cf5f4df2767602c984ed1d3c6bbe"}, + {file = "scipy-1.16.0-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:f91b87e1689f0370690e8470916fe1b2308e5b2061317ff76977c8f836452a47"}, + {file = "scipy-1.16.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:88a6ca658fb94640079e7a50b2ad3b67e33ef0f40e70bdb7dc22017dae73ac08"}, + {file = "scipy-1.16.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:ae902626972f1bd7e4e86f58fd72322d7f4ec7b0cfc17b15d4b7006efc385176"}, + {file = 
"scipy-1.16.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:8cb824c1fc75ef29893bc32b3ddd7b11cf9ab13c1127fe26413a05953b8c32ed"}, + {file = "scipy-1.16.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:de2db7250ff6514366a9709c2cba35cb6d08498e961cba20d7cff98a7ee88938"}, + {file = "scipy-1.16.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e85800274edf4db8dd2e4e93034f92d1b05c9421220e7ded9988b16976f849c1"}, + {file = "scipy-1.16.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4f720300a3024c237ace1cb11f9a84c38beb19616ba7c4cdcd771047a10a1706"}, + {file = "scipy-1.16.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:aad603e9339ddb676409b104c48a027e9916ce0d2838830691f39552b38a352e"}, + {file = "scipy-1.16.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f56296fefca67ba605fd74d12f7bd23636267731a72cb3947963e76b8c0a25db"}, + {file = "scipy-1.16.0.tar.gz", hash = "sha256:b5ef54021e832869c8cfb03bc3bf20366cbcd426e02a58e8a58d7584dfbb8f62"}, +] + +[package.dependencies] +numpy = ">=1.25.2,<2.6" + +[package.extras] +dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodestyle", "pydevtool", "rich-click", "ruff (>=0.0.292)", "types-psutil", "typing_extensions"] +doc = ["intersphinx_registry", "jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.19.1)", "jupytext", "linkify-it-py", "matplotlib (>=3.5)", "myst-nb (>=1.2.0)", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<8.2.0)", "sphinx-copybutton", "sphinx-design (>=0.4.0)"] +test = ["Cython", "array-api-strict (>=2.3.1)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja ; sys_platform != \"emscripten\"", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + [[package]] name = "six" version = "1.17.0" @@ -1250,10 +1573,9 @@ files = [ name = "urllib3" version = "2.5.0" description = "HTTP library with thread-safe connection pooling, file post, and more." 
-optional = true
+optional = false
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "python_version >= \"3.11\" and extra == \"docs\""
 files = [
     {file = "urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc"},
     {file = "urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760"},
@@ -1295,4 +1617,4 @@ docs = ["Sphinx", "six"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.9"
-content-hash = "c175a0489f97ccae954029c8be069933edb86dd32884d392eaaf43207bb97e3b"
+content-hash = "0fffdb15a5dcf13314de58504ad8912d0fd3680324c87c78ee8d0902cac86927"
diff --git a/pyproject.toml b/pyproject.toml
index 7d7c851..06a8f1f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ requires-python = ">=3.9"
 dynamic = [ "classifiers" ]
 
 dependencies = [
-    "ete3 (>=3.1.3)",
+    "ete4 (>=4.3.0)",
     "beautifulsoup4[charset-normalizer,lxml] (>=4.13.4)",
     "publicsuffixlist (>=1.0.2.20250719)",
     "filetype (>=1.2.0)",
diff --git a/tests/simple_test.py b/tests/simple_test.py
index fa7b476..f72db10 100644
--- a/tests/simple_test.py
+++ b/tests/simple_test.py
@@ -131,7 +131,7 @@ def test_rebuild_url_partial_double_slash(self) -> None:
         self.assertEqual(rebuild_url_double_slash, 'https://www.youtube.com/watch?v=iwGFalTRHDA')
 
     def test_hostname_tree_features(self) -> None:
-        self.assertEqual(self.http_redirect_ct.root_hartree.hostname_tree.features, {'name', 'http_content', 'https_content', 'support', 'dist',
+        self.assertEqual(self.http_redirect_ct.root_hartree.hostname_tree.features, {'name', 'http_content', 'https_content',
                                                                                      'contains_rendered_urlnode', 'urls', 'uuid'})
         self.assertTrue('meta_refresh' in self.http_redirect_ct.root_hartree.url_tree.external_ressources)
         self.assertEqual(self.http_redirect_ct.root_hartree.url_tree.external_ressources['meta_refresh'][0], 'https://www.youtube.com/watch?v=iwGFalTRHDA')
@@ -243,7 +243,7 @@ def test_third_party_cookies_received(self) -> None:
 
     def test_hostnode_to_json(self) -> None:
         # Easiest way to test the to_json method without having a huge string here is extracting one from a file
-        # This file is already cleaned, no UUIDs (see)
+        # This file is already cleaned, no UUIDs
         with open(self.test_dir / 'iframe' / 'to_json.json') as json_file:
             expected_dict = json.load(json_file)