diff --git a/biothings/cli/__init__.py b/biothings/cli/__init__.py
index 2113ecd9..2dbe6f2c 100644
--- a/biothings/cli/__init__.py
+++ b/biothings/cli/__init__.py
@@ -14,6 +14,7 @@
 from biothings.cli.commands.admin import build_admin_application
 from biothings.cli.commands.config import config_application, load_configuration
 from biothings.cli.commands.dataplugin import dataplugin_application
+from biothings.cli.commands.pathing import path_application
 
 
 def setup_logging_configuration(logging_level: Literal[10, 20, 30, 40, 50]) -> None:
@@ -63,4 +64,5 @@ def main():
 
     admin_application.add_typer(dataplugin_application, name="dataplugin")
     admin_application.add_typer(config_application, name="config")
+    admin_application.add_typer(path_application, name="path")
     return admin_application()
diff --git a/biothings/cli/commands/config.py b/biothings/cli/commands/config.py
index 3f035a5b..1f2df74b 100644
--- a/biothings/cli/commands/config.py
+++ b/biothings/cli/commands/config.py
@@ -268,6 +268,13 @@ def default_biothings_configuration() -> dict:
         "HUB_MAX_WORKERS": os.cpu_count(),
         "MAX_QUEUED_JOBS": 1000
     }
+
+    # specific attributes to the biothings-cli application
+    cli_configuration = {
+        "BIOTHINGS_CLI_PATH": "biothings_hub/path",
+    }
+    configuration.update(cli_configuration)
+
     return configuration
 
 
diff --git a/biothings/cli/commands/decorators.py b/biothings/cli/commands/decorators.py
new file mode 100644
index 00000000..41b39610
--- /dev/null
+++ b/biothings/cli/commands/decorators.py
@@ -0,0 +1,110 @@
+"""
+Collection of decorators for usage within the biothings-cli
+
+These are often methods we want associated with many of the plugin methods we
+use, but don't directly impact the logic of the actual operation. Typically things
+related to paths and configurations that apply to large swaths of the cli
+would make sense as a decorator
+"""
+
+import functools
+import logging
+import pathlib
+import sys
+from typing import Callable
+
+from biothings.cli.exceptions import MissingPluginName
+
+
+logger = logging.getLogger(name="biothings-cli")
+
+
+def operation_mode(operation: Callable):
+    """
+    Based off the directory structure for where the biothings-cli
+    was invoked we set the "mode" to one of two states:
+
+    0) singular
+    The current working directory contains a singular data-plugin
+
+    In this case we don't require a plugin_name argument to be passed
+    at the command-line
+
+    1) hub
+    The current working directory contains N directories operating as a
+    "hub" or collection of data-plugins under one umbrella
+
+    In this case we do require a plugin_name argument to be passed
+    at the command-line. Otherwise we have no idea which data-plugin to
+    refer to
+
+    The mode is inferred from the file names in the working directory. If we find
+    a manifest (manifest.json / manifest.yaml) or an __init__.py, then we treat
+    this as a singular dataplugin
+
+    If we find neither then we default to assuming that the mode is hub
+
+    Raises MissingPluginName in hub mode when no plugin_name keyword is supplied
+    """
+
+    @functools.wraps(operation)
+    def determine_operation_mode(*args, **kwargs):
+        working_directory = pathlib.Path.cwd()
+        working_directory_files = {file.name for file in working_directory.iterdir()}
+
+        if "manifest.json" in working_directory_files or "manifest.yaml" in working_directory_files:
+            logger.debug("Inferring singular manifest plugin from directory structure")
+            mode = "SINGULAR"
+        elif "__init__.py" in working_directory_files:
+            logger.debug("Inferring singular advanced plugin from directory structure")
+            mode = "SINGULAR"
+        else:
+            logger.debug("Inferring multiple plugins from directory structure")
+            mode = "HUB"
+
+        if mode == "SINGULAR":
+            # the working directory is the plugin, so a supplied name is discarded
+            if kwargs.get("plugin_name") is not None:
+                kwargs["plugin_name"] = None
+        elif kwargs.get("plugin_name") is None:
+            raise MissingPluginName(working_directory)
+
+        # For a coroutine function this call only creates the coroutine, which is
+        # handed back un-awaited for the caller to await, so one code path covers
+        # both plain functions and coroutines
+        return operation(*args, **kwargs)
+
+    return determine_operation_mode
+
+
+def cli_system_path(operation: Callable):
+    """
+    Used for ensuring that if we've appended files to biothings-cli
+    path file (stored under config.BIOTHINGS_CLI_PATH), then we need to update
+    the system path so we can discover the modules at runtime
+
+    Blank lines in the path file are skipped and an entry is only appended when it
+    is not already on the system path, so repeated decorated calls within one
+    process do not fill sys.path with duplicates
+    """
+
+    @functools.wraps(operation)
+    def update_system_path(*args, **kwargs):
+        from biothings import config
+
+        discovery_path = pathlib.Path(config.BIOTHINGS_CLI_PATH).resolve().absolute()
+        path_file = discovery_path.joinpath("biothings_cli.pth")
+
+        if path_file.exists():
+            with open(path_file, "r", encoding="utf-8") as handle:
+                path_entries = [entry.strip() for entry in handle]
+
+            for path in path_entries:
+                if path and path not in sys.path:
+                    logger.debug("Adding %s to system path", path)
+                    sys.path.append(path)
+
+        # same pass-through for plain functions and coroutine functions
+        return operation(*args, **kwargs)
+
+    return update_system_path
diff --git a/biothings/cli/commands/operations.py b/biothings/cli/commands/operations.py
index bf05c4fb..8bb6c90f 100644
--- a/biothings/cli/commands/operations.py
+++ b/biothings/cli/commands/operations.py
@@ -47,7 +47,6 @@
 """
 
 import asyncio
-import functools
 import logging
 import multiprocessing
 import os
@@ -57,7 +56,7 @@
 import shutil
 import sys
 import uuid
-from typing import Callable, Optional, Union
+from typing import Optional, Union
 
 import jsonschema
 import rich
@@ -67,8 +66,9 @@
 from rich.console import Console
 from rich.panel import Panel
 
+from biothings.cli.commands.decorators import cli_system_path, operation_mode
 from biothings.cli.structure import TEMPLATE_DIRECTORY
-from biothings.cli.exceptions import MissingPluginName, UnknownUploaderSource
+from biothings.cli.exceptions import UnknownUploaderSource
 from biothings.cli.utils import (
     clean_dumped_files,
     clean_uploaded_sources,
@@ -87,65 +87,8 @@
 logger = logging.getLogger(name="biothings-cli")
 
 
-def operation_mode(operation_method: Callable):
-    """
-    Based off the directory structure for where the biothings-cli
-    was invoked we set the "mode" to one of two states:
-
-    0) singular
-    The current working directory contains a singular data-plugin
-
-    In this case we don't require a plugin_name argument to be passed
-    at the command-line
-
-    1) hub
-    The current working directory contains N directories operating as a
-    "hub" or collection of data-plugins under one umbrella
-
-    In this case we do require a plugin_name argument to be passed
-    at the command-line. Otherwise we have no idea which data-plugin to
-    refer to
-
-    We attempt to load the plugin from this working directory. If we sucessfully load
-    either a manifest or advanced plugin, then we can safely say this is a singular
-    dataplugin
-
-    If we cannot load either a manifest or advanced plugin then we default assume that
-    the mode is hub
-    """
-
-    @functools.wraps(operation_method)
-    def determine_operation_mode(*args, **kwargs):
-        working_directory = pathlib.Path.cwd()
-        working_directory_files = {file.name for file in working_directory.iterdir()}
-
-        mode = None
-        if "manifest.json" in working_directory_files or "manifest.yaml" in working_directory_files:
-            logger.debug("Inferring singular manifest plugin from directory structure")
-            mode = "SINGULAR"
-        elif "__init__.py" in working_directory_files:
-            logger.debug("Inferring singular advanced plugin from directory structure")
-            mode = "SINGULAR"
-        else:
-            logger.debug("Inferring multiple plugins from directory structure")
-            mode = "HUB"
-
-        if mode == "SINGULAR":
-            if kwargs.get("plugin_name", None) is not None:
-                kwargs["plugin_name"] = None
-        elif mode == "HUB":
-            if kwargs.get("plugin_name", None) is None:
-                raise MissingPluginName(working_directory)
-
-        operation_result = operation_method(*args, **kwargs)
-        return operation_result
-
-    return determine_operation_mode
-
-
 # do not apply operation_mode decorator since this operation means to create a new plugin
 # regardless what the current working directory has
-# @operation_mode
 def do_create(plugin_name: str, multi_uploaders: bool = False, parallelizer: bool = False):
     """
     Create a new data plugin from the template
@@ -178,6 +121,7 @@ def do_create(plugin_name: str, multi_uploaders: bool = False, parallelizer: boo
     logger.info("Successfully created data plugin template at: %s\n", new_plugin_directory)
 
 
+@cli_system_path
 @operation_mode
 async def do_dump(plugin_name: Optional[str] = None, show_dumped: bool = True) -> None:
     """
@@ -223,6 +167,7 @@ async def do_dump(plugin_name: Optional[str] = None, show_dumped: bool = True) -
         show_dumped_files(data_folder, assistant_instance.plugin_name)
 
 
+@cli_system_path
 @operation_mode
 async def do_upload(plugin_name: Optional[str] = None, batch_limit: int = 10000, show_uploaded: bool = True) -> None:
     """
@@ -277,6 +222,7 @@ async def do_upload(plugin_name: Optional[str] = None, batch_limit: int = 10000,
         show_uploaded_sources(pathlib.Path(assistant_instance.plugin_directory), assistant_instance.plugin_name)
 
 
+@cli_system_path
 @operation_mode
 async def do_parallel_upload(
     plugin_name: Optional[str] = None, batch_limit: int = 10000, show_uploaded: bool = True
@@ -344,6 +290,7 @@ async def do_parallel_upload(
         show_uploaded_sources(pathlib.Path(assistant_instance.plugin_directory), assistant_instance.plugin_name)
 
 
+@cli_system_path
 @operation_mode
 async def do_dump_and_upload(plugin_name: str) -> None:
     """
@@ -354,6 +301,7 @@ async def do_dump_and_upload(plugin_name: str) -> None:
     logger.info("[green]Success![/green] :rocket:", extra={"markup": True})
 
 
+@cli_system_path
 @operation_mode
 async def do_index(plugin_name: Optional[str] = None, sub_source_name: Optional[str] = None) -> None:
     """
@@ -540,6 +488,7 @@ async def do_index(plugin_name: Optional[str] = None, sub_source_name: Optional[
     await show_source_index(index_name, assistant_instance.index_manager, elasticsearch_mapping)
 
 
+@cli_system_path
 @operation_mode
 async def do_list(
     plugin_name: Optional[str] = None, dump: bool = True, upload: bool = True, hubdb: bool = False
@@ -569,6 +518,7 @@ async def do_list(
         show_hubdb_content()
 
 
+@cli_system_path
 @operation_mode
 async def do_inspect(
     plugin_name: Optional[str] = None,
@@ -633,6 +583,7 @@ async def do_inspect(
         write_mapping_to_file(sub_output, inspection_mapping)
 
 
+@cli_system_path
 @operation_mode
 async def do_serve(plugin_name: Optional[str] = None, host: str = "localhost", port: int = 9999):
     """
@@ -651,6 +602,7 @@ async def do_serve(plugin_name: Optional[str] = None, host: str = "localhost", p
     await main(host=host, port=port, db=src_db, table_space=table_space)
 
 
+@cli_system_path
 @operation_mode
 async def do_clean(
     plugin_name: Optional[str] = None, dump: bool = False, upload: bool = False, clean_all: bool = False
@@ -714,6 +666,7 @@ async def display_schema():
     console.print(panel)
 
 
+@cli_system_path
 @operation_mode
 async def validate_manifest(plugin_name: Optional[str] = None):
     """
diff --git a/biothings/cli/commands/pathing.py b/biothings/cli/commands/pathing.py
new file mode 100644
index 00000000..e0a7bb95
--- /dev/null
+++ b/biothings/cli/commands/pathing.py
@@ -0,0 +1,181 @@
+"""
+Module for creating the cli interface for the path interface
+"""
+
+import logging
+import pathlib
+import sys
+
+import typer
+from rich.console import Console
+from rich.table import Table
+
+from biothings.cli.commands.decorators import cli_system_path
+
+
+SHORT_HELP = (
+    "[green]CLI tool for viewing the python system path and adding external directories to the system path[/green]"
+)
+FULL_HELP = (
+    SHORT_HELP
+    + "\n\n[magenta] :sparkles: Run from an existing data plugin folder to evaluate a singular data plugin.[/magenta]"
+)
+path_application = typer.Typer(
+    help=FULL_HELP,
+    short_help=SHORT_HELP,
+    no_args_is_help=True,
+    rich_markup_mode="rich",
+)
+
+logger = logging.getLogger(name="biothings-cli")
+
+
+@path_application.command(name="view")
+def view_system_path() -> None:
+    """
+    View the system paths currently discovered by python, along with potential hub directories of interest
+    that the user may wish to add to the system path for usage in data plugin testing
+    """
+    display_system_paths()
+
+
+@path_application.command(name="add")
+def add_parser_to_system_path() -> None:
+    """
+    Add discovered hub directory paths to the python system path for aiding in testing various data plugins
+    Creates the file "biothings_cli.pth" (uses .pth extension to mimic the `site` module internal to
+    python). It creates this file in the directory set by config.BIOTHINGS_CLI_PATH. If found while
+    running a command, then the paths in the file will be added to the system path prior to executing it
+    """
+    update_system_paths()
+    display_system_paths()
+
+
+@path_application.command(name="remove")
+def remove_parser_from_system_path() -> None:
+    """
+    Remove the hub directories discovered from the python system path
+    Simply removes the biothings_cli.pth file from the directory set by config.BIOTHINGS_CLI_PATH
+    """
+    remove_system_paths()
+
+
+@cli_system_path
+def display_system_paths() -> None:
+    """
+    Method for displaying the system path information used for the
+    biothings-cli application
+
+    External method so we can call it from multiple typer commands
+
+    Not wrapped with operation_mode: this takes no plugin_name, so in a hub
+    directory that check would always raise MissingPluginName
+    """
+    path_table = Table(title="Python System Path(s)")
+
+    path_table.add_column("Index", style="cyan")
+    path_table.add_column("Paths", style="green")
+
+    system_paths = sys.path
+    for index, system_path in enumerate(system_paths):
+        path_table.add_row(str(index), str(system_path))
+
+    parser_table = Table(title="External Parser Path(s)")
+
+    parser_table.add_column("Index", style="cyan")
+    parser_table.add_column("Paths", style="magenta")
+    parser_table.add_column("On System Path?", style="steel_blue1")
+
+    hub_parser_paths = find_hub_parsers()
+    for index, parser_path in enumerate(hub_parser_paths):
+        parser_table.add_row(str(index), str(parser_path), str(str(parser_path.parent) in system_paths))
+
+    console = Console()
+    console.print(path_table)
+    console.print(parser_table)
+
+
+@cli_system_path
+def update_system_paths() -> None:
+    """
+    Write the parent directory of every discovered hub into the biothings_cli.pth
+    file under config.BIOTHINGS_CLI_PATH, creating that directory when missing
+    """
+    from biothings import config
+
+    discovery_path = pathlib.Path(config.BIOTHINGS_CLI_PATH).resolve().absolute()
+    discovery_path.mkdir(parents=True, exist_ok=True)
+
+    hub_parser_paths = find_hub_parsers()
+
+    # The actual path that needs to be added is the parent of the hub directory
+    hub_parser_paths = [path.parent for path in hub_parser_paths]
+
+    path_file = discovery_path.joinpath("biothings_cli.pth")
+    with open(path_file, "w", encoding="utf-8") as path_handle:
+        for parser_path in hub_parser_paths:
+            logger.info("Adding %s -> %s", parser_path, path_file)
+            path_handle.write(f"{parser_path}\n")
+
+
+@cli_system_path
+def remove_system_paths() -> None:
+    """
+    Delete the biothings_cli.pth file under config.BIOTHINGS_CLI_PATH and drop the
+    hub parent directories from the system path of the running process
+    """
+    from biothings import config
+
+    discovery_path = pathlib.Path(config.BIOTHINGS_CLI_PATH).resolve().absolute()
+    path_file = discovery_path.joinpath("biothings_cli.pth")
+    path_file.unlink(missing_ok=True)
+
+    for parser_path in find_hub_parsers():
+        # the parent of the hub directory is what gets written to the path file and
+        # added to the system path, so that is the entry that has to be removed
+        try:
+            sys.path.remove(str(parser_path.parent))
+        except ValueError:
+            pass
+
+
+def find_hub_parsers(upward_depth: int = 2) -> list[pathlib.Path]:
+    """
+    Attempts to locate any potential hub-based parsers that are used across different plugins
+    within a shared hub instance
+
+    Will attempt to traverse recursively by levels (defaults to 2 levels) above the present working directory
+    The typical hub structure has the plugins directory at the same level as the hub directory
+
+    pending.api structure:
+    root
+    ├── hub
+    ├── plugins
+
+    (mygene, mychem, myvariant, ...) structure
+    root
+    ├── src
+    │   ├── hub
+    │   ├── plugins
+
+    In either structure, the user is expected to be operating within the directory of a specific
+    plugin (root/plugin/plugin_directory/) or acting as a HUB within the (root/plugin) directory
+    Either case we should be able to find the shared parsers within 2 upper levels
+
+    Every ancestor level is searched recursively, so a hub found beneath one level is seen
+    again from the level above it. Each hub is reported once, in first-discovery order
+    """
+    directory_pointer = pathlib.Path.cwd()
+
+    # dict keys give an insertion-ordered set of the resolved hub directories
+    external_parser_paths = {}
+
+    # Match any path ending explicitly in hub. The bracket "[a]" matches the character literal
+    # enclosed in the bracket, so [h][u][b] matches the literal hub
+    match_expr = "**/[h][u][b]"
+    for _ in range(upward_depth):
+        directory_pointer = directory_pointer.parent
+        for hub_path in directory_pointer.glob(match_expr):
+            if hub_path.joinpath("dataload").exists():
+                external_parser_paths[hub_path.resolve().absolute()] = None
+    return list(external_parser_paths)
diff --git a/biothings/cli/manager.py b/biothings/cli/manager.py
index 511903b9..eb0cd565 100644
--- a/biothings/cli/manager.py
+++ b/biothings/cli/manager.py
@@ -29,13 +29,13 @@ async def defer_to_process(self, pinfo=None, func=None, *args, **kwargs):
     async def defer_to_thread(self, pinfo=None, func=None, *args):
         """keep the same signature as JobManager.defer_to_thread. The passed pinfo is ignored"""
 
-        async def run(fut, func):
+        async def run(fut, func, *args):
             try:
-                res = func()
+                res = func(*args)
                 fut.set_result(res)
             except Exception as gen_exc:
                 fut.set_exception(gen_exc)
 
         fut = self.loop.create_future()
-        self.loop.create_task(run(fut, func))
+        self.loop.create_task(run(fut, func, *args))
         return fut