From 51b613518e4dc9779780c246b81942e059f45ca5 Mon Sep 17 00:00:00 2001 From: Michael Date: Mon, 31 Mar 2025 18:05:43 -0700 Subject: [PATCH 01/11] added backup and restore item definitions --- src/sempy_labs/_items.py | 52 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 src/sempy_labs/_items.py diff --git a/src/sempy_labs/_items.py b/src/sempy_labs/_items.py new file mode 100644 index 00000000..f8bcb575 --- /dev/null +++ b/src/sempy_labs/_items.py @@ -0,0 +1,52 @@ +import sempy.fabric as fabric +from sempy_labs._helper_functions import ( + _base_api, + resolve_workspace_name_and_id, + resolve_lakehouse_name_and_id, + _mount, +) +from uuid import UUID +from typing import Optional +import json +import sempy_labs._icons as icons + + +def backup_item_definitions(workspace: Optional[str | UUID] = None, lakehouse: Optional[str | UUID] = None, lakehouse_workspace: Optional[str | UUID] = None): + + (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + (lakehouse_workspace_name, lakehouse_workspace_id) = resolve_workspace_name_and_id(lakehouse_workspace) + (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(lakehouse=lakehouse, workspace=lakehouse_workspace_id) + local_path = _mount(lakehouse=lakehouse_id, workspace=lakehouse_workspace_id) + + items = ['Report', 'SemanticModel'] + + dfI = fabric.list_items(workspace=workspace) + dfI_filt = dfI[dfI['Type'].isin([items])] + + for _, r in dfI_filt.iterrows(): + item_name = r['Display Name'] + item_id = r['Id'] + item_type = r['Type'] + result = _base_api(request=f"/v1/workspaces/{workspace_id}/items/{item_id}/getDefinition", method="post", lro_return_json=True, status_codes=None) + + file_path = f"{local_path}/Files/{workspace_name}/{item_type}/{item_name}.json" + + with open(file_path, "w") as json_file: + json.dump(result, json_file, indent=4) + print(f"{icons.green_dot} The '{item_name}' {item_type}' definition has been backed up to the Files section of the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace_name}' workspace.") + + +def restore_item_definitions(source_workspace: str, target_workspace: Optional[str | UUID] = None, lakehouse: Optional[str | UUID] = None, lakehouse_workspace: Optional[str | UUID] = None): + + from sempy_labs.lakehouse._blobs import list_blobs + (target_workspace_name, target_workspace_id) = resolve_workspace_name_and_id(target_workspace) + (lakehouse_workspace_name, lakehouse_workspace_id) = resolve_workspace_name_and_id(lakehouse_workspace) + (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(lakehouse=lakehouse, workspace=lakehouse_workspace_id) + + blobs = list_blobs(lakehouse=lakehouse_id, workspace=lakehouse_workspace_id, countainer="Files") + blobs_filt = blobs[(blobs['Name'].str.startswith(source_workspace)) & (blobs['Name'].str.endswith(".json"))] + + for _, r in blobs_filt.iterrows(): + item_type = r['Name'].split("/")[-2] + item_name = r['Name'].split("/")[-1].replace(".json", "") + # Create items... 
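A note on the item filter in this first patch: dfI["Type"].isin([items]) wraps the items list in a second list, so pandas tries to match each row against the list object itself and the filter never selects the intended rows (patch 06 below corrects it to isin(items)). A minimal pandas sketch of the corrected filter, using hypothetical data:

    import pandas as pd

    items = ["Report", "SemanticModel"]
    dfI = pd.DataFrame({"Type": ["Report", "SemanticModel", "Lakehouse"]})

    # isin() expects an iterable of scalar values; pass the list itself.
    dfI_filt = dfI[dfI["Type"].isin(items)]
    print(dfI_filt["Type"].tolist())  # ['Report', 'SemanticModel']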
From df1743dce4ee4dddb281036f9d2ddd4e61b5ae8c Mon Sep 17 00:00:00 2001 From: Michael Date: Tue, 1 Apr 2025 09:52:16 -0700 Subject: [PATCH 02/11] updated restore --- src/sempy_labs/_items.py | 91 ++++++++++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 18 deletions(-) diff --git a/src/sempy_labs/_items.py b/src/sempy_labs/_items.py index f8bcb575..abf4a9af 100644 --- a/src/sempy_labs/_items.py +++ b/src/sempy_labs/_items.py @@ -11,42 +11,97 @@ import sempy_labs._icons as icons -def backup_item_definitions(workspace: Optional[str | UUID] = None, lakehouse: Optional[str | UUID] = None, lakehouse_workspace: Optional[str | UUID] = None): +def backup_item_definitions( + workspace: Optional[str | UUID] = None, + lakehouse: Optional[str | UUID] = None, + lakehouse_workspace: Optional[str | UUID] = None, +): (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - (lakehouse_workspace_name, lakehouse_workspace_id) = resolve_workspace_name_and_id(lakehouse_workspace) - (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(lakehouse=lakehouse, workspace=lakehouse_workspace_id) + (lakehouse_workspace_name, lakehouse_workspace_id) = resolve_workspace_name_and_id( + lakehouse_workspace + ) + (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id( + lakehouse=lakehouse, workspace=lakehouse_workspace_id + ) local_path = _mount(lakehouse=lakehouse_id, workspace=lakehouse_workspace_id) - items = ['Report', 'SemanticModel'] + items = ["Report", "SemanticModel"] dfI = fabric.list_items(workspace=workspace) - dfI_filt = dfI[dfI['Type'].isin([items])] + dfI_filt = dfI[dfI["Type"].isin([items])] for _, r in dfI_filt.iterrows(): - item_name = r['Display Name'] - item_id = r['Id'] - item_type = r['Type'] - result = _base_api(request=f"/v1/workspaces/{workspace_id}/items/{item_id}/getDefinition", method="post", lro_return_json=True, status_codes=None) + item_name = r["Display Name"] + item_id = r["Id"] + item_type = r["Type"] + result = _base_api( + request=f"/v1/workspaces/{workspace_id}/items/{item_id}/getDefinition", + method="post", + lro_return_json=True, + status_codes=None, + ) file_path = f"{local_path}/Files/{workspace_name}/{item_type}/{item_name}.json" with open(file_path, "w") as json_file: json.dump(result, json_file, indent=4) - print(f"{icons.green_dot} The '{item_name}' {item_type}' definition has been backed up to the Files section of the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace_name}' workspace.") + print( + f"{icons.green_dot} The '{item_name}' {item_type}' definition has been backed up to the Files section of the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace_name}' workspace." 
+ ) -def restore_item_definitions(source_workspace: str, target_workspace: Optional[str | UUID] = None, lakehouse: Optional[str | UUID] = None, lakehouse_workspace: Optional[str | UUID] = None): +def restore_item_definitions( + source_workspace: str, + target_workspace: Optional[str | UUID] = None, + lakehouse: Optional[str | UUID] = None, + lakehouse_workspace: Optional[str | UUID] = None, +): from sempy_labs.lakehouse._blobs import list_blobs - (target_workspace_name, target_workspace_id) = resolve_workspace_name_and_id(target_workspace) - (lakehouse_workspace_name, lakehouse_workspace_id) = resolve_workspace_name_and_id(lakehouse_workspace) - (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(lakehouse=lakehouse, workspace=lakehouse_workspace_id) - blobs = list_blobs(lakehouse=lakehouse_id, workspace=lakehouse_workspace_id, countainer="Files") - blobs_filt = blobs[(blobs['Name'].str.startswith(source_workspace)) & (blobs['Name'].str.endswith(".json"))] + (target_workspace_name, target_workspace_id) = resolve_workspace_name_and_id( + target_workspace + ) + (lakehouse_workspace_name, lakehouse_workspace_id) = resolve_workspace_name_and_id( + lakehouse_workspace + ) + (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id( + lakehouse=lakehouse, workspace=lakehouse_workspace_id + ) + + blobs = list_blobs( + lakehouse=lakehouse_id, workspace=lakehouse_workspace_id, container="Files" + ) + blobs_filt = blobs[ + (blobs["Blob Name"].str.startswith(f"{lakehouse_id}/Files/{source_workspace}")) + & (blobs["Blob Name"].str.endswith(".json")) + ] + + local_path = _mount(lakehouse=lakehouse_id, workspace=lakehouse_workspace_id) for _, r in blobs_filt.iterrows(): - item_type = r['Name'].split("/")[-2] - item_name = r['Name'].split("/")[-1].replace(".json", "") + blob_name = r["Blob Name"] + blob_file = blob_name.split(f"{lakehouse_id}")[1][1:] + split = blob_name.split("/") + item_type = split[-2] + item_name = split[-1].replace(".json", "") + file_path = f"{local_path}/{blob_file}" + with open(file_path, "r", encoding="utf-8") as file: + data = json.load(file) + + payload = { + "displayName": item_name, + "type": item_type, + "description": "", + "definition": json.dumps(data, indent=2), + } + + _base_api( + request=f"/v1/workspaces/{target_workspace_id}/items", + method="post", + payload=payload, + status_codes=[201, 202], + lro_return_status_code=True, + ) # Create items... 
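For context on the payloads this patch round-trips: getDefinition returns a wrapper object whose inner "definition" key holds a list of base64-encoded parts, and the create-item endpoint expects that inner object back rather than a re-serialized JSON string (the series converges on this in patch 11). A sketch of the round trip under that assumed response shape; the part path and values are illustrative:

    # Assumed shape of a getDefinition response for a report.
    backup = {
        "definition": {
            "parts": [
                {
                    "path": "definition.pbir",
                    "payload": "<base64-encoded content>",
                    "payloadType": "InlineBase64",
                }
            ]
        }
    }

    # The create-item payload should carry the inner object as-is.
    payload = {
        "displayName": "My Report",
        "type": "Report",
        "definition": backup["definition"],
    }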
From 76bb3d080538ecbbe853d9ac70ca2ffbc3556d28 Mon Sep 17 00:00:00 2001 From: Michael Date: Tue, 1 Apr 2025 10:12:56 -0700 Subject: [PATCH 03/11] update --- src/sempy_labs/_items.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/sempy_labs/_items.py b/src/sempy_labs/_items.py index abf4a9af..64a7b7f4 100644 --- a/src/sempy_labs/_items.py +++ b/src/sempy_labs/_items.py @@ -34,18 +34,29 @@ def backup_item_definitions( for _, r in dfI_filt.iterrows(): item_name = r["Display Name"] item_id = r["Id"] + description = r["Description"] + # folder_id = r['Folder Id'] item_type = r["Type"] - result = _base_api( + definition = _base_api( request=f"/v1/workspaces/{workspace_id}/items/{item_id}/getDefinition", method="post", lro_return_json=True, status_codes=None, ) - file_path = f"{local_path}/Files/{workspace_name}/{item_type}/{item_name}.json" + file_path_prefix = f"{local_path}/Files/{workspace_name}/{item_type}/{item_name}" + definition_file_path = f"{file_path_prefix}/definition.json" + info_file_path = f"{file_path_prefix}/info.json" - with open(file_path, "w") as json_file: - json.dump(result, json_file, indent=4) + info = { + "description": description, + "folderId": None, # Update to folder_id + } + + with open(definition_file_path, "w") as json_file: + json.dump(definition, json_file, indent=4) + with open(info_file_path, "w") as json_file: + json.dump(info, json_file, indent=4) print( f"{icons.green_dot} The '{item_name}' {item_type}' definition has been backed up to the Files section of the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace_name}' workspace." ) @@ -86,7 +97,7 @@ def restore_item_definitions( split = blob_name.split("/") item_type = split[-2] item_name = split[-1].replace(".json", "") - file_path = f"{local_path}/{blob_file}" + definition_file_path = f"{local_path}/{blob_file}" with open(file_path, "r", encoding="utf-8") as file: data = json.load(file) @@ -96,7 +107,8 @@ def restore_item_definitions( "description": "", "definition": json.dumps(data, indent=2), } - + + # Create items... _base_api( request=f"/v1/workspaces/{target_workspace_id}/items", method="post", @@ -104,4 +116,4 @@ def restore_item_definitions( status_codes=[201, 202], lro_return_status_code=True, ) - # Create items... 
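One thing to watch in the patch above: the restore side renames file_path to definition_file_path, but the following "with open(file_path, ...)" still reads the old name and would raise a NameError; the next patch fixes this. The pattern the next two patches settle on is to stash the item metadata inside the saved definition JSON on backup and strip it back out on restore before building the create payload. A minimal sketch of that pattern, with hypothetical values and file path (the key names follow the series):

    import json

    definition = {"definition": {"parts": []}}  # as returned by getDefinition

    # Backup: embed the metadata alongside the definition before writing.
    definition["description"] = "Quarterly sales model"
    definition["folderId"] = None
    with open("/tmp/backup.json", "w") as f:
        json.dump(definition, f, indent=4)

    # Restore: pull the metadata back out so only the definition remains.
    with open("/tmp/backup.json") as f:
        data = json.load(f)
    description = data.pop("description", "")
    folder_id = data.pop("folderId", None)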
+ From 39d422317b4d29542e807febf38aab5eef45432f Mon Sep 17 00:00:00 2001 From: Michael Date: Wed, 2 Apr 2025 09:18:08 -0700 Subject: [PATCH 04/11] updates --- src/sempy_labs/_items.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/sempy_labs/_items.py b/src/sempy_labs/_items.py index 64a7b7f4..9d01a55b 100644 --- a/src/sempy_labs/_items.py +++ b/src/sempy_labs/_items.py @@ -44,19 +44,14 @@ def backup_item_definitions( status_codes=None, ) - file_path_prefix = f"{local_path}/Files/{workspace_name}/{item_type}/{item_name}" - definition_file_path = f"{file_path_prefix}/definition.json" - info_file_path = f"{file_path_prefix}/info.json" + file_path = f"{local_path}/Files/{workspace_name}/{item_type}/{item_name}.json" - info = { - "description": description, - "folderId": None, # Update to folder_id - } + definition['description'] = description + definition['folderId'] = None # Update to folder_id - with open(definition_file_path, "w") as json_file: + with open(file_path, "w") as json_file: json.dump(definition, json_file, indent=4) - with open(info_file_path, "w") as json_file: - json.dump(info, json_file, indent=4) + print( f"{icons.green_dot} The '{item_name}' {item_type}' definition has been backed up to the Files section of the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace_name}' workspace." ) @@ -98,16 +93,22 @@ def restore_item_definitions( item_type = split[-2] item_name = split[-1].replace(".json", "") definition_file_path = f"{local_path}/{blob_file}" - with open(file_path, "r", encoding="utf-8") as file: - data = json.load(file) + with open(definition_file_path, "r", encoding="utf-8") as file: + definition = json.load(file) + + description = definition.get('definition') + folder_id = definition.get('folderId') + + definition.pop('description') + definition.pop('folderId') payload = { "displayName": item_name, "type": item_type, - "description": "", - "definition": json.dumps(data, indent=2), + "description": description, + "definition": json.dumps(definition, indent=2), } - + # Create items... _base_api( request=f"/v1/workspaces/{target_workspace_id}/items", From e335a624f5649a91a6f4d1dbc1b11e1d0d801c79 Mon Sep 17 00:00:00 2001 From: Michael Date: Thu, 3 Apr 2025 11:05:36 -0700 Subject: [PATCH 05/11] folders --- src/sempy_labs/_folders.py | 268 +++++++++++++++++++++++++++++++++++++ src/sempy_labs/_items.py | 52 +++++-- 2 files changed, 306 insertions(+), 14 deletions(-) create mode 100644 src/sempy_labs/_folders.py diff --git a/src/sempy_labs/_folders.py b/src/sempy_labs/_folders.py new file mode 100644 index 00000000..a8531644 --- /dev/null +++ b/src/sempy_labs/_folders.py @@ -0,0 +1,268 @@ +import pandas as pd +from typing import Optional +from uuid import UUID +from sempy_labs._helper_functions import ( + resolve_workspace_name_and_id, + resolve_workspace_id, + _base_api, + _create_dataframe, + _update_dataframe_datatypes, + _is_valid_uuid, +) +import sempy_labs._icons as icons + + +def list_folders( + workspace: Optional[str | UUID] = None, + recursive: bool = True, + root_folder: Optional[str | UUID] = None, +) -> pd.DataFrame: + """ + Shows a list of folders from the specified workspace. + + Parameters + ---------- + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
+    recursive : bool, default=True
+        Whether to list folders recursively. If True, all folders within the root folder and its nested folders are listed; if False, only the folders directly within the root folder are listed.
+    root_folder : str | uuid.UUID, default=None
+        The name or ID of a root folder to filter by. If not provided, the workspace itself is used as the root folder.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing a list of folders from the specified workspace.
+    """
+
+    workspace_id = resolve_workspace_id(workspace)
+
+    columns = {
+        "Folder Name": "string",
+        "Folder Id": "string",
+        "Parent Folder Id": "string",
+    }
+
+    df = _create_dataframe(columns=columns)
+
+    url = f"/v1/workspaces/{workspace_id}/folders?recursive={recursive}"
+
+    if _is_valid_uuid(root_folder):
+        url += f"&rootFolderId={root_folder}"
+
+    responses = _base_api(
+        request=url,
+        client="fabric_sp",
+        uses_pagination=True,
+    )
+
+    for r in responses:
+        for v in r.get("value", []):
+            new_data = {
+                "Folder Name": v.get("displayName"),
+                "Folder Id": v.get("id"),
+                "Parent Folder Id": v.get("parentFolderId"),
+            }
+
+            df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+
+    _update_dataframe_datatypes(dataframe=df, column_map=columns)
+
+    # Add folder path
+    folder_map = {row["Folder Id"]: row["Folder Name"] for _, row in df.iterrows()}
+
+    def get_folder_path(folder_id):
+        if folder_id not in folder_map:
+            return ""
+
+        row = df.loc[df["Folder Id"] == folder_id].iloc[0]
+        if "Parent Folder Id" in row:
+            return get_folder_path(row["Parent Folder Id"]) + "/" + row["Folder Name"]
+        return row["Folder Name"]
+
+    # Apply function to create the path column
+    df["Folder Path"] = df["Folder Id"].apply(get_folder_path)
+
+    # Filter the folders if specified
+    if root_folder is not None and not _is_valid_uuid(root_folder):
+        root = df[df["Folder Name"] == root_folder]
+        if root.empty:
+            raise ValueError(f"Folder name '{root_folder}' not found.")
+        root_folder_id = root["Folder Id"].iloc[0]
+        df = df[df["Parent Folder Id"] == root_folder_id]
+
+    return df
+
+
+def create_folder(
+    name: str,
+    workspace: Optional[str | UUID] = None,
+    parent_folder: Optional[str | UUID] = None,
+):
+    """
+    Creates a new folder in the specified workspace.
+
+    Parameters
+    ----------
+    name : str
+        The name of the folder to create.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    parent_folder : str | uuid.UUID, default=None
+        The name/path or ID of the parent folder. If not provided, the folder will be created in the root folder of the workspace.
+    """
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+    url = f"/v1/workspaces/{workspace_id}/folders"
+
+    payload = {
+        "displayName": name,
+    }
+
+    if parent_folder:
+        parent_folder_id = resolve_folder_id(folder=parent_folder, workspace=workspace)
+        payload["parentFolderId"] = parent_folder_id
+
+    _base_api(
+        request=url,
+        client="fabric_sp",
+        method="post",
+        payload=payload,
+        status_codes=201,
+    )
+
+    print(
+        f"{icons.green_dot} The '{name}' folder has been successfully created within the '{workspace_name}' workspace."
+    )
+
+
+def resolve_folder_id(
+    folder: str | UUID, workspace: Optional[str | UUID] = None
+) -> UUID:
+
+    if _is_valid_uuid(folder):
+        return folder
+    else:
+        df = list_folders(workspace=workspace)
+        if not folder.startswith("/"):
+            folder_path = f"/{folder}"
+        else:
+            folder_path = folder
+        df_filt = df[df["Folder Path"] == folder_path]
+        if df_filt.empty:
+            (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+            raise ValueError(
+                f"{icons.red_dot} The '{folder}' folder does not exist within the '{workspace_name}' workspace."
+            )
+        return df_filt["Folder Id"].iloc[0]
+
+
+def delete_folder(folder: str | UUID, workspace: Optional[str | UUID] = None):
+    """
+    Deletes a folder from the specified workspace.
+
+    Parameters
+    ----------
+    folder : str | uuid.UUID
+        The name/path or ID of the folder to delete. If the folder is a subfolder, specify the path (e.g. "/folder/subfolder").
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+
+    folder_id = resolve_folder_id(folder=folder, workspace=workspace)
+
+    _base_api(
+        request=f"/v1/workspaces/{workspace_id}/folders/{folder_id}",
+        client="fabric_sp",
+        method="delete",
+    )
+
+    print(
+        f"{icons.green_dot} The '{folder}' folder has been successfully deleted from the '{workspace_name}' workspace."
+    )
+
+
+def move_folder(
+    folder: str | UUID,
+    target_folder: str | UUID,
+    workspace: Optional[str | UUID] = None,
+):
+    """
+    Moves a folder to a new location in the workspace.
+
+    Parameters
+    ----------
+    folder : str | uuid.UUID
+        The name/path or ID of the folder to move. If the folder is a subfolder, specify the path (e.g. "/folder/subfolder").
+    target_folder : str | uuid.UUID
+        The name/path or ID of the target folder to move the folder into. If the target folder is a subfolder, specify the path (e.g. "/folder/subfolder").
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    target_folder_id = resolve_folder_id(folder=target_folder, workspace=workspace)
+    folder_id = resolve_folder_id(folder=folder, workspace=workspace)
+
+    payload = {
+        "targetFolderId": target_folder_id,
+    }
+
+    _base_api(
+        request=f"/v1/workspaces/{workspace_id}/folders/{folder_id}/move",
+        client="fabric_sp",
+        method="post",
+        payload=payload,
+    )
+
+    print(
+        f"{icons.green_dot} The '{folder}' folder has been successfully moved to the '{target_folder}' folder within the '{workspace_name}' workspace."
+    )
+
+
+def update_folder(
+    folder: str | UUID, name: str, workspace: Optional[str | UUID] = None
+):
+    """
+    Updates the name of a folder in the specified workspace.
+
+    Parameters
+    ----------
+    folder : str | uuid.UUID
+        The name/path or ID of the folder to update. If the folder is a subfolder, specify the path (e.g. "/folder/subfolder").
+    name : str
+        The new name for the folder. Must meet the `folder name requirements `_.
+    workspace : str | uuid.UUID, default=None
+ Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + folder_id = resolve_folder_id(folder=folder, workspace=workspace) + + payload = { + "displayName": name, + } + + _base_api( + request=f"/v1/workspaces/{workspace_id}/folders/{folder_id}", + client="fabric_sp", + method="patch", + payload=payload, + ) + + print( + f"{icons.green_dot} The '{folder}' folder has been successfully updated to '{name}' within the '{workspace_name}' workspace." + ) diff --git a/src/sempy_labs/_items.py b/src/sempy_labs/_items.py index 9d01a55b..cda2eebc 100644 --- a/src/sempy_labs/_items.py +++ b/src/sempy_labs/_items.py @@ -1,4 +1,6 @@ import sempy.fabric as fabric +import os +import pandas as pd from sempy_labs._helper_functions import ( _base_api, resolve_workspace_name_and_id, @@ -9,6 +11,8 @@ from typing import Optional import json import sempy_labs._icons as icons +from sempy_labs.lakehouse._blobs import list_blobs +from sempy_labs._folders import list_folders def backup_item_definitions( @@ -16,6 +20,9 @@ def backup_item_definitions( lakehouse: Optional[str | UUID] = None, lakehouse_workspace: Optional[str | UUID] = None, ): + """ + Backups the item definitions of a workspace to a lakehouse. + """ (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) (lakehouse_workspace_name, lakehouse_workspace_id) = resolve_workspace_name_and_id( @@ -25,6 +32,7 @@ def backup_item_definitions( lakehouse=lakehouse, workspace=lakehouse_workspace_id ) local_path = _mount(lakehouse=lakehouse_id, workspace=lakehouse_workspace_id) + path_prefix = f"{local_path}/Files/{workspace_name}/" items = ["Report", "SemanticModel"] @@ -35,7 +43,7 @@ def backup_item_definitions( item_name = r["Display Name"] item_id = r["Id"] description = r["Description"] - # folder_id = r['Folder Id'] + folder_id = r["Folder Id"] item_type = r["Type"] definition = _base_api( request=f"/v1/workspaces/{workspace_id}/items/{item_id}/getDefinition", @@ -44,10 +52,13 @@ def backup_item_definitions( status_codes=None, ) - file_path = f"{local_path}/Files/{workspace_name}/{item_type}/{item_name}.json" + df_filt = dfF[dfF["Folder Id"] == folder_id] + folder_path = df_filt["Folder Path"].iloc[0] - definition['description'] = description - definition['folderId'] = None # Update to folder_id + file_path = f"{path_prefix}/{item_type}/{item_name}.json" + + definition["description"] = description + definition["folderPath"] = folder_path with open(file_path, "w") as json_file: json.dump(definition, json_file, indent=4) @@ -56,6 +67,11 @@ def backup_item_definitions( f"{icons.green_dot} The '{item_name}' {item_type}' definition has been backed up to the Files section of the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace_name}' workspace." 
) + # Save folder structure + dfF = list_folders(workspace=workspace) + with open(f"{path_prefix}/folderStructure.json", "w") as json_file: + json.dump(dfF.to_json(), json_file, indent=4) + def restore_item_definitions( source_workspace: str, @@ -64,8 +80,6 @@ def restore_item_definitions( lakehouse_workspace: Optional[str | UUID] = None, ): - from sempy_labs.lakehouse._blobs import list_blobs - (target_workspace_name, target_workspace_id) = resolve_workspace_name_and_id( target_workspace ) @@ -86,21 +100,32 @@ def restore_item_definitions( local_path = _mount(lakehouse=lakehouse_id, workspace=lakehouse_workspace_id) + # Create the folder structure + with open( + f"{local_path}/Files/folderStructure.json", "r", encoding="utf-8" + ) as file: + df_folders = pd.json_normalize(json.load(file)) + for _, r in df_folders.iterrows(): + folder_name = r["Folder Name"] + folder_path = r["Folder Path"] + for _, r in blobs_filt.iterrows(): blob_name = r["Blob Name"] blob_file = blob_name.split(f"{lakehouse_id}")[1][1:] - split = blob_name.split("/") - item_type = split[-2] - item_name = split[-1].replace(".json", "") + file_name = os.path.basename(blob_file) + # directory = os.path.dirname(blob_file) + # folder_structure = os.path.dirname(directory) + item_type = os.path.basename(os.path.dirname(blob_file)) + item_name = os.path.splitext(file_name)[0] definition_file_path = f"{local_path}/{blob_file}" with open(definition_file_path, "r", encoding="utf-8") as file: definition = json.load(file) - description = definition.get('definition') - folder_id = definition.get('folderId') + description = definition.get("definition") + folder_path = definition.get("folderPath") - definition.pop('description') - definition.pop('folderId') + definition.pop("description") + definition.pop("folderPath") payload = { "displayName": item_name, @@ -117,4 +142,3 @@ def restore_item_definitions( status_codes=[201, 202], lro_return_status_code=True, ) - From ebb0785962dd00b7a48819e3db6c64a78785e625 Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 27 Apr 2025 11:39:51 +0300 Subject: [PATCH 06/11] update --- src/sempy_labs/_items.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/sempy_labs/_items.py b/src/sempy_labs/_items.py index cda2eebc..9b7129ed 100644 --- a/src/sempy_labs/_items.py +++ b/src/sempy_labs/_items.py @@ -37,7 +37,12 @@ def backup_item_definitions( items = ["Report", "SemanticModel"] dfI = fabric.list_items(workspace=workspace) - dfI_filt = dfI[dfI["Type"].isin([items])] + dfI_filt = dfI[dfI["Type"].isin(items)] + + # Save folder structure + dfF = list_folders(workspace=workspace) + with open(f"{path_prefix}/folderStructure.json", "w") as json_file: + json.dump(dfF.to_json(), json_file, indent=4) for _, r in dfI_filt.iterrows(): item_name = r["Display Name"] @@ -67,11 +72,6 @@ def backup_item_definitions( f"{icons.green_dot} The '{item_name}' {item_type}' definition has been backed up to the Files section of the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace_name}' workspace." 
) - # Save folder structure - dfF = list_folders(workspace=workspace) - with open(f"{path_prefix}/folderStructure.json", "w") as json_file: - json.dump(dfF.to_json(), json_file, indent=4) - def restore_item_definitions( source_workspace: str, From c33d568793614506ee4d701770043e6be1f44ef5 Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 27 Apr 2025 11:50:00 +0300 Subject: [PATCH 07/11] change --- src/sempy_labs/_items.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/sempy_labs/_items.py b/src/sempy_labs/_items.py index 9b7129ed..57016a38 100644 --- a/src/sempy_labs/_items.py +++ b/src/sempy_labs/_items.py @@ -36,8 +36,11 @@ def backup_item_definitions( items = ["Report", "SemanticModel"] - dfI = fabric.list_items(workspace=workspace) - dfI_filt = dfI[dfI["Type"].isin(items)] + #dfI = fabric.list_items(workspace=workspace) + response = _base_api(request=f"/v1/workspaces/{workspace_id}/items?recursive=True") + df = pd.json_normalize(response.json()['value']) + dfI_filt = df[df['type'].isin(items)] + #dfI_filt = dfI[dfI["Type"].isin(items)] # Save folder structure dfF = list_folders(workspace=workspace) @@ -45,11 +48,11 @@ def backup_item_definitions( json.dump(dfF.to_json(), json_file, indent=4) for _, r in dfI_filt.iterrows(): - item_name = r["Display Name"] - item_id = r["Id"] - description = r["Description"] - folder_id = r["Folder Id"] - item_type = r["Type"] + item_name = r["displayName"] + item_id = r["id"] + description = r["descritption"] + folder_id = r["folderid"] + item_type = r["type"] definition = _base_api( request=f"/v1/workspaces/{workspace_id}/items/{item_id}/getDefinition", method="post", From 0ce5c52bcdffd76b61536ccd950f59b92cd50fb2 Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 27 Apr 2025 12:27:26 +0300 Subject: [PATCH 08/11] backup is working now --- src/sempy_labs/_items.py | 53 +++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/src/sempy_labs/_items.py b/src/sempy_labs/_items.py index 57016a38..64e5b6a2 100644 --- a/src/sempy_labs/_items.py +++ b/src/sempy_labs/_items.py @@ -32,26 +32,41 @@ def backup_item_definitions( lakehouse=lakehouse, workspace=lakehouse_workspace_id ) local_path = _mount(lakehouse=lakehouse_id, workspace=lakehouse_workspace_id) - path_prefix = f"{local_path}/Files/{workspace_name}/" - - items = ["Report", "SemanticModel"] - - #dfI = fabric.list_items(workspace=workspace) + path_prefix = f"{local_path}/Files/SLL_backup_item_definitions/{workspace_name}" + + # Item types which have definitions + items = [ + "CopyJob", + "Eventhouse", + "DataPipeline", + "KQLDatabase", + "KQLDashboard", + "KQLQueryset", + "MirroredDatabase", + "MountedDataFactory", + "Environment", + "Notebook", + "Report", + "SemanticModel", + "Eventstream", + "Reflex", + "SparkJobDefinition", + "VariableLibrary", + ] # Dataflow, GraphQLApi + + # dfI = fabric.list_items(workspace=workspace) response = _base_api(request=f"/v1/workspaces/{workspace_id}/items?recursive=True") - df = pd.json_normalize(response.json()['value']) - dfI_filt = df[df['type'].isin(items)] - #dfI_filt = dfI[dfI["Type"].isin(items)] + df = pd.json_normalize(response.json()["value"]) + dfI_filt = df[df["type"].isin(items)] + # dfI_filt = dfI[dfI["Type"].isin(items)] - # Save folder structure dfF = list_folders(workspace=workspace) - with open(f"{path_prefix}/folderStructure.json", "w") as json_file: - json.dump(dfF.to_json(), json_file, indent=4) for _, r in dfI_filt.iterrows(): item_name = r["displayName"] 
item_id = r["id"] - description = r["descritption"] - folder_id = r["folderid"] + description = r["description"] + folder_id = r.get("folderId") item_type = r["type"] definition = _base_api( request=f"/v1/workspaces/{workspace_id}/items/{item_id}/getDefinition", @@ -60,14 +75,18 @@ def backup_item_definitions( status_codes=None, ) - df_filt = dfF[dfF["Folder Id"] == folder_id] - folder_path = df_filt["Folder Path"].iloc[0] - - file_path = f"{path_prefix}/{item_type}/{item_name}.json" + # Obtain the folder path + folder_path = "" + if folder_id: + df_filt = dfF[dfF["Folder Id"] == folder_id] + if not df_filt.empty: + folder_path = df_filt["Folder Path"].iloc[0] definition["description"] = description definition["folderPath"] = folder_path + file_path = f"{path_prefix}/{item_type}/{item_name}.json" + os.makedirs(os.path.dirname(file_path), exist_ok=True) with open(file_path, "w") as json_file: json.dump(definition, json_file, indent=4) From 6b5a1be04b025d5b0ece013b747aa6781c28aff4 Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 27 Apr 2025 12:40:36 +0300 Subject: [PATCH 09/11] fixes --- src/sempy_labs/_items.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/sempy_labs/_items.py b/src/sempy_labs/_items.py index 64e5b6a2..55690315 100644 --- a/src/sempy_labs/_items.py +++ b/src/sempy_labs/_items.py @@ -49,7 +49,7 @@ def backup_item_definitions( "Report", "SemanticModel", "Eventstream", - "Reflex", + # "Reflex", "SparkJobDefinition", "VariableLibrary", ] # Dataflow, GraphQLApi @@ -68,6 +68,7 @@ def backup_item_definitions( description = r["description"] folder_id = r.get("folderId") item_type = r["type"] + print(f"{item_name} : {item_type}") definition = _base_api( request=f"/v1/workspaces/{workspace_id}/items/{item_id}/getDefinition", method="post", From ae9b8a8da007fb49e748bd4c319698f4c5b7b0bd Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 27 Apr 2025 14:09:40 +0300 Subject: [PATCH 10/11] restore --- src/sempy_labs/_items.py | 87 ++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/src/sempy_labs/_items.py b/src/sempy_labs/_items.py index 55690315..43e87243 100644 --- a/src/sempy_labs/_items.py +++ b/src/sempy_labs/_items.py @@ -15,6 +15,29 @@ from sempy_labs._folders import list_folders +# Item types which have definitions +item_list = [ + "CopyJob", + "Eventhouse", + "DataPipeline", + "KQLDatabase", + "KQLDashboard", + "KQLQueryset", + "MirroredDatabase", + "MountedDataFactory", + "Environment", + "Notebook", + "Report", + "SemanticModel", + "Eventstream", + # "Reflex", # This API is not working + "SparkJobDefinition", + "VariableLibrary", + # Dataflow, + # GraphQLApi, +] + + def backup_item_definitions( workspace: Optional[str | UUID] = None, lakehouse: Optional[str | UUID] = None, @@ -34,30 +57,10 @@ def backup_item_definitions( local_path = _mount(lakehouse=lakehouse_id, workspace=lakehouse_workspace_id) path_prefix = f"{local_path}/Files/SLL_backup_item_definitions/{workspace_name}" - # Item types which have definitions - items = [ - "CopyJob", - "Eventhouse", - "DataPipeline", - "KQLDatabase", - "KQLDashboard", - "KQLQueryset", - "MirroredDatabase", - "MountedDataFactory", - "Environment", - "Notebook", - "Report", - "SemanticModel", - "Eventstream", - # "Reflex", - "SparkJobDefinition", - "VariableLibrary", - ] # Dataflow, GraphQLApi - # dfI = fabric.list_items(workspace=workspace) response = _base_api(request=f"/v1/workspaces/{workspace_id}/items?recursive=True") df = 
pd.json_normalize(response.json()["value"]) - dfI_filt = df[df["type"].isin(items)] + dfI_filt = df[df["type"].isin(item_list)] # dfI_filt = dfI[dfI["Type"].isin(items)] dfF = list_folders(workspace=workspace) @@ -97,40 +100,48 @@ def backup_item_definitions( def restore_item_definitions( - source_workspace: str, + backup_file_path: str, target_workspace: Optional[str | UUID] = None, - lakehouse: Optional[str | UUID] = None, - lakehouse_workspace: Optional[str | UUID] = None, ): + """ + Creates items based on an item definition backup file path. + + Parameters + ---------- + backup_file_path : str + The path to the backup file. For example: "abfss://{lakehouse_id}@onelake.dfs.fabric.microsoft.com/{workspace_id}/Files/SLL_backup_item_definitions/My Workspace Name" + target_workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ (target_workspace_name, target_workspace_id) = resolve_workspace_name_and_id( target_workspace ) - (lakehouse_workspace_name, lakehouse_workspace_id) = resolve_workspace_name_and_id( - lakehouse_workspace - ) - (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id( - lakehouse=lakehouse, workspace=lakehouse_workspace_id - ) + + lakehouse_workspace_id = backup_file_path.split('abfss://')[1].split('@')[0] + lakehouse_id = backup_file_path.split('microsoft.com/')[1].split('/')[0] + folder_path = backup_file_path.split(f'microsoft.com/{lakehouse_id}/')[1] blobs = list_blobs( lakehouse=lakehouse_id, workspace=lakehouse_workspace_id, container="Files" ) blobs_filt = blobs[ - (blobs["Blob Name"].str.startswith(f"{lakehouse_id}/Files/{source_workspace}")) + (blobs["Blob Name"].str.startswith(f"{lakehouse_id}/{folder_path}")) & (blobs["Blob Name"].str.endswith(".json")) ] local_path = _mount(lakehouse=lakehouse_id, workspace=lakehouse_workspace_id) # Create the folder structure - with open( - f"{local_path}/Files/folderStructure.json", "r", encoding="utf-8" - ) as file: - df_folders = pd.json_normalize(json.load(file)) - for _, r in df_folders.iterrows(): - folder_name = r["Folder Name"] - folder_path = r["Folder Path"] + #with open( + # f"{local_path}/Files/folderStructure.json", "r", encoding="utf-8" + #) as file: + # df_folders = pd.json_normalize(json.load(file)) + # for _, r in df_folders.iterrows(): + # folder_name = r["Folder Name"] + # folder_path = r["Folder Path"] for _, r in blobs_filt.iterrows(): blob_name = r["Blob Name"] From e78849f7218a3e34b9dc5aca1cf5c21048de8156 Mon Sep 17 00:00:00 2001 From: Michael Date: Mon, 28 Apr 2025 10:57:01 +0300 Subject: [PATCH 11/11] fixed restore --- src/sempy_labs/__init__.py | 2 + src/sempy_labs/_folders.py | 11 ++++- src/sempy_labs/_items.py | 91 ++++++++++++++++++++++++++++++-------- 3 files changed, 84 insertions(+), 20 deletions(-) diff --git a/src/sempy_labs/__init__.py b/src/sempy_labs/__init__.py index 4f3d14f5..88a6e45e 100644 --- a/src/sempy_labs/__init__.py +++ b/src/sempy_labs/__init__.py @@ -120,6 +120,7 @@ create_environment, delete_environment, publish_environment, + list_environments, ) from sempy_labs._clear_cache import ( clear_cache, @@ -558,4 +559,5 @@ "delete_sql_database", "list_sql_databases", "delta_analyzer_history", + "list_environments", ] diff --git a/src/sempy_labs/_folders.py b/src/sempy_labs/_folders.py index a8531644..57739967 100644 --- a/src/sempy_labs/_folders.py +++ b/src/sempy_labs/_folders.py @@ -100,7 +100,7 
@@ def create_folder(
     name: str,
     workspace: Optional[str | UUID] = None,
     parent_folder: Optional[str | UUID] = None,
-):
+) -> UUID:
     """
     Creates a new folder in the specified workspace.
@@ -114,6 +114,11 @@ def create_folder(
         or if no lakehouse attached, resolves to the workspace of the notebook.
     parent_folder : str | uuid.UUID, default=None
         The name/path or ID of the parent folder. If not provided, the folder will be created in the root folder of the workspace.
+
+    Returns
+    -------
+    uuid.UUID
+        The ID of the created folder.
     """
 
     (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
@@ -128,7 +133,7 @@ def create_folder(
         parent_folder_id = resolve_folder_id(folder=parent_folder, workspace=workspace)
         payload["parentFolderId"] = parent_folder_id
 
-    _base_api(
+    response = _base_api(
         request=url,
         client="fabric_sp",
         method="post",
@@ -140,6 +145,8 @@ def create_folder(
         f"{icons.green_dot} The '{name}' folder has been successfully created within the '{workspace_name}' workspace."
     )
 
+    return response.json().get("id")
+
 
 def resolve_folder_id(
     folder: str | UUID, workspace: Optional[str | UUID] = None
diff --git a/src/sempy_labs/_items.py b/src/sempy_labs/_items.py
index 43e87243..b617f41d 100644
--- a/src/sempy_labs/_items.py
+++ b/src/sempy_labs/_items.py
@@ -12,7 +12,11 @@
 import json
 import sempy_labs._icons as icons
 from sempy_labs.lakehouse._blobs import list_blobs
-from sempy_labs._folders import list_folders
+from sempy_labs._folders import (
+    list_folders,
+    create_folder,
+)
+import re
 
 
 # Item types which have definitions
@@ -120,9 +124,9 @@ def restore_item_definitions(
         target_workspace
     )
 
-    lakehouse_workspace_id = backup_file_path.split('abfss://')[1].split('@')[0]
-    lakehouse_id = backup_file_path.split('microsoft.com/')[1].split('/')[0]
-    folder_path = backup_file_path.split(f'microsoft.com/{lakehouse_id}/')[1]
+    lakehouse_workspace_id = backup_file_path.split("abfss://")[1].split("@")[0]
+    lakehouse_id = backup_file_path.split("microsoft.com/")[1].split("/")[0]
+    folder_path = backup_file_path.split(f"microsoft.com/{lakehouse_id}/")[1]
 
     blobs = list_blobs(
         lakehouse=lakehouse_id, workspace=lakehouse_workspace_id, container="Files"
@@ -134,14 +138,36 @@ def restore_item_definitions(
 
     local_path = _mount(lakehouse=lakehouse_id, workspace=lakehouse_workspace_id)
 
-    # Create the folder structure
-    #with open(
-    #    f"{local_path}/Files/folderStructure.json", "r", encoding="utf-8"
-    #) as file:
-    #    df_folders = pd.json_normalize(json.load(file))
-    #    for _, r in df_folders.iterrows():
-    #        folder_name = r["Folder Name"]
-    #        folder_path = r["Folder Path"]
+    # Create any missing folders along a backed-up folder path
+    def ensure_folder_path_exists(folder_path):
+        # Normalize the paths if necessary
+        existing_paths = set(
+            dfF["Folder Path"].str.strip("/")
+        )  # remove leading/trailing slashes for easier comparison
+
+        parts = folder_path.strip("/").split("/")
+        current_path = ""
+
+        for part in parts:
+            if current_path:
+                current_path += "/" + part
+            else:
+                current_path = part
+
+            if current_path not in existing_paths:
+                # Create the folder since it does not exist
+                parent_folder = (
+                    "/" + "/".join(current_path.split("/")[:-1])
+                    if "/" in current_path
+                    else None  # top-level folders have no parent
+                )
+                # Create the missing folder under that parent
+                create_folder(
+                    name=part,
+                    workspace=target_workspace_id,
+                    parent_folder=parent_folder,
+                )
+                existing_paths.add(current_path)
 
     for _, r in blobs_filt.iterrows():
         blob_name = r["Blob Name"]
@@ -155,19 +181,35 @@ def restore_item_definitions(
         with open(definition_file_path, "r", encoding="utf-8") as file:
            definition = json.load(file)
 
-        description = definition.get("definition")
+        description = definition.get("description")
         folder_path = definition.get("folderPath")
-
-        definition.pop("description")
-        definition.pop("folderPath")
+        raw_definition = definition.get("definition")
 
         payload = {
             "displayName": item_name,
             "type": item_type,
-            "description": description,
-            "definition": json.dumps(definition, indent=2),
+            "definition": raw_definition,
         }
 
+        if description:
+            payload["description"] = description
+        if folder_path:
+            dfF = list_folders(workspace=target_workspace_id)
+            dfF_filt = dfF[dfF["Folder Path"] == folder_path]
+            if not dfF_filt.empty:
+                folder_id = dfF_filt["Folder Id"].iloc[0]
+            else:
+                folder_id = None
+                # Create the folder if it does not exist
+                ensure_folder_path_exists(folder_path)
+                # Get the folder ID again after creating it
+                dfF = list_folders(workspace=target_workspace_id)
+                dfF_filt = dfF[dfF["Folder Path"] == folder_path]
+                if not dfF_filt.empty:
+                    folder_id = dfF_filt["Folder Id"].iloc[0]
+
+            payload["folderId"] = folder_id
+
         # Create items...
         _base_api(
             request=f"/v1/workspaces/{target_workspace_id}/items",
             method="post",
             payload=payload,
             status_codes=[201, 202],
             lro_return_status_code=True,
         )
+
+        print(
+            f"{icons.green_dot} Created the '{item_name}' {_split_camel_case(item_type)} within the '{target_workspace_name}' workspace."
+        )
+
+
+def _split_camel_case(text):
+    # Find acronym groups or normal CamelCase words
+    matches = re.finditer(r"([A-Z]+(?=[A-Z][a-z])|[A-Z][a-z]*)", text)
+    words = [m.group(0) for m in matches]
+
+    # Lowercase normal words, keep acronyms as-is
+    words = [w if w.isupper() else w.lower() for w in words]
+
+    return " ".join(words)
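A minimal end-to-end sketch of the API this series lands on, as run from a Fabric notebook. The workspace, lakehouse, and GUID values are hypothetical, and the functions are assumed to be imported from the private module directly since they are not exported in __init__.py:

    from sempy_labs._items import backup_item_definitions, restore_item_definitions

    # Back up every supported item definition in a workspace to a lakehouse.
    backup_item_definitions(
        workspace="Sales Workspace",
        lakehouse="BackupLakehouse",
        lakehouse_workspace="Admin Workspace",
    )

    # Restore into another workspace from the abfss path the backup wrote to.
    # Per the parsing in restore_item_definitions, the GUID before the '@' is
    # the lakehouse's workspace ID and the GUID after 'microsoft.com/' is the
    # lakehouse ID.
    restore_item_definitions(
        backup_file_path=(
            "abfss://11111111-1111-1111-1111-111111111111@onelake.dfs.fabric.microsoft.com/"
            "22222222-2222-2222-2222-222222222222/Files/SLL_backup_item_definitions/Sales Workspace"
        ),
        target_workspace="Sales Workspace Restored",
    )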