diff --git a/configs/datasets/toy_point_cloud.yaml b/configs/datasets/toy_point_cloud.yaml new file mode 100644 index 00000000..b1545947 --- /dev/null +++ b/configs/datasets/toy_point_cloud.yaml @@ -0,0 +1,12 @@ +data_domain: point_cloud +data_type: toy_dataset +data_name: toy_point_cloud +data_dir: datasets/${data_domain}/${data_type} + +# Dataset parameters +num_points: 8 +num_classes: 2 + +num_features: 1 +task: classification +loss_type: cross_entropy diff --git a/configs/transforms/liftings/pointcloud2simplicial/witness_lifting.yaml b/configs/transforms/liftings/pointcloud2simplicial/witness_lifting.yaml new file mode 100644 index 00000000..bc8143a0 --- /dev/null +++ b/configs/transforms/liftings/pointcloud2simplicial/witness_lifting.yaml @@ -0,0 +1,3 @@ +transform_type: "lifting" +transform_name: "WitnessLifting" +feature_lifting: ProjectionSum diff --git a/modules/data/load/loaders.py b/modules/data/load/loaders.py index 8ccafb11..4ce265e3 100755 --- a/modules/data/load/loaders.py +++ b/modules/data/load/loaders.py @@ -12,6 +12,7 @@ load_cell_complex_dataset, load_hypergraph_pickle_dataset, load_manual_graph, + load_point_cloud, load_simplicial_dataset, ) @@ -204,3 +205,34 @@ def load( torch_geometric.data.Dataset object containing the loaded data. """ return load_hypergraph_pickle_dataset(self.parameters) + + +class PointCloudLoader(AbstractLoader): + r"""Loader for point-cloud dataset. + Parameters + ---------- + parameters: DictConfig + Configuration parameters + """ + + def __init__(self, parameters: DictConfig): + super().__init__(parameters) + self.parameters = parameters + self.data_dir = self.parameters["data_dir"] + if "num_classes" not in self.cfg: + self.cfg["num_classes"] = 2 + + def load(self) -> torch_geometric.data.Dataset: + r"""Load point-cloud dataset. + Parameters + ---------- + None + Returns + ------- + torch_geometric.data.Dataset + torch_geometric.data.Dataset object containing the loaded data. 
def load_point_cloud(num_classes: int = 2, num_points: int = 18, seed: int = 42):
    """Create a toy point cloud dataset.

    Parameters
    ----------
    num_classes : int, optional
        Exclusive upper bound for the integer label drawn for each point.
    num_points : int, optional
        Number of points to generate.
    seed : int, optional
        Seed for the NumPy random generator used for every draw.

    Returns
    -------
    torch_geometric.data.Data
        Object with ``pos`` (``num_points x 2`` float positions), ``y``
        (``num_points`` long labels) and ``x`` (``num_points x 1`` float
        features drawn from the integers 0, 1, 2).
    """
    generator = np.random.default_rng(seed)

    # The three draws share one generator, so their order (positions, labels,
    # features) determines the values that are produced.
    raw_positions = generator.random((num_points, 2))
    raw_labels = generator.integers(num_classes, size=num_points)
    raw_features = generator.integers(3, size=(num_points, 1))

    return torch_geometric.data.Data(
        x=torch.tensor(raw_features, dtype=torch.float),
        y=torch.tensor(raw_labels, dtype=torch.long),
        pos=torch.tensor(raw_positions, dtype=torch.float),
    )
class WitnessLifting(PointCloud2SimplicialLifting):
    r"""Lift a point cloud to a simplicial complex through a GUDHI witness complex.

    A random subset of the input points is sampled and handed, together with
    the full point cloud, to ``gudhi.EuclideanWitnessComplex``. The resulting
    simplex tree is converted into a ``toponetx`` ``SimplicialComplex``.

    Parameters
    ----------
    is_weak : bool, optional
        Whether to build the weak witness complex. Only ``True`` is
        implemented. Default is True.
    is_euclidian : bool, optional
        Whether the points live in Euclidean space. Only ``True`` is
        implemented. Default is True.
    landmark_proportion : float, optional
        Fraction of the input points that is randomly sampled.
        Default is 0.8.
    max_alpha_square : float, optional
        First argument forwarded to ``create_simplex_tree``. Default is 0.15.
    complex_dim : int, optional
        Second argument forwarded to ``create_simplex_tree``; also the maximal
        rank passed to ``get_complex_connectivity``. Default is 2.
    seed : int, optional
        Seed passed to ``torch.manual_seed``. Default is 42.
    **kwargs : optional
        Additional arguments forwarded to the parent class.
    """

    def __init__(
        self,
        is_weak=True,
        is_euclidian=True,
        landmark_proportion: float = 0.8,
        max_alpha_square=0.15,
        complex_dim=2,
        seed=42,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.is_weak = is_weak
        self.is_euclidian = is_euclidian
        self.landmark_proportion = landmark_proportion
        self.max_alpha_square = max_alpha_square
        self.complex_dim = complex_dim
        self.seed = seed
        # Seeds torch's *global* RNG; `lift_topology` samples with
        # `torch.randperm`, which draws from that global state.
        torch.manual_seed(seed)

    def _get_lifted_topology(self, simplicial_complex: SimplicialComplex) -> dict:
        r"""Returns the lifted topology.

        Parameters
        ----------
        simplicial_complex : SimplicialComplex
            The simplicial complex, with a ``"features"`` attribute set on its
            rank-0 simplices.

        Returns
        ---------
        dict
            The connectivity returned by ``get_complex_connectivity`` extended
            with ``"x_0"``, the stacked rank-0 ``"features"`` attributes.
        """
        lifted_topology = get_complex_connectivity(simplicial_complex, self.complex_dim)
        lifted_topology["x_0"] = torch.stack(
            list(simplicial_complex.get_simplex_attributes("features", 0).values())
        )

        return lifted_topology

    def lift_topology(
        self,
        witnesses: torch_geometric.data.Data,
    ) -> dict:
        r"""Lifts the topology of a point cloud to a witness complex.

        Parameters
        ----------
        witnesses : torch_geometric.data.Data
            The input data; ``pos`` holds the point positions and ``x`` the
            per-point features.

        Returns
        -------
        dict
            The lifted topology.

        Raises
        ------
        NotImplementedError
            If ``is_weak`` or ``is_euclidian`` is False. Only the weak
            Euclidean witness complex is implemented.
        """
        # Fail explicitly for unsupported configurations instead of reaching
        # the code below with no complex built.
        if not (self.is_euclidian and self.is_weak):
            raise NotImplementedError(
                "WitnessLifting currently supports only the weak Euclidean "
                "witness complex (is_weak=True, is_euclidian=True)."
            )

        num_points = len(witnesses.pos)

        # Random subset containing round(num_points * landmark_proportion) points.
        permutation = torch.randperm(num_points)
        sampled_idx = permutation[: round(num_points * self.landmark_proportion)]
        sampled_positions = witnesses.pos[sampled_idx]

        # NOTE(review): the GUDHI reference appears to document this
        # constructor as (landmarks, witnesses). With the positional call
        # below the full point cloud would then act as the landmark set and
        # the sampled subset as the witnesses. The feature assignment further
        # down attaches one feature row per input point, so the current
        # argument order is kept as-is -- confirm this is the intent.
        witness_complex = gd.EuclideanWitnessComplex(witnesses.pos, sampled_positions)
        simplex_tree = witness_complex.create_simplex_tree(
            self.max_alpha_square, self.complex_dim
        )
        simplicial_complex = SimplicialComplex.from_gudhi(simplex_tree)

        # One feature row per input point, keyed by the point's index.
        node_features = {i: witnesses.x[i, :] for i in range(witnesses.x.shape[0])}
        simplicial_complex.set_simplex_attributes(node_features, name="features")

        return self._get_lifted_topology(simplicial_complex)
lifted_data_unsigned = self.lifting_unsigned.forward(self.data.clone()) + + expected_incidence_1 = torch.tensor( + [ + [1.0, 1.0, 1.0, 0.0, 0.0], + [1.0, 0.0, 0.0, 1.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 1.0], + [0.0, 1.0, 0.0, 1.0, 0.0], + [0.0, 0.0, 1.0, 0.0, 1.0], + ] + ) + + assert ( + abs(expected_incidence_1) == lifted_data_unsigned.incidence_1.to_dense() + ).all(), "Something is wrong with unsigned incidence_1 (nodes to edges)." + assert ( + expected_incidence_1 == lifted_data_signed.incidence_1.to_dense() + ).all(), "Something is wrong with signed incidence_1 (nodes to edges)." + + expected_incidence_2 = torch.tensor([[1.0], [1.0], [0.0], [1.0], [0.0]]) + + assert ( + abs(expected_incidence_2) == lifted_data_unsigned.incidence_2.to_dense() + ).all(), "Something is wrong with unsigned incidence_2 (edges to triangles)." + assert ( + expected_incidence_2 == lifted_data_signed.incidence_2.to_dense() + ).all(), "Something is wrong with signed incidence_2 (edges to triangles)." diff --git a/tutorials/pointcloud2simplicial/witness_lifting.ipynb b/tutorials/pointcloud2simplicial/witness_lifting.ipynb new file mode 100644 index 00000000..7aa7c259 --- /dev/null +++ b/tutorials/pointcloud2simplicial/witness_lifting.ipynb @@ -0,0 +1,335 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Point Cloud-to-Simplicial Complex Lifting Tutorial\n", + "\n", + "***\n", + "This notebook shows how to import a dataset, with the desired lifting, and how to run a neural network using the loaded data.\n", + "\n", + "The notebook is divided into sections:\n", + "\n", + "- [Loading the dataset](#loading-the-dataset) loads the config files for the data and the desired tranformation, createsa a dataset object and visualizes it.\n", + "- [Loading and applying the lifting](#loading-and-applying-the-lifting) defines a simple neural network to test that the lifting creates the expected incidence matrices.\n", + "- [Create and run a simplicial nn 
model](#create-and-run-a-simplicial-nn-model) simply runs a forward pass of the model to check that everything is working as expected.\n", + "\n", + "***\n", + "***\n", + "\n", + "Note that for simplicity the notebook is setup to use a simple graph. However, there is a set of available datasets that you can play with.\n", + "\n", + "To switch to one of the available datasets, simply change the *dataset_name* variable in [Dataset config](#dataset-config) to one of the following names:\n", + "\n", + "* cocitation_cora\n", + "* cocitation_citeseer\n", + "* cocitation_pubmed\n", + "* MUTAG\n", + "* NCI1\n", + "* NCI109\n", + "* PROTEINS_TU\n", + "* AQSOL\n", + "* ZINC\n", + "***" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Imports and utilities" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "from modules.data.load.loaders import PointCloudLoader\n", + "from modules.data.preprocess.preprocessor import PreProcessor\n", + "from modules.utils.utils import (\n", + " describe_data,\n", + " load_dataset_config,\n", + " load_model_config,\n", + " load_transform_config,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading the Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we just need to spicify the name of the available dataset that we want to load. 
First, the dataset config is read from the corresponding yaml file (located at `/configs/datasets/` directory), and then the data is loaded via the implemented `Loaders`.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can then access to the data through the `load()` method:" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Dataset configuration for toy_point_cloud:\n", + "\n", + "{'data_domain': 'point_cloud',\n", + " 'data_type': 'toy_dataset',\n", + " 'data_name': 'toy_point_cloud',\n", + " 'data_dir': 'datasets/point_cloud/toy_dataset',\n", + " 'num_points': 8,\n", + " 'num_classes': 2,\n", + " 'num_features': 1,\n", + " 'task': 'classification',\n", + " 'loss_type': 'cross_entropy'}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing...\n", + "Done!\n" + ] + } + ], + "source": [ + "dataset_name = \"toy_point_cloud\"\n", + "dataset_config = load_dataset_config(dataset_name)\n", + "loader = PointCloudLoader(dataset_config)\n", + "\n", + "dataset = loader.load()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading and Applying the Lifting" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this section we will instantiate the whitness lifting. This lifting constructs a simplicial complex from a point cloud, called the **Witness complex** using the `GUDHI` library [[1]](https://gudhi.inria.fr/doc/latest/group__witness__complex.html). In the current implementation the Witness complex is defined over a point cloud $W$, called *witnesses*. From this set, a subset $L$ of *landmarks* is formed by randomly choosing the points. 
\n", + "\n", + "Current implementation can *only* lift to a *weak Euclidean Witness complex*:\n", + " - *weak* means that $\\sigma \\subset L$ is witnessed by $w\\in W$ if $\\forall l\\in \\sigma, \\forall l' \\in L \\setminus \\sigma: d(w,l)\\leq d(w,l')$\n", + " - *Euclidean* means that the set of witnesses and landmarks are in Euclidean space.\n", + "\n", + "\n", + "\n", + "***\n", + "[[1]](https://gudhi.inria.fr/doc/latest/group__witness__complex.html) Kachanovich S. (2020). Witness complex. GUDHI User and Reference Manual.\n", + "***\n", + "\n", + "For simplicial complexes, creating a lifting involves creating a `SimplicialComplex` object from toponetx and adding simplices to it using the method `add_simplices_from`. The `SimplicialComplex` class then takes care of creating all the needed matrices.\n", + "\n", + "Similarly to before, we can specify the transformation we want to apply through its type and id -- the corresponding config files are located at `/configs/transforms`. \n", + "\n", + "Note that the *transform_config* dictionary generated below can contain a sequence of transforms if it is needed.\n", + "\n", + "This can also be used to explore liftings from one topological domain to another, for example using two liftings it is possible to achieve a sequence such as: graph -> simplicial complex -> hypergraph. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Transform configuration for pointcloud2simplicial/witness_lifting:\n", + "\n", + "{'transform_type': 'lifting',\n", + " 'transform_name': 'WitnessLifting',\n", + " 'feature_lifting': 'ProjectionSum'}\n" + ] + } + ], + "source": [ + "transform_type = \"liftings\"\n", + "# If the transform is a topological lifting, it should include both the type of the lifting and the identifier\n", + "transform_id = \"pointcloud2simplicial/witness_lifting\"\n", + "\n", + "# Read yaml file\n", + "transform_config = {\n", + " \"lifting\": load_transform_config(transform_type, transform_id)\n", + " # other transforms (e.g. data manipulations, feature liftings) can be added here\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We than apply the transform via our `PreProcesor`:" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing...\n", + "Done!\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Dataset only contains 1 sample:\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " - The complex has 8 0-cells.\n", + " - The 0-cells have features dimension 1\n", + " - The complex has 12 1-cells.\n", + " - The 1-cells have features dimension 1\n", + " - The complex has 7 2-cells.\n", + " - The 2-cells have features dimension 1\n", + "\n" + ] + } + ], + "source": [ + "lifted_dataset = PreProcessor(dataset, transform_config, loader.data_dir)\n", + "describe_data(lifted_dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create and Run a Simplicial NN Model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this section a simple model is created to test that the used lifting works as intended. In this case the model uses the `up_laplacian_1` and the `down_laplacian_1` so the lifting should make sure to add them to the data." + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Model configuration for simplicial SAN:\n", + "\n", + "{'in_channels': None,\n", + " 'hidden_channels': 32,\n", + " 'out_channels': None,\n", + " 'n_layers': 2,\n", + " 'n_filters': 2,\n", + " 'order_harmonic': 5,\n", + " 'epsilon_harmonic': 0.1}\n" + ] + } + ], + "source": [ + "from modules.models.simplicial.san import SANModel\n", + "\n", + "model_type = \"simplicial\"\n", + "model_id = \"san\"\n", + "model_config = load_model_config(model_type, model_id)\n", + "\n", + "model = SANModel(model_config, dataset_config)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "y_hat = model(lifted_dataset.get(0))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If everything is correct the cell above should execute without errors. 
" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "topox", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}