From ae8cc9f4d8c10ef973c8b9ae20fa546e36cd19ea Mon Sep 17 00:00:00 2001
From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com>
Date: Fri, 23 May 2025 16:59:56 -0400
Subject: [PATCH 01/20] Remove stubs

---
 docs/source/matlab/symktensor.rst |  8 --------
 docs/source/matlab/symtensor.rst  |  8 --------
 pyttb/__init__.py                 |  6 ------
 pyttb/sptensor3.py                | 12 ------------
 pyttb/symktensor.py               | 12 ------------
 pyttb/symtensor.py                | 12 ------------
 tests/test_sptensor3.py           | 13 -------------
 tests/test_symktensor.py          | 13 -------------
 tests/test_symtensor.py           | 13 -------------
 9 files changed, 97 deletions(-)
 delete mode 100644 docs/source/matlab/symktensor.rst
 delete mode 100644 docs/source/matlab/symtensor.rst
 delete mode 100644 pyttb/sptensor3.py
 delete mode 100644 pyttb/symktensor.py
 delete mode 100644 pyttb/symtensor.py
 delete mode 100644 tests/test_sptensor3.py
 delete mode 100644 tests/test_symktensor.py
 delete mode 100644 tests/test_symtensor.py

diff --git a/docs/source/matlab/symktensor.rst b/docs/source/matlab/symktensor.rst
deleted file mode 100644
index 19e215a8..00000000
--- a/docs/source/matlab/symktensor.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-``symktensor``
---------------------
-
-Data members
-^^^^^^^^^^^^
-
-Methods
-^^^^^^^
\ No newline at end of file
diff --git a/docs/source/matlab/symtensor.rst b/docs/source/matlab/symtensor.rst
deleted file mode 100644
index 8d673c32..00000000
--- a/docs/source/matlab/symtensor.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-``symtensor``
--------------------
-
-Data members
-^^^^^^^^^^^^
-
-Methods
-^^^^^^^
\ No newline at end of file
diff --git a/pyttb/__init__.py b/pyttb/__init__.py
index dbec0889..87a70514 100644
--- a/pyttb/__init__.py
+++ b/pyttb/__init__.py
@@ -22,10 +22,7 @@
 from pyttb.matlab import matlab_support
 from pyttb.sptenmat import sptenmat
 from pyttb.sptensor import sptendiag, sptenrand, sptensor
-from pyttb.sptensor3 import sptensor3
 from pyttb.sumtensor import sumtensor
-from pyttb.symktensor import symktensor
-from pyttb.symtensor import symtensor
 from pyttb.tenmat import tenmat
 from pyttb.tensor import tendiag, teneye, tenones, tenrand, tensor, tenzeros
 from pyttb.ttensor import ttensor
@@ -55,10 +52,7 @@ def ignore_warnings(ignore=True):
     sptendiag.__name__,
     sptenrand.__name__,
     sptensor.__name__,
-    sptensor3.__name__,
     sumtensor.__name__,
-    symktensor.__name__,
-    symtensor.__name__,
     teneye.__name__,
     tenmat.__name__,
     tendiag.__name__,
diff --git a/pyttb/sptensor3.py b/pyttb/sptensor3.py
deleted file mode 100644
index 2d469b06..00000000
--- a/pyttb/sptensor3.py
+++ /dev/null
@@ -1,12 +0,0 @@
-"""Sparse Tensor 3 Class Placeholder."""
-
-# Copyright 2025 National Technology & Engineering Solutions of Sandia,
-# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the
-# U.S. Government retains certain rights in this software.
-
-
-class sptensor3:
-    """A sparse tensor variant."""
-
-    def __init__(self):
-        assert False, "SPTENSOR3 class not yet implemented"
diff --git a/pyttb/symktensor.py b/pyttb/symktensor.py
deleted file mode 100644
index 67a05e26..00000000
--- a/pyttb/symktensor.py
+++ /dev/null
@@ -1,12 +0,0 @@
-"""Symmetric Kruskal Tensor Class Placeholder."""
-
-# Copyright 2025 National Technology & Engineering Solutions of Sandia,
-# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the
-# U.S. Government retains certain rights in this software.
-
-
-class symktensor:
-    """Class for symmetric Kruskal tensors (decomposed)."""
-
-    def __init__(self):
-        assert False, "SYMKTENSOR class not yet implemented"
diff --git a/pyttb/symtensor.py b/pyttb/symtensor.py
deleted file mode 100644
index bd57e5c0..00000000
--- a/pyttb/symtensor.py
+++ /dev/null
@@ -1,12 +0,0 @@
-"""Symmetric Tensor Class Placeholder."""
-
-# Copyright 2025 National Technology & Engineering Solutions of Sandia,
-# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the
-# U.S. Government retains certain rights in this software.
-
-
-class symtensor:
-    """Class for storing only unique entries of symmetric tensor."""
-
-    def __init__(self):
-        assert False, "SYMTENSOR class not yet implemented"
diff --git a/tests/test_sptensor3.py b/tests/test_sptensor3.py
deleted file mode 100644
index fd7cd94f..00000000
--- a/tests/test_sptensor3.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright 2024 National Technology & Engineering Solutions of Sandia,
-# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the
-# U.S. Government retains certain rights in this software.
-
-import pytest
-
-import pyttb as ttb
-
-
-def test_sptensor3_initialization_empty():
-    with pytest.raises(AssertionError) as excinfo:
-        ttb.sptensor3()
-    assert "SPTENSOR3 class not yet implemented" in str(excinfo)
diff --git a/tests/test_symktensor.py b/tests/test_symktensor.py
deleted file mode 100644
index 0265d6a7..00000000
--- a/tests/test_symktensor.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright 2024 National Technology & Engineering Solutions of Sandia,
-# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the
-# U.S. Government retains certain rights in this software.
-
-import pytest
-
-import pyttb as ttb
-
-
-def test_symktensor_initialization_empty():
-    with pytest.raises(AssertionError) as excinfo:
-        ttb.symktensor()
-    assert "SYMKTENSOR class not yet implemented" in str(excinfo)
diff --git a/tests/test_symtensor.py b/tests/test_symtensor.py
deleted file mode 100644
index 5ee45bcf..00000000
--- a/tests/test_symtensor.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright 2024 National Technology & Engineering Solutions of Sandia,
-# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the
-# U.S. Government retains certain rights in this software.
-
-import pytest
-
-import pyttb as ttb
-
-
-def test_symtensor_initialization_empty():
-    with pytest.raises(AssertionError) as excinfo:
-        ttb.symtensor()
-    assert "SYMTENSOR class not yet implemented" in str(excinfo)

From fb6f006bf0be37dc5efceaf6123710c821677b89 Mon Sep 17 00:00:00 2001
From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com>
Date: Fri, 23 May 2025 17:07:08 -0400
Subject: [PATCH 02/20] Fix rst keyword typo

---
 pyttb/sptenmat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyttb/sptenmat.py b/pyttb/sptenmat.py
index 8670da5d..9a0a143c 100644
--- a/pyttb/sptenmat.py
+++ b/pyttb/sptenmat.py
@@ -40,7 +40,7 @@ def __init__(  # noqa: PLR0913
         and values (vals) along with the mappings of the row (rdims) and column
         indices (cdims) and the shape of the original tensor (tshape).
 
-        If you already have an sparse tensor see :method:`pyttb.sptensor.to_sptenmat`.
+        If you already have an sparse tensor see :meth:`pyttb.sptensor.to_sptenmat`.
 
         Parameters
         ----------

From c6e091e611037249a75b3a9c09b98d7de618f4e3 Mon Sep 17 00:00:00 2001
From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com>
Date: Fri, 23 May 2025 17:07:37 -0400
Subject: [PATCH 03/20] Handle numerical precision error seen locally

---
 pyttb/ktensor.py      | 27 ++++++++++++++++++---------
 tests/test_ktensor.py | 16 ++++++++++------
 2 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/pyttb/ktensor.py b/pyttb/ktensor.py
index 11ed3f19..a2637f8b 100644
--- a/pyttb/ktensor.py
+++ b/pyttb/ktensor.py
@@ -1606,7 +1606,7 @@ def score(
         component :class:`pyttb.ktensor` instances that have been normalized
         so that their weights are `self.weights` and `other.weights`, and their
         factor matrices are single column vectors containing [a1,a2,...,an] and
-        [b1,b2,...bn], rescpetively, then the score is defined as
+        [b1,b2,...bn], respectively, then the score is defined as
 
             score = penalty * (a1.T*b1) * (a2.T*b2) * ... * (an.T*bn),
 
@@ -1653,23 +1653,31 @@ def score(
         Create two :class:`pyttb.ktensor` instances and compute the score
         between them:
 
-        >>> factors = [np.ones((3, 3)), np.ones((4, 3)), np.ones((5, 3))]
+        >>> factors = [
+        ...     np.ones((3, 3)) + 0.1,
+        ...     np.ones((4, 3)) + 0.2,
+        ...     np.ones((5, 3)) + 0.3,
+        ... ]
         >>> weights = np.array([2.0, 1.0, 3.0])
         >>> K = ttb.ktensor(factors, weights)
-        >>> factors_2 = [np.ones((3, 2)), np.ones((4, 2)), np.ones((5, 2))]
+        >>> factors_2 = [
+        ...     np.ones((3, 2)) + 0.1,
+        ...     np.ones((4, 2)) + 0.2,
+        ...     np.ones((5, 2)) + 0.3,
+        ... ]
         >>> weights_2 = np.array([2.0, 4.0])
         >>> K2 = ttb.ktensor(factors_2, weights_2)
         >>> score, Kperm, flag, perm = K.score(K2)
-        >>> print(score)
-        0.875
+        >>> print(np.isclose(score, 0.875))
+        True
         >>> print(perm)
         [0 2 1]
 
         Compute score without using weights:
 
         >>> score, Kperm, flag, perm = K.score(K2, weight_penalty=False)
-        >>> print(score)
-        1.0
+        >>> print(np.isclose(score, 1.0))
+        True
         >>> print(perm)
         [0 1 2]
         """
@@ -1733,8 +1741,9 @@ def score(
             best_perm = -1 * np.ones((RA), dtype=int)
             best_score = 0.0
             for _ in range(RB):
-                idx = np.argmax(C.reshape(prod(C.shape), order=self.order))
-                ij = tt_ind2sub((RA, RB), np.array(idx))
+                flatten_C = C.reshape(prod(C.shape), order=self.order)
+                idx = np.argmax(flatten_C)
+                ij = tt_ind2sub((RA, RB), np.array(idx, dtype=int), order=self.order)
                 best_score = best_score + C[ij[0], ij[1]]
                 C[ij[0], :] = -10
                 C[:, ij[1]] = -10
diff --git a/tests/test_ktensor.py b/tests/test_ktensor.py
index 6c560c26..6abda068 100644
--- a/tests/test_ktensor.py
+++ b/tests/test_ktensor.py
@@ -779,23 +779,27 @@ def test_ktensor_redistribute(sample_ktensor_2way):
 
 def test_ktensor_score():
     A = ttb.ktensor(
-        [np.ones((3, 3)), np.ones((4, 3)), np.ones((5, 3))], np.array([2.0, 1.0, 3.0])
+        [np.ones((3, 3)) + 0.1, np.ones((4, 3)) + 0.2, np.ones((5, 3)) + 0.3],
+        np.array([2.0, 1.0, 3.0]),
     )
     B = ttb.ktensor(
-        [np.ones((3, 2)), np.ones((4, 2)), np.ones((5, 2))], np.array([2.0, 4.0])
+        [np.ones((3, 2)) + 0.1, np.ones((4, 2)) + 0.2, np.ones((5, 2)) + 0.3],
+        np.array([2.0, 4.0]),
     )
 
+    A_norm = A.copy().normalize()
+
     # defaults
     score, Aperm, flag, best_perm = A.score(B)
-    assert score == 0.875
-    assert np.allclose(Aperm.weights, np.array([15.49193338, 23.23790008, 7.74596669]))
+    assert np.isclose(score, 0.875)
+    assert np.allclose(Aperm.weights, A_norm.weights[best_perm])
     assert flag
     assert np.array_equal(best_perm, np.array([0, 2, 1]))
 
     # compare just factor matrices (i.e., do not use weights)
     score, Aperm, flag, best_perm = A.score(B, weight_penalty=False)
-    assert score == 1.0
-    assert np.allclose(Aperm.weights, np.array([15.49193338, 7.74596669, 23.23790008]))
+    assert np.isclose(score, 1.0)
+    assert np.allclose(Aperm.weights, A_norm.weights[best_perm])
     assert not flag
     assert np.array_equal(best_perm, np.array([0, 1, 2]))
 

From cc17f7c91c20099eee08966211018d05bd3ba411 Mon Sep 17 00:00:00 2001
From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com>
Date: Sat, 24 May 2025 17:44:56 -0400
Subject: [PATCH 04/20] Add coverage for our missed doc components

---
 docs/source/index.rst                     | 5 +++++
 docs/source/io.rst                        | 6 ++++++
 docs/source/matlab/additional_support.rst | 4 ++++
 docs/source/pyttb_utils.rst               | 6 ++++--
 docs/source/reference.rst                 | 1 +
 5 files changed, 20 insertions(+), 2 deletions(-)
 create mode 100644 docs/source/io.rst
 create mode 100644 docs/source/matlab/additional_support.rst

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 930010c1..3183c0a8 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -47,8 +47,13 @@ algorithms for computing low-rank tensor models.
    decompositions such as Poisson Tensor Factorization via alternating
    Poisson regression.
 
+- `IO`_
+
+   Storing and retrieving tensors from disk.
+
 .. _Tensor Classes: tensor_classes.html
 .. _Algorithms: algorithms.html
+.. _IO: io.html
 
 
 Getting Started
diff --git a/docs/source/io.rst b/docs/source/io.rst
new file mode 100644
index 00000000..10ccc1fc
--- /dev/null
+++ b/docs/source/io.rst
@@ -0,0 +1,6 @@
+Input/Output
+------------
+Storing or reading tensors from disk.
+
+.. autofunction:: pyttb.import_data.import_data
+.. autofunction:: pyttb.export_data.export_data
\ No newline at end of file
diff --git a/docs/source/matlab/additional_support.rst b/docs/source/matlab/additional_support.rst
new file mode 100644
index 00000000..f1df8eab
--- /dev/null
+++ b/docs/source/matlab/additional_support.rst
@@ -0,0 +1,4 @@
+Additional Utilities For MATLAB User Transition
+-----------------------------------------------
+
+.. autofunction:: pyttb.matlab.matlab_support.matlab_print
\ No newline at end of file
diff --git a/docs/source/pyttb_utils.rst b/docs/source/pyttb_utils.rst
index 7cc02f4c..0627df89 100644
--- a/docs/source/pyttb_utils.rst
+++ b/docs/source/pyttb_utils.rst
@@ -1,5 +1,7 @@
-Helper Functions (:mod:`pyttb_utils`)
--------------------------------------
+Helper Functions (:mod:`pyttb_utils`, :mod:`khatrirao`)
+--------------------------------------------------------
+
+.. autofunction:: pyttb.khatrirao.khatrirao
 
 .. automodule:: pyttb.pyttb_utils
     :members:
diff --git a/docs/source/reference.rst b/docs/source/reference.rst
index 46cab023..8d7529fc 100644
--- a/docs/source/reference.rst
+++ b/docs/source/reference.rst
@@ -6,3 +6,4 @@ Reference (:mod:`pyttb`)
 
     tensor_classes.rst
     algorithms.rst
+    io.rst

From 54c12c6f7bf31a857b0aff3a7cabb122919606bb Mon Sep 17 00:00:00 2001
From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com>
Date: Sat, 24 May 2025 18:04:55 -0400
Subject: [PATCH 05/20] Add python 3.13 but only do coveralls for oldest
 supported

---
 .github/workflows/regression-tests.yml | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/regression-tests.yml b/.github/workflows/regression-tests.yml
index 72dda521..68c8f436 100644
--- a/.github/workflows/regression-tests.yml
+++ b/.github/workflows/regression-tests.yml
@@ -15,8 +15,8 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
-      matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+      matrix: # Keep these in ascending order for automagic with coverage
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
 
     steps:
     - uses: actions/checkout@v4
@@ -32,13 +32,18 @@ jobs:
         python -c "import pyttb"
     - name: Install dev dependencies
       run: |
-        python -m pip install --upgrade coverage coveralls sphinx_rtd_theme
+        python -m pip install --upgrade coverage sphinx_rtd_theme
         pip install ".[dev]"
     - name: Run tests
       run: |
         coverage run --source pyttb -m pytest tests/
         coverage report
+    - name: Add coveralls dependencies
+      if: strategy.job-index == 0
+      run: |
+        python -m pip install --upgrade coveralls
     - name: Upload coverage to Coveralls
+      if: strategy.job-index == 0
       uses: coverallsapp/github-action@v2
       #env:
       #  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

From ab81d8c2968eafcf851201756c133374ebaae7a2 Mon Sep 17 00:00:00 2001
From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com>
Date: Fri, 30 May 2025 18:37:40 -0400
Subject: [PATCH 06/20] Bulk of non-missing data support

---
 pyproject.toml               |   2 +
 pyttb/create_problem.py      | 207 +++++++++++++++++++++++++++++++++++
 tests/test_create_problem.py |  50 +++++++++
 3 files changed, 259 insertions(+)
 create mode 100644 pyttb/create_problem.py
 create mode 100644 tests/test_create_problem.py

diff --git a/pyproject.toml b/pyproject.toml
index 45604a84..41261b93 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -84,6 +84,8 @@ ignore = [
     "B011",
     # There is ongoing discussion about logging/warning etc
     "B028",
+    # Personal preference on magic method
+    "D105",
 ]
 [tool.ruff.lint.pydocstyle]
 convention = "numpy"
diff --git a/pyttb/create_problem.py b/pyttb/create_problem.py
new file mode 100644
index 00000000..0eae152c
--- /dev/null
+++ b/pyttb/create_problem.py
@@ -0,0 +1,207 @@
+"""Create test problems for  tensor factorizations."""
+
+from dataclasses import dataclass
+from typing import Callable, Optional, Tuple, Union, cast, overload
+
+import numpy as np
+
+import pyttb as ttb
+from pyttb.pyttb_utils import Shape
+
+solution_generator = Callable[[Tuple[int, ...]], np.ndarray]
+
+
+def randn(shape: Tuple[int, ...]) -> np.ndarray:
+    """Stub for MATLAB randn.
+
+    TODO move somewhere shareable.
+    """
+    return np.random.normal(0, 1, size=shape)
+
+
+@dataclass
+class BaseProblem:
+    """Parameters general to all solutions."""
+
+    shape: Shape
+    factor_generator: solution_generator = randn
+    symmetric: Optional[list[Tuple[int, int]]] = None
+    num_factors: Union[int, list[int], None] = None
+
+    def __post_init__(self):
+        self.shape = ttb.pyttb_utils.parse_shape(self.shape)
+
+
+@dataclass
+class CPProblem(BaseProblem):
+    """Parameters specifying CP Solutions."""
+
+    num_factors: int = 2
+    # TODO probably rename weight generator for consistency
+    lambda_generator: solution_generator = np.random.random
+
+
+@dataclass
+class TuckerProblem(BaseProblem):
+    """Parameters specifying Tucker Solutions."""
+
+    # TODO post_init set to [2, 2, 2]
+    num_factors: Optional[list[int]] = None
+    core_generator: solution_generator = randn
+
+    def __post_init__(self):
+        super().__post_init__()
+        self.num_factors = self.num_factors or [2, 2, 2]
+
+
+@dataclass
+class DataParams:
+    """Parameters to control data quality."""
+
+    noise: float = 0.10
+    # TODO handle weird sparse_generation option
+
+    def __post_init__(
+        self,
+    ):
+        if not 0.0 <= self.noise <= 1.0:
+            raise ValueError(f"Noise must be in [0,1] but got {self.noise}")
+
+
+@dataclass
+class MissingData:
+    """Parameters to control missing data."""
+
+    missing_ratio: float = 0.0
+    sparse_model: bool = False
+    # TODO add spare pattern tensor
+
+    def __post_init__(self):
+        if not 0.0 <= self.missing_ratio <= 1.0:
+            raise ValueError(
+                f"Missing ratio must be in [0,1] but got {self.missing_ratio}"
+            )
+
+        if self.sparse_model and self.missing_ratio > 0.0:
+            raise ValueError("Can't combine missing data and sparse generation.")
+
+    def has_missing(self) -> bool:
+        """Check if any form of missing data is requested."""
+        return self.sparse_model or self.missing_ratio > 0.0
+
+    def raise_symmetric(self):
+        """Raise for unsupported symmetry request."""
+        if self.missing_ratio:
+            raise ValueError("Can't generate a symmetric problem with missing data.")
+        if self.sparse_model:
+            raise ValueError("Can't generate sparse symmetric problem.")
+
+
+@overload
+def create_problem(
+    problem_params: CPProblem, missing_params: MissingData, data_params: DataParams
+) -> Tuple[ttb.ktensor, ttb.tensor]: ...  # pragma: no cover see coveragepy/issues/970
+
+
+@overload
+def create_problem(
+    problem_params: TuckerProblem, missing_params: MissingData, data_params: DataParams
+) -> Tuple[ttb.ttensor, ttb.tensor]: ...  # pragma: no cover see coveragepy/issues/970
+
+
+def create_problem(
+    problem_params: Union[CPProblem, TuckerProblem],
+    missing_params: MissingData,
+    data_params: DataParams,
+) -> Tuple[Union[ttb.ktensor, ttb.ttensor], ttb.tensor]:
+    """Generate a problem and solution."""
+    if problem_params.symmetric is not None:
+        missing_params.raise_symmetric()
+
+    solution = generate_solution(problem_params)
+
+    if missing_params.sparse_model:
+        raise NotImplementedError("Sparse generation not yet supported")
+
+    data = generate_data(solution, problem_params, data_params)
+    return solution, data
+
+
+def generate_solution_factors(base_params: BaseProblem) -> list[np.ndarray]:
+    """Generate the factor matrices for either type of solution."""
+    # Get shape of final tensor
+    shape = cast(Tuple[int, ...], base_params.shape)
+
+    # Get shape of factors
+    if isinstance(base_params.num_factors, int):
+        nfactors = [base_params.num_factors] * len(shape)
+    elif base_params.num_factors is not None:
+        nfactors = base_params.num_factors
+    else:
+        raise ValueError("Num_factors shouldn't be none.")
+    if len(nfactors) != len(shape):
+        raise ValueError(
+            "Num_factors should be the same dimensions as shape but got"
+            f"{nfactors} and {shape}"
+        )
+    factor_matrices = []
+    for shape_i, nfactors_i in zip(shape, nfactors):
+        factor_matrices.append(base_params.factor_generator((shape_i, nfactors_i)))
+
+    if base_params.symmetric is not None:
+        for grp in base_params.symmetric:
+            # TODO see if this can be a single indexed op
+            for j in range(1, len(grp)):
+                factor_matrices[grp[j]] = factor_matrices[grp[0]]
+
+    return factor_matrices
+
+
+@overload
+def generate_solution(
+    problem_params: TuckerProblem,
+) -> ttb.ttensor: ...
+
+
+@overload
+def generate_solution(
+    problem_params: CPProblem,
+) -> ttb.ktensor: ...
+
+
+def generate_solution(
+    problem_params: Union[CPProblem, TuckerProblem],
+) -> Union[ttb.ktensor, ttb.ttensor]:
+    """Generate problem solution."""
+    factor_matrices = generate_solution_factors(problem_params)
+    # Create final model
+    if isinstance(problem_params, TuckerProblem):
+        nfactors = cast(list[int], problem_params.num_factors)
+        core = ttb.tensor(problem_params.core_generator(tuple(nfactors)))
+        return ttb.ttensor(core, factor_matrices)
+    elif isinstance(problem_params, CPProblem):
+        weights = problem_params.lambda_generator((problem_params.num_factors,))
+        return ttb.ktensor(factor_matrices, weights)
+    raise ValueError(f"Unsupported problem parameter type: {type(problem_params)=}")
+
+
+def generate_data(
+    solution: Union[ttb.ktensor, ttb.ttensor],
+    problem_params: BaseProblem,
+    data_params: DataParams,
+) -> ttb.tensor:
+    """Generate problem data."""
+    shape = solution.shape
+    # TODO handle the sparsity pattern
+    # TODO don't we already have a randn tensor method?
+    Rdm = ttb.tensor(randn(shape))
+    Z = solution.full()
+    if problem_params.symmetric is not None:
+        # TODO Note in MATLAB code to follow up
+        Rdm = Rdm.symmetrize(np.array(problem_params.symmetric))
+
+    D = Z + data_params.noise * Z.norm() * Rdm / Rdm.norm()
+    # Make sure the final result is definitely symmetric
+    if problem_params.symmetric is not None:
+        D = D.symmetrize(np.array(problem_params.symmetric))
+    return D
diff --git a/tests/test_create_problem.py b/tests/test_create_problem.py
new file mode 100644
index 00000000..3a7aac18
--- /dev/null
+++ b/tests/test_create_problem.py
@@ -0,0 +1,50 @@
+import pyttb as ttb
+from pyttb.create_problem import (
+    CPProblem,
+    DataParams,
+    TuckerProblem,
+    generate_data,
+    generate_solution,
+)
+
+
+def test_generate_solution_cp():
+    # Smoke test with defaults
+    shape = (2, 2, 2)
+    cp_params = CPProblem(shape)
+    model = generate_solution(cp_params)
+    assert isinstance(model, ttb.ktensor)
+    assert model.shape == shape
+
+    # TODO could test with different generators and enforce that they actually get used
+
+
+def test_generate_data_cp():
+    # Smoke test with defaults
+    shape = (2, 2, 2)
+    cp_params = CPProblem(shape)
+    model = generate_solution(cp_params)
+    data = generate_data(model, cp_params, data_params=DataParams())
+    assert isinstance(data, ttb.tensor)
+    assert data.shape == model.shape
+
+
+def test_generate_solution_tucker():
+    # Smoke test with defaults
+    shape = (2, 2, 2)
+    tucker_params = TuckerProblem(shape)
+    model = generate_solution(tucker_params)
+    assert isinstance(model, ttb.ttensor)
+    assert model.shape == shape
+
+    # TODO could test with different generators and enforce that they actually get used
+
+
+def test_generate_data_tucker():
+    # Smoke test with defaults
+    shape = (2, 2, 2)
+    tucker_params = TuckerProblem(shape)
+    model = generate_solution(tucker_params)
+    data = generate_data(model, tucker_params, data_params=DataParams())
+    assert isinstance(data, ttb.tensor)
+    assert data.shape == model.shape

From 476eab7f50d900fe45e36e0b91be4c2535a9f1d7 Mon Sep 17 00:00:00 2001
From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com>
Date: Fri, 6 Jun 2025 08:00:26 -0400
Subject: [PATCH 07/20] Small cleanup and improve some testing

---
 pyttb/create_problem.py      |  4 +--
 tests/test_create_problem.py | 48 ++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/pyttb/create_problem.py b/pyttb/create_problem.py
index 0eae152c..d6c846f3 100644
--- a/pyttb/create_problem.py
+++ b/pyttb/create_problem.py
@@ -38,7 +38,7 @@ class CPProblem(BaseProblem):
 
     num_factors: int = 2
     # TODO probably rename weight generator for consistency
-    lambda_generator: solution_generator = np.random.random
+    weight_generator: solution_generator = np.random.random
 
 
 @dataclass
@@ -180,7 +180,7 @@ def generate_solution(
         core = ttb.tensor(problem_params.core_generator(tuple(nfactors)))
         return ttb.ttensor(core, factor_matrices)
     elif isinstance(problem_params, CPProblem):
-        weights = problem_params.lambda_generator((problem_params.num_factors,))
+        weights = problem_params.weight_generator((problem_params.num_factors,))
         return ttb.ktensor(factor_matrices, weights)
     raise ValueError(f"Unsupported problem parameter type: {type(problem_params)=}")
 
diff --git a/tests/test_create_problem.py b/tests/test_create_problem.py
index 3a7aac18..843ebed8 100644
--- a/tests/test_create_problem.py
+++ b/tests/test_create_problem.py
@@ -1,13 +1,50 @@
+import pytest
+
 import pyttb as ttb
 from pyttb.create_problem import (
     CPProblem,
     DataParams,
+    MissingData,
     TuckerProblem,
+    create_problem,
     generate_data,
     generate_solution,
 )
 
 
+class TestDataclasses:
+    def test_dataparams(self):
+        with pytest.raises(ValueError):
+            number_larger_than_one = 2.0
+            DataParams(noise=number_larger_than_one)
+        with pytest.raises(ValueError):
+            number_less_than_zero = -2.0
+            DataParams(noise=number_less_than_zero)
+
+    def test_missingdata(self):
+        with pytest.raises(ValueError):
+            number_larger_than_one = 2.0
+            MissingData(missing_ratio=number_larger_than_one)
+        with pytest.raises(ValueError):
+            number_less_than_zero = -2.0
+            MissingData(missing_ratio=number_less_than_zero)
+        with pytest.raises(ValueError):
+            non_zero = 0.5
+            MissingData(missing_ratio=non_zero, sparse_model=True)
+
+        missing_params = MissingData(missing_ratio=0.1)
+        assert missing_params.has_missing()
+        with pytest.raises(ValueError):
+            missing_params.raise_symmetric()
+        missing_params = MissingData(sparse_model=True)
+        assert missing_params.has_missing()
+        with pytest.raises(ValueError):
+            missing_params.raise_symmetric()
+        missing_params = MissingData()
+        assert not missing_params.has_missing()
+        missing_params.raise_symmetric()
+
+
 def test_generate_solution_cp():
     # Smoke test with defaults
     shape = (2, 2, 2)
@@ -48,3 +85,14 @@ def test_generate_data_tucker():
     data = generate_data(model, tucker_params, data_params=DataParams())
     assert isinstance(data, ttb.tensor)
     assert data.shape == model.shape
+
+
+def test_create_problem_smoke():
+    shape = (2, 2, 2)
+    cp_params = CPProblem(shape)
+    data_params = DataParams()
+    missing_params = MissingData()
+    soln, data = create_problem(cp_params, missing_params, data_params)
+    assert soln.full().shape == data.shape
+
+    # TODO hit edge cases and symmetric

From ff2311be52b3d388548a50afe577b1b9bc3d4924 Mon Sep 17 00:00:00 2001
From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com>
Date: Fri, 6 Jun 2025 14:16:03 -0400
Subject: [PATCH 08/20] Add basic support for sparse_generation

---
 pyttb/create_problem.py      | 89 ++++++++++++++++++++++++++++++++++--
 tests/test_create_problem.py | 14 ++++++
 2 files changed, 100 insertions(+), 3 deletions(-)

diff --git a/pyttb/create_problem.py b/pyttb/create_problem.py
index d6c846f3..af146a9c 100644
--- a/pyttb/create_problem.py
+++ b/pyttb/create_problem.py
@@ -1,9 +1,12 @@
 """Create test problems for  tensor factorizations."""
 
+import logging
+import math
 from dataclasses import dataclass
 from typing import Callable, Optional, Tuple, Union, cast, overload
 
 import numpy as np
+from numpy_groupies import aggregate as accumarray
 
 import pyttb as ttb
 from pyttb.pyttb_utils import Shape
@@ -39,6 +42,7 @@ class CPProblem(BaseProblem):
     num_factors: int = 2
     # TODO probably rename weight generator for consistency
     weight_generator: solution_generator = np.random.random
+    sparse_generation: Optional[float] = None
 
 
 @dataclass
@@ -100,7 +104,9 @@ def raise_symmetric(self):
 @overload
 def create_problem(
     problem_params: CPProblem, missing_params: MissingData, data_params: DataParams
-) -> Tuple[ttb.ktensor, ttb.tensor]: ...  # pragma: no cover see coveragepy/issues/970
+) -> Tuple[
+    ttb.ktensor, Union[ttb.tensor, ttb.sptensor]
+]: ...  # pragma: no cover see coveragepy/issues/970
 
 
 @overload
@@ -113,7 +119,7 @@ def create_problem(
     problem_params: Union[CPProblem, TuckerProblem],
     missing_params: MissingData,
     data_params: DataParams,
-) -> Tuple[Union[ttb.ktensor, ttb.ttensor], ttb.tensor]:
+) -> Tuple[Union[ttb.ktensor, ttb.ttensor], Union[ttb.tensor, ttb.sptensor]]:
     """Generate a problem and solution."""
     if problem_params.symmetric is not None:
         missing_params.raise_symmetric()
@@ -123,7 +129,15 @@ def create_problem(
     if missing_params.sparse_model:
         raise NotImplementedError("Sparse generation not yet supported")
 
-    data = generate_data(solution, problem_params, data_params)
+    data: Union[ttb.tensor, ttb.sptensor]
+    if (
+        isinstance(problem_params, CPProblem)
+        and problem_params.sparse_generation is not None
+    ):
+        solution = cast(ttb.ktensor, solution)
+        solution, data = generate_data_sparse(solution, problem_params, data_params)
+    else:
+        data = generate_data(solution, problem_params, data_params)
     return solution, data
 
 
@@ -205,3 +219,72 @@ def generate_data(
     if problem_params.symmetric is not None:
         D = D.symmetrize(np.array(problem_params.symmetric))
     return D
+
+
+def prosample(nsamples: int, prob: np.ndarray) -> np.ndarray:
+    """Sample nsamples zero-based indices according to the probabilities in prob."""
+    bins = np.minimum(np.cumsum(np.array([0, *prob])), 1)
+    bins[-1] = 1  # force the final edge to 1 (presumably to absorb round-off)
+    indices = np.digitize(np.random.random(nsamples), bins=bins)
+    return indices - 1  # digitize counts from 1 because of the leading 0 edge
+
+
+def generate_data_sparse(
+    solution: ttb.ktensor, problem_params: CPProblem, data_params: DataParams
+) -> Tuple[ttb.ktensor, ttb.sptensor]:
+    """Sample a sparse count tensor from a nonnegative CP solution."""
+    # Error check on solution
+    if np.any(solution.weights < 0):
+        raise ValueError("All weights must be nonnegative.")
+    if any(np.any(factor < 0) for factor in solution.factor_matrices):
+        raise ValueError("All factor matrices must be nonnegative.")
+    if problem_params.symmetric is not None:
+        logging.warning("Symmetric constraints have been ignored.")
+    if problem_params.sparse_generation is None:
+        raise ValueError("Cannot generate sparse data without sparse_generation set.")
+
+    # Convert solution to probability tensor (weights rescaled to sum to one)
+    P = solution.normalize(mode=0)
+    eta = np.sum(P.weights)
+    P.weights /= eta
+
+    # Determine how many samples per component (values < 1 are a fraction of entries)
+    nedges = problem_params.sparse_generation
+    if nedges < 1:
+        nedges = np.round(nedges * math.prod(P.shape)).astype(int)
+    nedges = int(nedges)
+    nd = P.ndims
+    nc = P.ncomponents
+    csample = prosample(nedges, P.weights)
+    # Count how many of the sampled edges fall in each component
+    csums = accumarray(csample, 1, size=nc)
+
+    # Determine the subscripts for each randomly sampled entry
+    shape = solution.shape
+    subs: list[np.ndarray] = []
+    for c in range(nc):
+        nsample = csums[c]
+        if nsample == 0:
+            continue
+        subs.append(np.zeros((nsample, nd), dtype=int))
+        for d in range(nd):
+            subs[-1][:, d] = prosample(nsample, P.factor_matrices[d][:, c])
+    # TODO could sum csums and allocate in place with slicing
+    allsubs = np.vstack(subs)
+    # Assemble final tensor. Note that duplicates are summed.
+    # TODO should we have sptenones for purposes like this?
+    Z = ttb.sptensor(
+        allsubs,
+        np.ones(
+            len(allsubs),
+        ),
+        shape=shape,
+    )
+
+    # Rescale the solution so that it is proportional to the number of edges inserted
+    solution = P
+    solution.weights *= nedges
+
+    # TODO no noise introduced in this special case in MATLAB
+
+    return solution, Z
diff --git a/tests/test_create_problem.py b/tests/test_create_problem.py
index 843ebed8..32ff943e 100644
--- a/tests/test_create_problem.py
+++ b/tests/test_create_problem.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pytest
 
 import pyttb as ttb
@@ -96,3 +97,16 @@ def test_create_problem_smoke():
     assert soln.full().shape == data.shape
 
     # TODO hit edge cases and symmetric
+
+
+def test_create_problem_smoke_sparse():
+    shape = (2, 2, 2)
+    cp_params = CPProblem(
+        shape, sparse_generation=0.99, factor_generator=np.random.random
+    )
+    data_params = DataParams()
+    missing_params = MissingData()
+    soln, data = create_problem(cp_params, missing_params, data_params)
+    assert soln.full().shape == data.shape
+
+    # TODO hit edge cases and symmetric

From 12cf0679fcec8c183467c3feba7376dd00726409 Mon Sep 17 00:00:00 2001
From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com>
Date: Mon, 9 Jun 2025 07:32:26 -0400
Subject: [PATCH 09/20] Fix a few comments

---
 pyttb/create_problem.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pyttb/create_problem.py b/pyttb/create_problem.py
index af146a9c..312217dc 100644
--- a/pyttb/create_problem.py
+++ b/pyttb/create_problem.py
@@ -40,8 +40,8 @@ class CPProblem(BaseProblem):
     """Parameters specifying CP Solutions."""
 
     num_factors: int = 2
-    # TODO probably rename weight generator for consistency
     weight_generator: solution_generator = np.random.random
+    # TODO: This is in DataParams in MATLAB, but only works for CP problems
     sparse_generation: Optional[float] = None
 
 
@@ -63,7 +63,6 @@ class DataParams:
     """Parameters to control data quality."""
 
     noise: float = 0.10
-    # TODO handle weird sparse_generation option
 
     def __post_init__(
         self,
@@ -164,7 +163,6 @@ def generate_solution_factors(base_params: BaseProblem) -> list[np.ndarray]:
 
     if base_params.symmetric is not None:
         for grp in base_params.symmetric:
-            # TODO see if this can be a single indexed op
             for j in range(1, len(grp)):
                 factor_matrices[grp[j]] = factor_matrices[grp[0]]
 

From 8cd301ae22bbe4e9f28f31d3d1620acb364dea63 Mon Sep 17 00:00:00 2001
From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com>
Date: Mon, 16 Jun 2025 08:39:41 -0400
Subject: [PATCH 10/20] Minor improvement on collapse typing

---
 pyttb/sptensor.py | 16 ++++++++++++++++
 pyttb/tensor.py   | 16 ++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/pyttb/sptensor.py b/pyttb/sptensor.py
index 942a212e..2eddee5b 100644
--- a/pyttb/sptensor.py
+++ b/pyttb/sptensor.py
@@ -438,6 +438,20 @@ def allsubs(self) -> np.ndarray:
 
         return s.astype(int)
 
+    @overload
+    def collapse(
+        self,
+        dims: None,
+        function_handle: Callable[[np.ndarray], Union[float, np.ndarray]],
+    ) -> float: ...  # pragma: no cover see coveragepy/issues/970
+
+    @overload
+    def collapse(
+        self,
+        dims: OneDArray,
+        function_handle: Callable[[np.ndarray], Union[float, np.ndarray]] = sum,
+    ) -> Union[np.ndarray, sptensor]: ...  # pragma: no cover see coveragepy/issues/970
+
     def collapse(
         self,
         dims: Optional[OneDArray] = None,
@@ -503,6 +517,8 @@ def collapse(
                     size=newsize[0],
                     func=function_handle,
                 )
+            # TODO think about if this makes sense
+            # complicates return typing
             return np.zeros((newsize[0],))
 
         # Create Result
diff --git a/pyttb/tensor.py b/pyttb/tensor.py
index 1fe81dd3..1c2e8aa8 100644
--- a/pyttb/tensor.py
+++ b/pyttb/tensor.py
@@ -311,6 +311,20 @@ def __deepcopy__(self, memo):
         """Return deep copy of this tensor."""
         return self.copy()
 
+    @overload
+    def collapse(
+        self,
+        dims: None,
+        fun: Callable[[np.ndarray], Union[float, np.ndarray]],
+    ) -> float: ...
+
+    @overload
+    def collapse(
+        self,
+        dims: OneDArray,
+        fun: Callable[[np.ndarray], Union[float, np.ndarray]] = np.sum,
+    ) -> Union[np.ndarray, tensor]: ...
+
     def collapse(
         self,
         dims: Optional[OneDArray] = None,
@@ -382,6 +396,8 @@ def collapse(
             Min value: -0.977277879876411
         """
         if self.data.size == 0:
+            # TODO verify this is the only thing that returns np array
+            # and remove
             return np.array([], order=self.order)
 
         if dims is None:

From 60339247a905b55cd6dfbb9949494479974ac2ca Mon Sep 17 00:00:00 2001
From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com>
Date: Mon, 16 Jun 2025 10:00:22 -0400
Subject: [PATCH 11/20] Preliminary implementation for missing data generation,
 minimal testing

---
 pyttb/create_problem.py      | 179 +++++++++++++++++++++++++++++++----
 tests/test_create_problem.py |  18 +++-
 2 files changed, 174 insertions(+), 23 deletions(-)

diff --git a/pyttb/create_problem.py b/pyttb/create_problem.py
index 312217dc..0cd19f65 100644
--- a/pyttb/create_problem.py
+++ b/pyttb/create_problem.py
@@ -9,7 +9,7 @@
 from numpy_groupies import aggregate as accumarray
 
 import pyttb as ttb
-from pyttb.pyttb_utils import Shape
+from pyttb.pyttb_utils import Shape, parse_shape
 
 solution_generator = Callable[[Tuple[int, ...]], np.ndarray]
 
@@ -77,20 +77,22 @@ class MissingData:
 
     missing_ratio: float = 0.0
     sparse_model: bool = False
-    # TODO add spare pattern tensor
+    missing_pattern: Optional[Union[ttb.sptensor, ttb.tensor]] = None
 
     def __post_init__(self):
         if not 0.0 <= self.missing_ratio <= 1.0:
             raise ValueError(
                 f"Missing ratio must be in [0,1] but got {self.missing_ratio}"
             )
-
-        if self.sparse_model and self.missing_ratio > 0.0:
-            raise ValueError("Can't combine missing data and sparse generation.")
+        if self.missing_ratio > 0.0 and self.missing_pattern is not None:
+            raise ValueError(
+                "Can't set ratio and explicit pattern to specify missing data. "
+                "Select one or the other."
+            )
 
     def has_missing(self) -> bool:
         """Check if any form of missing data is requested."""
-        return self.sparse_model or self.missing_ratio > 0.0
+        return self.missing_ratio > 0.0 or self.missing_pattern is not None
 
     def raise_symmetric(self):
         """Raise for unsupported symmetry request."""
@@ -99,6 +101,107 @@ def raise_symmetric(self):
         if self.sparse_model:
             raise ValueError("Can't generate sparse symmetric problem.")
 
+    def get_pattern(self, shape: Shape) -> Union[None, ttb.tensor, ttb.sptensor]:
+        """Build the missing-data pattern for shape; None when nothing is missing."""
+        if self.missing_pattern is not None:
+            if self.missing_pattern.shape != parse_shape(shape):
+                raise ValueError(
+                    "Missing pattern and problem shapes are not compatible."
+                )
+            return self.missing_pattern
+
+        if self.missing_ratio == 0.0:
+            # All usages of this are internal, should we just rule out this situation?
+            return None
+        if self.missing_ratio < 0.8 and self.sparse_model:
+            logging.warning(
+                "Setting sparse to false because there are"
+                " fewer than 80% missing elements."
+            )
+        return _create_missing_data_pattern(
+            shape, self.missing_ratio, self.sparse_model and self.missing_ratio >= 0.8
+        )
+
+
+def _create_missing_data_pattern(
+    shape: Shape, missing_ratio: float, sparse_model: bool = False
+) -> Union[ttb.tensor, ttb.sptensor]:
+    """Create a randomly missing element indicator tensor.
+
+    Creates a binary tensor of the given shape with 0's indicating missing data
+    and 1's indicating valid data. Retries until every N-1 dimensional slice
+    has at least one valid entry and raises ValueError if that never happens.
+    """
+    shape = parse_shape(shape)
+    ndim = len(shape)
+    P = math.prod(shape)  # total number of entries
+    Q = math.ceil((1 - missing_ratio) * P)  # number of entries kept as known
+    W: Union[ttb.tensor, ttb.sptensor]
+
+    # Create tensor
+    ## Keep iterating until tensor is created or we give up.
+    max_attempts = 20  # TODO: make configurable?
+    for _ in range(max_attempts):
+        if sparse_model:
+            # Start with 50% more than Q random subs
+            # Note in original matlab to work out expected value of a*Q to guarantee
+            # Q unique entries
+            subs = np.unique(
+                np.floor(
+                    np.random.random((int(np.ceil(1.5 * Q)), len(shape))).dot(
+                        np.diag(shape)
+                    )
+                ),
+                axis=0,
+            ).astype(int)
+            # Check if there are too many unique subs
+            if len(subs) > Q:
+                # TODO: check if note from matlab still relevant
+                # Note in original matlab: unique orders the subs and would bias toward
+                # first subs with lower values, so we sample to cut back
+                idx = np.random.permutation(subs.shape[0])
+                subs = subs[idx[:Q]]
+            elif subs.shape[0] < Q:
+                logging.warning(
+                    f"Only generated {subs.shape[0]} of {Q} desired subscripts"
+                )
+            W = ttb.sptensor(
+                subs,
+                np.ones(
+                    (len(subs), 1),
+                ),
+                shape=shape,
+            )
+        else:
+            # Pick Q random linear indices to mark as known (non-missing) entries.
+            idx = np.random.permutation(P)
+            idx = idx[:Q]
+            W = ttb.tenzeros(shape)
+            W[idx] = 1
+        # W marks known entries with 1; everything else is treated as missing.
+
+        # Check if W has any empty slices
+        isokay = True
+        for n in range(ndim):
+            all_but_n = np.arange(W.ndims)
+            all_but_n = np.delete(all_but_n, n)
+            collapse_W = W.collapse(all_but_n)
+            if isinstance(collapse_W, np.ndarray):
+                isokay &= bool(np.all(collapse_W))
+            else:
+                isokay &= bool(np.all(collapse_W.double()))
+
+        # Quit if okay
+        if isokay:
+            break
+
+    if not isokay:
+        raise ValueError(
+            f"After {max_attempts} iterations, cannot produce a tensor with"
+            f" {missing_ratio * 100}% missing data without an empty slice."
+        )
+    return W
+
 
 @overload
 def create_problem(
@@ -125,16 +228,21 @@ def create_problem(
 
     solution = generate_solution(problem_params)
 
-    if missing_params.sparse_model:
-        raise NotImplementedError("Sparse generation not yet supported")
-
     data: Union[ttb.tensor, ttb.sptensor]
     if (
         isinstance(problem_params, CPProblem)
         and problem_params.sparse_generation is not None
     ):
+        if missing_params.has_missing():
+            raise ValueError(
+                f"Can't combine missing data {MissingData.__name__} and "
+                f" sparse generation {CPProblem.__name__}."
+            )
         solution = cast(ttb.ktensor, solution)
         solution, data = generate_data_sparse(solution, problem_params, data_params)
+    elif missing_params.has_missing():
+        pattern = missing_params.get_pattern(solution.shape)
+        data = generate_data(solution, problem_params, data_params, pattern)
     else:
         data = generate_data(solution, problem_params, data_params)
     return solution, data
@@ -197,20 +305,55 @@ def generate_solution(
     raise ValueError(f"Unsupported problem parameter type: {type(problem_params)=}")
 
 
+@overload
+def generate_data(
+    solution: Union[ttb.ktensor, ttb.ttensor],
+    problem_params: BaseProblem,
+    data_params: DataParams,
+    pattern: Optional[ttb.tensor] = None,
+) -> ttb.tensor: ...  # pragma: no cover see coveragepy/issues/970
+
+
+@overload
 def generate_data(
     solution: Union[ttb.ktensor, ttb.ttensor],
     problem_params: BaseProblem,
     data_params: DataParams,
-) -> ttb.tensor:
+    pattern: ttb.sptensor,
+) -> ttb.sptensor: ...  # pragma: no cover see coveragepy/issues/970
+
+
+def generate_data(
+    solution: Union[ttb.ktensor, ttb.ttensor],
+    problem_params: BaseProblem,
+    data_params: DataParams,
+    pattern: Optional[Union[ttb.tensor, ttb.sptensor]] = None,
+) -> Union[ttb.tensor, ttb.sptensor]:
     """Generate problem data."""
     shape = solution.shape
-    # TODO handle the sparsity pattern
-    # TODO don't we already have a randn tensor method?
-    Rdm = ttb.tensor(randn(shape))
-    Z = solution.full()
-    if problem_params.symmetric is not None:
-        # TODO Note in MATLAB code to follow up
-        Rdm = Rdm.symmetrize(np.array(problem_params.symmetric))
+    Rdm: Union[ttb.tensor, ttb.sptensor]
+    if pattern is not None:
+        if isinstance(pattern, ttb.sptensor):
+            Rdm = ttb.sptensor(pattern.subs, randn((pattern.nnz, 1)), pattern.shape)
+            try:
+                Z = pattern * solution
+            except Exception as E:
+                raise ValueError(
+                    f"{pattern.shape=}, {pattern.subs.shape}, {pattern.vals.shape}"
+                ) from E
+
+        elif isinstance(pattern, ttb.tensor):
+            Rdm = pattern * ttb.tensor(randn(shape))
+            Z = pattern * solution.full()
+        else:
+            raise ValueError(f"Unsupported sparsity pattern of type {type(pattern)}")
+    else:
+        # TODO don't we already have a randn tensor method?
+        Rdm = ttb.tensor(randn(shape))
+        Z = solution.full()
+        if problem_params.symmetric is not None:
+            # TODO Note in MATLAB code to follow up
+            Rdm = Rdm.symmetrize(np.array(problem_params.symmetric))
 
     D = Z + data_params.noise * Z.norm() * Rdm / Rdm.norm()
     # Make sure the final result is definitely symmetric
@@ -274,7 +417,7 @@ def generate_data_sparse(
     Z = ttb.sptensor(
         allsubs,
         np.ones(
-            len(allsubs),
+            (len(allsubs), 1),
         ),
         shape=shape,
     )
diff --git a/tests/test_create_problem.py b/tests/test_create_problem.py
index 32ff943e..996c785e 100644
--- a/tests/test_create_problem.py
+++ b/tests/test_create_problem.py
@@ -29,16 +29,11 @@ def test_missingdata(self):
         with pytest.raises(ValueError):
             number_less_than_zero = -2.0
             MissingData(missing_ratio=number_less_than_zero)
-        with pytest.raises(ValueError):
-            non_zero = 0.5
-            MissingData(missing_ratio=non_zero, sparse_model=True)
 
         missing_params = MissingData(missing_ratio=0.1)
         assert missing_params.has_missing()
         with pytest.raises(ValueError):
             missing_params.raise_symmetric()
-        missing_params = MissingData(sparse_model=True)
-        assert missing_params.has_missing()
         with pytest.raises(ValueError):
             missing_params.raise_symmetric()
         missing_params = MissingData()
@@ -110,3 +105,16 @@ def test_create_problem_smoke_sparse():
     assert soln.full().shape == data.shape
 
     # TODO hit edge cases and symmetric
+
+
+def test_create_problem_smoke_missing():
+    shape = (4, 5, 6)
+    cp_params = CPProblem(shape, factor_generator=np.random.random)
+    data_params = DataParams()
+    missing_params = MissingData(missing_ratio=0.8)
+    soln, data = create_problem(cp_params, missing_params, data_params)
+    assert soln.full().shape == data.shape
+
+    missing_params = MissingData(missing_ratio=0.8, sparse_model=True)
+    soln, data = create_problem(cp_params, missing_params, data_params)
+    assert soln.full().shape == data.shape

From b014292763d75ed6178c42eb70c4f666024fd5da Mon Sep 17 00:00:00 2001
From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com>
Date: Tue, 17 Jun 2025 12:59:49 -0400
Subject: [PATCH 12/20] Add further smoke tests and tutorial notebook

---
 .../tutorial/utility_test_problem.ipynb       | 674 ++++++++++++++++++
 pyttb/create_problem.py                       |  19 +-
 tests/test_create_problem.py                  |  28 +
 3 files changed, 712 insertions(+), 9 deletions(-)
 create mode 100644 docs/source/tutorial/utility_test_problem.ipynb

diff --git a/docs/source/tutorial/utility_test_problem.ipynb b/docs/source/tutorial/utility_test_problem.ipynb
new file mode 100644
index 00000000..08e51035
--- /dev/null
+++ b/docs/source/tutorial/utility_test_problem.ipynb
@@ -0,0 +1,674 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "f1c6d8db",
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "source": [
+    "# Creating Test Problems\n",
+    "```\n",
+    "Copyright 2025 National Technology & Engineering Solutions of Sandia,\n",
+    "LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the\n",
+    "U.S. Government retains certain rights in this software.\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "31511b37",
+   "metadata": {},
+   "source": [
+    "We demonstrate how to use the `create_problem` function to create test problems for decomposition algorithms. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "afb832c2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pyttb as ttb\n",
+    "from pyttb.create_problem import (\n",
+    "    CPProblem,\n",
+    "    TuckerProblem,\n",
+    "    MissingData,\n",
+    "    DataParams,\n",
+    "    create_problem,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "9451a579",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Set global random seed for reproducibility of this notebook\n",
+    "import numpy as np\n",
+    "\n",
+    "np.random.seed(123)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7771e8fe",
+   "metadata": {},
+   "source": [
+    "## Create a CP test problem\n",
+    "The `create_problem` function generates both the solution (as a `ktensor` for CP) and the test data (as a dense `tensor`)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "e6191ae4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a problem\n",
+    "cp_specific_params = CPProblem(shape=(5, 4, 3), num_factors=3)\n",
+    "data_params = DataParams(noise=0.1)\n",
+    "no_missing_data = MissingData()\n",
+    "solution, data = create_problem(cp_specific_params, no_missing_data, data_params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "8745779d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ktensor of shape (5, 4, 3) with order F\n",
+      "weights=[0.94416002 0.50183668 0.62395295]\n",
+      "factor_matrices[0] =\n",
+      "[[-1.0856306   0.99734545  0.2829785 ]\n",
+      " [-1.50629471 -0.57860025  1.65143654]\n",
+      " [-2.42667924 -0.42891263  1.26593626]\n",
+      " [-0.8667404  -0.67888615 -0.09470897]\n",
+      " [ 1.49138963 -0.638902   -0.44398196]]\n",
+      "factor_matrices[1] =\n",
+      "[[-0.43435128  2.20593008  2.18678609]\n",
+      " [ 1.0040539   0.3861864   0.73736858]\n",
+      " [ 1.49073203 -0.93583387  1.17582904]\n",
+      " [-1.25388067 -0.6377515   0.9071052 ]]\n",
+      "factor_matrices[2] =\n",
+      "[[-1.4286807  -0.14006872 -0.8617549 ]\n",
+      " [-0.25561937 -2.79858911 -1.7715331 ]\n",
+      " [-0.69987723  0.92746243 -0.17363568]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Display the solution\n",
+    "print(solution)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "b0bc3232",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor of shape (5, 4, 3) with order F\n",
+      "data[:, :, 0] =\n",
+      "[[-1.18990893  1.28446351  2.07235179 -1.87633271]\n",
+      " [-3.12652349  1.07273265  2.34701048 -3.14030325]\n",
+      " [-2.81968366  2.67865791  4.10636867 -4.33460199]\n",
+      " [-0.49910248  1.58553609  1.67667918 -1.4803083 ]\n",
+      " [ 1.5935628  -1.73784063 -2.7256112   2.76967403]]\n",
+      "data[:, :, 1] =\n",
+      "[[-4.02748914 -0.53027464  1.39868896  0.35255157]\n",
+      " [-2.24482406 -0.51914665 -2.34027329 -2.45371282]\n",
+      " [-2.02367801 -0.3794908  -1.16866717 -2.43337295]\n",
+      " [ 2.46562453  0.78956773 -0.26223999 -0.47003828]\n",
+      " [ 3.48686179  0.07186695 -1.21278825  0.24950518]]\n",
+      "data[:, :, 2] =\n",
+      "[[ 0.84583153  0.55670008  0.42026956 -0.99690908]\n",
+      " [-1.5567177   0.8349424   1.8725418  -1.14868937]\n",
+      " [-1.57718852  1.46198797  2.6604315  -2.05249945]\n",
+      " [-0.82259772  0.42556336  1.14869343 -0.65901074]\n",
+      " [-0.28411876 -1.17623054 -1.27449033  1.31403245]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Display the data\n",
+    "print(data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "14a85431",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.1\n"
+     ]
+    }
+   ],
+   "source": [
+    "# The difference between the true solution and measured data\n",
+    "# should match the specified noise setting\n",
+    "diff = (solution.full() - data).norm() / solution.full().norm()\n",
+    "print(diff)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1b7abeb5",
+   "metadata": {},
+   "source": [
+    "## Creating a Tucker test problem\n",
+    "The `create_problem` function can also create Tucker problems by providing a `TuckerProblem` data class as the first argument to `create_problem` instead. In this case, the function generates the solution as a `ttensor`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "f7af9632",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tucker_specific_params = TuckerProblem(shape=(5, 4, 3), num_factors=[3, 3, 2])\n",
+    "data_params = DataParams(noise=0.1)\n",
+    "no_missing_data = MissingData()\n",
+    "solution, data = create_problem(tucker_specific_params, no_missing_data, data_params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "699c9ecc",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "TTensor of shape: (5, 4, 3)\n",
+      "\tCore is a\n",
+      "\t\ttensor of shape (3, 3, 2) with order F\n",
+      "\t\tdata[:, :, 0] =\n",
+      "\t\t[[ 2.29546945  0.8628987  -0.13287838]\n",
+      "\t\t [ 0.31529775  0.94012555 -1.24988658]\n",
+      "\t\t [-0.75751615  0.66752096 -1.84400643]]\n",
+      "\t\tdata[:, :, 1] =\n",
+      "\t\t[[ 0.82319976  0.06143129 -0.31048223]\n",
+      "\t\t [-0.71417742  1.06731682  0.3213871 ]\n",
+      "\t\t [ 0.33786152 -1.90931822  0.37383405]]\n",
+      "\tU[0] = \n",
+      "\t\t[[ 0.93898923  0.43781947  1.14109158]\n",
+      "\t\t [ 0.17145177 -1.54957884 -0.97402348]\n",
+      "\t\t [-1.0491106  -0.46483438 -0.49055989]\n",
+      "\t\t [ 1.0007457   2.14851419  1.43240926]\n",
+      "\t\t [-0.13335333  0.00577405 -0.66762081]]\n",
+      "\tU[1] = \n",
+      "\t\t[[-0.94061891  0.93080981  0.04634267]\n",
+      "\t\t [ 1.33673724  0.28026028  1.49663046]\n",
+      "\t\t [-0.68415163  0.335301   -1.12855526]\n",
+      "\t\t [-0.13372712 -0.78503925 -0.23590284]]\n",
+      "\tU[2] = \n",
+      "\t\t[[-1.41195749 -0.88776123]\n",
+      "\t\t [ 0.10426711  0.42249603]\n",
+      "\t\t [-0.20072189 -1.41672713]]\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Display the solution\n",
+    "print(solution)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "48285087",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor of shape (5, 4, 3) with order F\n",
+      "data[:, :, 0] =\n",
+      "[[ 1.90571751  1.29306932 -2.66047991  0.4787608 ]\n",
+      " [ 3.32632534 -8.24046905  7.05868556 -0.94570443]\n",
+      " [-1.70172708  2.04521885  0.47297378 -1.76717467]\n",
+      " [-1.77933637  5.49652024 -7.81954496  2.61105222]\n",
+      " [-0.62849444 -2.47539421  1.61469082  0.71437041]]\n",
+      "data[:, :, 1] =\n",
+      "[[-0.90290826  0.53648692  0.06304186  0.10529605]\n",
+      " [-0.59241983  0.91173894 -0.68241772  0.38676663]\n",
+      " [ 0.40853234 -0.04163589  0.21205378  0.08396353]\n",
+      " [-0.53454083  0.26397327  0.43616478 -0.47223017]\n",
+      " [ 0.07478656 -0.04549533  0.20458064 -0.37257969]]\n",
+      "data[:, :, 2] =\n",
+      "[[ 3.01781992 -1.167676    1.59175537 -0.96841114]\n",
+      " [ 1.37702074 -0.87936349  0.47784026 -0.01377307]\n",
+      " [-1.51797541  1.40668289 -0.8199048   0.2912658 ]\n",
+      " [-0.00535056 -0.77270545  0.0753881   0.21781704]\n",
+      " [-1.98105208  0.16641742 -0.82378859  1.06506215]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Display the data\n",
+    "print(data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "9305a0be",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.1\n"
+     ]
+    }
+   ],
+   "source": [
+    "# The difference between the true solution and measured data\n",
+    "# should match the specified noise setting\n",
+    "diff = (solution.full() - data).norm() / solution.full().norm()\n",
+    "print(diff)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a3cdffab",
+   "metadata": {},
+   "source": [
+    "## Recreating the same test problem\n",
+    "We are still relying on numpy's deprecated global random state. See [#441](https://github.com/sandialabs/pyttb/issues/441)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "4d836930",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Problem details\n",
+    "shape = [5, 4, 3]\n",
+    "num_factors = 3\n",
+    "seed = 123\n",
+    "missing_params = MissingData()\n",
+    "data_params = DataParams()\n",
+    "cp_specific_params = CPProblem(shape, num_factors=num_factors)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "21c10394",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Generate the first test problem\n",
+    "np.random.seed(seed)\n",
+    "solution_1, data_1 = create_problem(cp_specific_params, missing_params, data_params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "749f8aae",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Generate the second test problem\n",
+    "np.random.seed(seed)\n",
+    "solution_2, data_2 = create_problem(cp_specific_params, missing_params, data_params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "6c6dd4a6",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "solution_1.isequal(solution_2)=True\n",
+      "(data_1-data_2).norm()=0.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check that the solutions are identical\n",
+    "print(f\"{solution_1.isequal(solution_2)=}\")\n",
+    "\n",
+    "# Check that the data are identical\n",
+    "print(f\"{(data_1-data_2).norm()=}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "90a399d2",
+   "metadata": {},
+   "source": [
+    "## Options for creating factor matrices, core tensors, and weights\n",
+    "\n",
+    "User specified functions may be provided to generate the relevant components of `ktensors` or `ttensors`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "7e20d77a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[1. 1.]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Example custom weight generator for CP Problems\n",
+    "cp_specific_params = CPProblem(shape=[5, 4, 3], num_factors=2, weight_generator=np.ones)\n",
+    "solution, _ = create_problem(cp_specific_params, missing_params, data_params)\n",
+    "print(f\"{solution.weights}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "4f18ec86",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor of shape (2, 2, 2) with order F\n",
+      "data[:, :, 0] =\n",
+      "[[1. 1.]\n",
+      " [1. 1.]]\n",
+      "data[:, :, 1] =\n",
+      "[[1. 1.]\n",
+      " [1. 1.]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Example custom core generator for Tucker\n",
+    "tucker_specific_params = TuckerProblem(\n",
+    "    shape=[5, 4, 3], num_factors=[2, 2, 2], core_generator=ttb.tenones\n",
+    ")\n",
+    "solution, _ = create_problem(tucker_specific_params, missing_params, data_params)\n",
+    "print(f\"{solution.core}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "40db96b5",
+   "metadata": {},
+   "source": [
+    "## Create dense missing data problems\n",
+    "It's possible to create problems that have a percentage of missing data. The problem generator randomly creates the pattern of missing data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "e6ceafb2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor of shape (5, 4, 3) with order F\n",
+      "data[:, :, 0] =\n",
+      "[[1. 1. 1. 1.]\n",
+      " [1. 1. 1. 1.]\n",
+      " [1. 0. 0. 1.]\n",
+      " [1. 0. 1. 1.]\n",
+      " [0. 0. 1. 1.]]\n",
+      "data[:, :, 1] =\n",
+      "[[1. 0. 1. 1.]\n",
+      " [0. 1. 1. 1.]\n",
+      " [0. 0. 1. 0.]\n",
+      " [0. 1. 0. 1.]\n",
+      " [0. 1. 1. 1.]]\n",
+      "data[:, :, 2] =\n",
+      "[[1. 1. 1. 1.]\n",
+      " [1. 0. 1. 1.]\n",
+      " [1. 1. 1. 0.]\n",
+      " [1. 1. 1. 1.]\n",
+      " [1. 1. 1. 1.]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Specify 25% missing data\n",
+    "missing_data_params = MissingData(missing_ratio=0.25)\n",
+    "\n",
+    "# Show an example of randomly generated pattern\n",
+    "# 1 is known 0 is unknown\n",
+    "print(missing_data_params.get_pattern(shape=[5, 4, 3]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "de646ec4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Generate problem using a newly sampled pattern\n",
+    "solution, data = create_problem(cp_specific_params, missing_data_params, data_params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "a51a3e70",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor of shape (5, 4, 3) with order F\n",
+      "data[:, :, 0] =\n",
+      "[[ 0.26328253 -0.10368023  2.55048192 -3.57426141]\n",
+      " [ 0.94610094  0.         -0.33422528  0.        ]\n",
+      " [-0.92754391  0.06078374 -0.58964057  1.05604786]\n",
+      " [ 0.09245559  0.09024844 -0.30026929  1.37588424]\n",
+      " [ 0.          0.28395231  1.72801315 -0.92447749]]\n",
+      "data[:, :, 1] =\n",
+      "[[ 9.52217582e+00 -0.00000000e+00  0.00000000e+00 -6.69297443e+00]\n",
+      " [ 1.15649571e+00  0.00000000e+00  5.55042375e-01 -1.65046604e+00]\n",
+      " [-4.51899793e+00  0.00000000e+00  5.78509093e-01  0.00000000e+00]\n",
+      " [-2.79055031e+00  0.00000000e+00  4.46173850e-01  2.02037594e+00]\n",
+      " [ 0.00000000e+00 -4.02815924e-01 -7.73108195e-01  8.60303664e-03]]\n",
+      "data[:, :, 2] =\n",
+      "[[ 3.79691232 -0.06051519  0.65215482 -0.        ]\n",
+      " [ 0.88487369 -0.32951914 -0.         -0.4502584 ]\n",
+      " [-2.0738586  -0.1541553  -0.01849825  0.        ]\n",
+      " [-0.88031719  0.          0.          1.15149304]\n",
+      " [-0.26446742 -0.16180758  0.39415731 -0.15164033]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Show data (including noise) with missing entries zeroed out\n",
+    "print(data)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b318629f",
+   "metadata": {},
+   "source": [
+    "## Creating sparse missing data problems\n",
+    "If `sparse_model` is set to true, then the returned data is sparse. This should only be used with `missing_ratio` >= 0.8."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "475f352b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "sparse tensor of shape (5, 4, 3) with 12 nonzeros and order F\n",
+      "[2, 0, 0] = 1.0\n",
+      "[4, 1, 2] = 1.0\n",
+      "[0, 2, 1] = 1.0\n",
+      "[3, 1, 0] = 1.0\n",
+      "[0, 3, 2] = 1.0\n",
+      "[4, 1, 0] = 1.0\n",
+      "[2, 0, 2] = 1.0\n",
+      "[1, 0, 2] = 1.0\n",
+      "[0, 1, 2] = 1.0\n",
+      "[4, 2, 0] = 1.0\n",
+      "[4, 3, 0] = 1.0\n",
+      "[4, 1, 1] = 1.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "missing_data_params = MissingData(missing_ratio=0.8, sparse_model=True)\n",
+    "\n",
+    "# Here is a candidate pattern of known data\n",
+    "print(missing_data_params.get_pattern([5, 4, 3]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "927d028b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "sparse tensor of shape (5, 4, 3) with 12 nonzeros and order F\n",
+      "[0, 0, 2] = -5.383104265170353\n",
+      "[1, 0, 1] = 1.3205409642301527\n",
+      "[1, 3, 1] = 0.37245008604597707\n",
+      "[2, 1, 0] = 3.4968221275551286\n",
+      "[2, 3, 0] = -0.60505637068868\n",
+      "[3, 1, 0] = 1.2090679007381293\n",
+      "[3, 3, 0] = 0.465905565990883\n",
+      "[3, 3, 1] = -0.4776597676392981\n",
+      "[4, 2, 0] = 1.322753952503849\n",
+      "[4, 2, 2] = 4.164836676033628\n",
+      "[4, 3, 1] = 0.04320152879052623\n",
+      "[4, 3, 2] = 0.5475986467539911\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Here is the data (including noise) with zeros not explicitly represented.\n",
+    "solution, data = create_problem(cp_specific_params, missing_data_params, data_params)\n",
+    "print(data)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b0a4db7a",
+   "metadata": {},
+   "source": [
+    "## Create missing data problems with pre-specified pattern\n",
+    "A specific pattern (dense or sparse) can be used to represent missing data. This is also currently the recommended approach for reproducibility."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "499efc37",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor of shape (5, 4, 3) with order F\n",
+      "data[:, :, 0] =\n",
+      "[[ 1.12259246 -0.62712395  0.37444797  0.14341225]\n",
+      " [ 0.         -0.23923868 -0.28106573 -0.        ]\n",
+      " [-2.19406735 -0.         -1.26176736 -0.96253911]\n",
+      " [ 1.19096803  0.73586963  0.82194128  0.71532815]\n",
+      " [-0.06070134  0.18508213  0.05135651 -0.09115959]]\n",
+      "data[:, :, 1] =\n",
+      "[[ 0.         -2.17818307  0.00366178  0.        ]\n",
+      " [-0.51123889  0.          0.         -0.30924106]\n",
+      " [-2.75480765 -0.36658613 -1.36684341 -1.02292674]\n",
+      " [ 0.9916353   0.          0.72938433  0.66456863]\n",
+      " [-0.40295989  0.38817973 -0.07536029 -0.03630603]]\n",
+      "data[:, :, 2] =\n",
+      "[[-1.17821661  1.27948531  0.16695706 -0.        ]\n",
+      " [-0.         -0.15915173 -0.17588344  0.02034108]\n",
+      " [-0.          0.          0.         -0.33177688]\n",
+      " [ 0.61206739 -0.17658631  0.1972258   0.        ]\n",
+      " [ 0.         -0.21265941 -0.00546545  0.07131428]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Grab a pattern from before\n",
+    "pattern = MissingData(missing_ratio=0.25).get_pattern([5, 4, 3])\n",
+    "missing_data_params = MissingData(missing_pattern=pattern)\n",
+    "solution, data = create_problem(cp_specific_params, missing_data_params, data_params)\n",
+    "print(data)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/pyttb/create_problem.py b/pyttb/create_problem.py
index 0cd19f65..fedaf5d5 100644
--- a/pyttb/create_problem.py
+++ b/pyttb/create_problem.py
@@ -12,6 +12,9 @@
 from pyttb.pyttb_utils import Shape, parse_shape
 
 solution_generator = Callable[[Tuple[int, ...]], np.ndarray]
+core_generator_t = Callable[
+    [Tuple[int, ...]], Union[ttb.tensor, ttb.sptensor, np.ndarray]
+]
 
 
 def randn(shape: Tuple[int, ...]) -> np.ndarray:
@@ -51,7 +54,7 @@ class TuckerProblem(BaseProblem):
 
     # TODO post_init set to [2, 2, 2]
     num_factors: Optional[list[int]] = None
-    core_generator: solution_generator = randn
+    core_generator: core_generator_t = randn
 
     def __post_init__(self):
         super().__post_init__()
@@ -297,7 +300,11 @@ def generate_solution(
     # Create final model
     if isinstance(problem_params, TuckerProblem):
         nfactors = cast(list[int], problem_params.num_factors)
-        core = ttb.tensor(problem_params.core_generator(tuple(nfactors)))
+        generated_core = problem_params.core_generator(tuple(nfactors))
+        if isinstance(generated_core, (ttb.tensor, ttb.sptensor)):
+            core = generated_core
+        else:
+            core = ttb.tensor(generated_core)
         return ttb.ttensor(core, factor_matrices)
     elif isinstance(problem_params, CPProblem):
         weights = problem_params.weight_generator((problem_params.num_factors,))
@@ -335,13 +342,7 @@ def generate_data(
     if pattern is not None:
         if isinstance(pattern, ttb.sptensor):
             Rdm = ttb.sptensor(pattern.subs, randn((pattern.nnz, 1)), pattern.shape)
-            try:
-                Z = pattern * solution
-            except Exception as E:
-                raise ValueError(
-                    f"{pattern.shape=}, {pattern.subs.shape}, {pattern.vals.shape}"
-                ) from E
-
+            Z = pattern * solution
         elif isinstance(pattern, ttb.tensor):
             Rdm = pattern * ttb.tensor(randn(shape))
             Z = pattern * solution.full()
diff --git a/tests/test_create_problem.py b/tests/test_create_problem.py
index 996c785e..d4c202da 100644
--- a/tests/test_create_problem.py
+++ b/tests/test_create_problem.py
@@ -3,6 +3,7 @@
 
 import pyttb as ttb
 from pyttb.create_problem import (
+    BaseProblem,
     CPProblem,
     DataParams,
     MissingData,
@@ -91,6 +92,20 @@ def test_create_problem_smoke():
     soln, data = create_problem(cp_params, missing_params, data_params)
     assert soln.full().shape == data.shape
 
+    cp_params.symmetric = [(0, 1)]
+    soln, data = create_problem(cp_params, missing_params, data_params)
+    assert soln.full().shape == data.shape
+
+    with pytest.raises(ValueError):
+        empty_num_factors = BaseProblem(shape)
+        create_problem(empty_num_factors, missing_params, data_params)
+    with pytest.raises(ValueError):
+        inconsistent_num_factors = BaseProblem(shape, num_factors=[2, 2])
+        create_problem(inconsistent_num_factors, missing_params, data_params)
+    with pytest.raises(ValueError):
+        bad_problem_type = BaseProblem(shape, num_factors=3)
+        create_problem(bad_problem_type, missing_params, data_params)
+
     # TODO hit edge cases and symmetric
 
 
@@ -104,6 +119,9 @@ def test_create_problem_smoke_sparse():
     soln, data = create_problem(cp_params, missing_params, data_params)
     assert soln.full().shape == data.shape
 
+    with pytest.raises(ValueError):
+        missing_AND_sparse_generation = MissingData(missing_ratio=0.1)
+        create_problem(cp_params, missing_AND_sparse_generation, data_params)
     # TODO hit edge cases and symmetric
 
 
@@ -118,3 +136,13 @@ def test_create_problem_smoke_missing():
     missing_params = MissingData(missing_ratio=0.8, sparse_model=True)
     soln, data = create_problem(cp_params, missing_params, data_params)
     assert soln.full().shape == data.shape
+
+    with pytest.raises(ValueError):
+        bad_pattern_shape = np.ones([dim + 1 for dim in soln.shape])
+        missing_params = MissingData(missing_pattern=bad_pattern_shape)
+        create_problem(cp_params, missing_params, data_params)
+
+    with pytest.raises(ValueError):
+        bad_pattern_type = np.ones(soln.shape)
+        missing_params = MissingData(missing_pattern=bad_pattern_type)
+        create_problem(cp_params, missing_params, data_params)

From d2725a369e30164485734a792952bcdc91e7cd5f Mon Sep 17 00:00:00 2001
From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com>
Date: Tue, 17 Jun 2025 13:00:12 -0400
Subject: [PATCH 13/20] Fix ttensor doc string

---
 pyttb/ttensor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyttb/ttensor.py b/pyttb/ttensor.py
index eb58d4f4..ba22ebec 100644
--- a/pyttb/ttensor.py
+++ b/pyttb/ttensor.py
@@ -200,7 +200,7 @@ def __repr__(self):  # pragma: no cover
         str
             Contains the core, and factor matrices as strings on different lines.
         """
-        display_string = f"Tensor of shape: {self.shape}\n" f"\tCore is a\n"
+        display_string = f"TTensor of shape: {self.shape}\n" f"\tCore is a\n"
         display_string += textwrap.indent(str(self.core), "\t\t")
         display_string += "\n"
 

From aeb52fc3c8460694a2daaf3e9dbcec60b26e4c7f Mon Sep 17 00:00:00 2001
From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com>
Date: Tue, 17 Jun 2025 13:00:34 -0400
Subject: [PATCH 14/20] Fix coverage in overload

---
 pyttb/tensor.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyttb/tensor.py b/pyttb/tensor.py
index 1c2e8aa8..4aaf9ca0 100644
--- a/pyttb/tensor.py
+++ b/pyttb/tensor.py
@@ -316,14 +316,14 @@ def collapse(
         self,
         dims: None,
         fun: Callable[[np.ndarray], Union[float, np.ndarray]],
-    ) -> float: ...
+    ) -> float: ...  # pragma: no cover see coveragepy/issues/970
 
     @overload
     def collapse(
         self,
         dims: OneDArray,
         fun: Callable[[np.ndarray], Union[float, np.ndarray]] = np.sum,
-    ) -> Union[np.ndarray, tensor]: ...
+    ) -> Union[np.ndarray, tensor]: ...  # pragma: no cover see coveragepy/issues/970
 
     def collapse(
         self,

From a0d85124766ae59eb4c4d40c58dc2b2febcc1298 Mon Sep 17 00:00:00 2001
From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com>
Date: Wed, 18 Jun 2025 08:48:01 -0400
Subject: [PATCH 15/20] Add minimal validation to sptensor to avoid footgun

---
 pyttb/sptensor.py      | 5 +++++
 tests/test_sptensor.py | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/pyttb/sptensor.py b/pyttb/sptensor.py
index 2eddee5b..cca500ca 100644
--- a/pyttb/sptensor.py
+++ b/pyttb/sptensor.py
@@ -163,6 +163,11 @@ def __init__(
         if vals.size == 0:
             # In case user provides an empty array in weird format
             vals = np.array([], dtype=vals.dtype, ndmin=2)
+        elif len(vals.shape) == 1:
+            # Enforce column array
+            vals = vals.reshape((vals.shape[0], 1))
+        elif len(vals.shape) > 2:
+            raise ValueError("Values should be a column vector")
 
         if copy:
             self.subs = subs.copy()
diff --git a/tests/test_sptensor.py b/tests/test_sptensor.py
index 0e41e23a..b7d32756 100644
--- a/tests/test_sptensor.py
+++ b/tests/test_sptensor.py
@@ -1357,7 +1357,7 @@ def test_sptensor_squeeze(sample_sptensor):
     )
     assert np.array_equal(
         ttb.sptensor(np.array([[0, 0, 0]]), np.array([4]), (2, 2, 1)).squeeze().vals,
-        np.array([4]),
+        np.array([[4]]),
     )
 
     # Singleton dimension with empty sptensor

From 9a0aef73d52c9734c8ceb509bbdc841cc55a07b9 Mon Sep 17 00:00:00 2001
From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com>
Date: Wed, 18 Jun 2025 11:01:31 -0400
Subject: [PATCH 16/20] Clean up create problem documentation

---
 docs/source/create_problem.rst                |  22 +
 docs/source/tensor_classes.rst                |   1 +
 .../tutorial/utility_test_problem.ipynb       | 461 +++---------------
 docs/source/tutorials.rst                     |   5 +
 pyttb/create_problem.py                       | 163 +++++--
 tests/test_create_problem.py                  |  37 +-
 6 files changed, 256 insertions(+), 433 deletions(-)
 create mode 100644 docs/source/create_problem.rst

diff --git a/docs/source/create_problem.rst b/docs/source/create_problem.rst
new file mode 100644
index 00000000..9b6a99dc
--- /dev/null
+++ b/docs/source/create_problem.rst
@@ -0,0 +1,22 @@
+Create Test Problems (:obj:`pyttb.create_problem`)
+---------------------------------------------------
+
+.. autoclass:: pyttb.create_problem.BaseProblem
+    :exclude-members: __dict__, __weakref__, __slots__
+    :show-inheritance:
+
+.. autoclass:: pyttb.create_problem.CPProblem
+    :exclude-members: __dict__, __weakref__, __slots__
+    :show-inheritance:
+
+.. autoclass:: pyttb.create_problem.TuckerProblem
+    :exclude-members: __dict__, __weakref__, __slots__
+    :show-inheritance:
+
+.. autoclass:: pyttb.create_problem.MissingData
+    :members:
+    :special-members:
+    :exclude-members: __dict__, __weakref__, __slots__
+    :show-inheritance:
+
+.. autofunction:: pyttb.create_problem.create_problem
diff --git a/docs/source/tensor_classes.rst b/docs/source/tensor_classes.rst
index 56fb9f21..4a220ed3 100644
--- a/docs/source/tensor_classes.rst
+++ b/docs/source/tensor_classes.rst
@@ -12,4 +12,5 @@ Tensor Classes
     tenmat.rst
     sptenmat.rst
     pyttb_utils.rst
+    create_problem.rst
 
diff --git a/docs/source/tutorial/utility_test_problem.ipynb b/docs/source/tutorial/utility_test_problem.ipynb
index 08e51035..d4fabe8f 100644
--- a/docs/source/tutorial/utility_test_problem.ipynb
+++ b/docs/source/tutorial/utility_test_problem.ipynb
@@ -2,12 +2,8 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "f1c6d8db",
-   "metadata": {
-    "vscode": {
-     "languageId": "plaintext"
-    }
-   },
+   "id": "0",
+   "metadata": {},
    "source": [
     "# Creating Test Problems\n",
     "```\n",
@@ -19,7 +15,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "31511b37",
+   "id": "1",
    "metadata": {},
    "source": [
     "We demonstrate how to use the `create_problem` function to create test problems for decomposition algorithms. "
@@ -27,8 +23,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
-   "id": "afb832c2",
+   "execution_count": null,
+   "id": "2",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -37,15 +33,14 @@
     "    CPProblem,\n",
     "    TuckerProblem,\n",
     "    MissingData,\n",
-    "    DataParams,\n",
     "    create_problem,\n",
     ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
-   "id": "9451a579",
+   "execution_count": null,
+   "id": "3",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -57,7 +52,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "7771e8fe",
+   "id": "4",
    "metadata": {},
    "source": [
     "## Create a CP test problem\n",
@@ -66,48 +61,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
-   "id": "e6191ae4",
+   "execution_count": null,
+   "id": "5",
    "metadata": {},
    "outputs": [],
    "source": [
     "# Create a problem\n",
-    "cp_specific_params = CPProblem(shape=(5, 4, 3), num_factors=3)\n",
-    "data_params = DataParams(noise=0.1)\n",
+    "cp_specific_params = CPProblem(shape=(5, 4, 3), num_factors=3, noise=0.1)\n",
     "no_missing_data = MissingData()\n",
-    "solution, data = create_problem(cp_specific_params, no_missing_data, data_params)"
+    "solution, data = create_problem(cp_specific_params, no_missing_data)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
-   "id": "8745779d",
+   "execution_count": null,
+   "id": "6",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "ktensor of shape (5, 4, 3) with order F\n",
-      "weights=[0.94416002 0.50183668 0.62395295]\n",
-      "factor_matrices[0] =\n",
-      "[[-1.0856306   0.99734545  0.2829785 ]\n",
-      " [-1.50629471 -0.57860025  1.65143654]\n",
-      " [-2.42667924 -0.42891263  1.26593626]\n",
-      " [-0.8667404  -0.67888615 -0.09470897]\n",
-      " [ 1.49138963 -0.638902   -0.44398196]]\n",
-      "factor_matrices[1] =\n",
-      "[[-0.43435128  2.20593008  2.18678609]\n",
-      " [ 1.0040539   0.3861864   0.73736858]\n",
-      " [ 1.49073203 -0.93583387  1.17582904]\n",
-      " [-1.25388067 -0.6377515   0.9071052 ]]\n",
-      "factor_matrices[2] =\n",
-      "[[-1.4286807  -0.14006872 -0.8617549 ]\n",
-      " [-0.25561937 -2.79858911 -1.7715331 ]\n",
-      " [-0.69987723  0.92746243 -0.17363568]]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Display the solution\n",
     "print(solution)"
@@ -115,36 +85,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
-   "id": "b0bc3232",
+   "execution_count": null,
+   "id": "7",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor of shape (5, 4, 3) with order F\n",
-      "data[:, :, 0] =\n",
-      "[[-1.18990893  1.28446351  2.07235179 -1.87633271]\n",
-      " [-3.12652349  1.07273265  2.34701048 -3.14030325]\n",
-      " [-2.81968366  2.67865791  4.10636867 -4.33460199]\n",
-      " [-0.49910248  1.58553609  1.67667918 -1.4803083 ]\n",
-      " [ 1.5935628  -1.73784063 -2.7256112   2.76967403]]\n",
-      "data[:, :, 1] =\n",
-      "[[-4.02748914 -0.53027464  1.39868896  0.35255157]\n",
-      " [-2.24482406 -0.51914665 -2.34027329 -2.45371282]\n",
-      " [-2.02367801 -0.3794908  -1.16866717 -2.43337295]\n",
-      " [ 2.46562453  0.78956773 -0.26223999 -0.47003828]\n",
-      " [ 3.48686179  0.07186695 -1.21278825  0.24950518]]\n",
-      "data[:, :, 2] =\n",
-      "[[ 0.84583153  0.55670008  0.42026956 -0.99690908]\n",
-      " [-1.5567177   0.8349424   1.8725418  -1.14868937]\n",
-      " [-1.57718852  1.46198797  2.6604315  -2.05249945]\n",
-      " [-0.82259772  0.42556336  1.14869343 -0.65901074]\n",
-      " [-0.28411876 -1.17623054 -1.27449033  1.31403245]]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Display the data\n",
     "print(data)"
@@ -152,18 +96,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
-   "id": "14a85431",
+   "execution_count": null,
+   "id": "8",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0.1\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# The difference between the true solution and measured data\n",
     "# should match the specified noise setting\n",
@@ -173,7 +109,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "1b7abeb5",
+   "id": "9",
    "metadata": {},
    "source": [
     "## Creating a Tucker test problem\n",
@@ -182,57 +118,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
-   "id": "f7af9632",
+   "execution_count": null,
+   "id": "10",
    "metadata": {},
    "outputs": [],
    "source": [
-    "tucker_specific_params = TuckerProblem(shape=(5, 4, 3), num_factors=[3, 3, 2])\n",
-    "data_params = DataParams(noise=0.1)\n",
+    "tucker_specific_params = TuckerProblem(\n",
+    "    shape=(5, 4, 3), num_factors=[3, 3, 2], noise=0.1\n",
+    ")\n",
     "no_missing_data = MissingData()\n",
-    "solution, data = create_problem(tucker_specific_params, no_missing_data, data_params)"
+    "solution, data = create_problem(tucker_specific_params, no_missing_data)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
-   "id": "699c9ecc",
+   "execution_count": null,
+   "id": "11",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "TTensor of shape: (5, 4, 3)\n",
-      "\tCore is a\n",
-      "\t\ttensor of shape (3, 3, 2) with order F\n",
-      "\t\tdata[:, :, 0] =\n",
-      "\t\t[[ 2.29546945  0.8628987  -0.13287838]\n",
-      "\t\t [ 0.31529775  0.94012555 -1.24988658]\n",
-      "\t\t [-0.75751615  0.66752096 -1.84400643]]\n",
-      "\t\tdata[:, :, 1] =\n",
-      "\t\t[[ 0.82319976  0.06143129 -0.31048223]\n",
-      "\t\t [-0.71417742  1.06731682  0.3213871 ]\n",
-      "\t\t [ 0.33786152 -1.90931822  0.37383405]]\n",
-      "\tU[0] = \n",
-      "\t\t[[ 0.93898923  0.43781947  1.14109158]\n",
-      "\t\t [ 0.17145177 -1.54957884 -0.97402348]\n",
-      "\t\t [-1.0491106  -0.46483438 -0.49055989]\n",
-      "\t\t [ 1.0007457   2.14851419  1.43240926]\n",
-      "\t\t [-0.13335333  0.00577405 -0.66762081]]\n",
-      "\tU[1] = \n",
-      "\t\t[[-0.94061891  0.93080981  0.04634267]\n",
-      "\t\t [ 1.33673724  0.28026028  1.49663046]\n",
-      "\t\t [-0.68415163  0.335301   -1.12855526]\n",
-      "\t\t [-0.13372712 -0.78503925 -0.23590284]]\n",
-      "\tU[2] = \n",
-      "\t\t[[-1.41195749 -0.88776123]\n",
-      "\t\t [ 0.10426711  0.42249603]\n",
-      "\t\t [-0.20072189 -1.41672713]]\n",
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Display the solution\n",
     "print(solution)"
@@ -240,36 +143,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
-   "id": "48285087",
+   "execution_count": null,
+   "id": "12",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor of shape (5, 4, 3) with order F\n",
-      "data[:, :, 0] =\n",
-      "[[ 1.90571751  1.29306932 -2.66047991  0.4787608 ]\n",
-      " [ 3.32632534 -8.24046905  7.05868556 -0.94570443]\n",
-      " [-1.70172708  2.04521885  0.47297378 -1.76717467]\n",
-      " [-1.77933637  5.49652024 -7.81954496  2.61105222]\n",
-      " [-0.62849444 -2.47539421  1.61469082  0.71437041]]\n",
-      "data[:, :, 1] =\n",
-      "[[-0.90290826  0.53648692  0.06304186  0.10529605]\n",
-      " [-0.59241983  0.91173894 -0.68241772  0.38676663]\n",
-      " [ 0.40853234 -0.04163589  0.21205378  0.08396353]\n",
-      " [-0.53454083  0.26397327  0.43616478 -0.47223017]\n",
-      " [ 0.07478656 -0.04549533  0.20458064 -0.37257969]]\n",
-      "data[:, :, 2] =\n",
-      "[[ 3.01781992 -1.167676    1.59175537 -0.96841114]\n",
-      " [ 1.37702074 -0.87936349  0.47784026 -0.01377307]\n",
-      " [-1.51797541  1.40668289 -0.8199048   0.2912658 ]\n",
-      " [-0.00535056 -0.77270545  0.0753881   0.21781704]\n",
-      " [-1.98105208  0.16641742 -0.82378859  1.06506215]]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Display the data\n",
     "print(data)"
@@ -277,18 +154,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
-   "id": "9305a0be",
+   "execution_count": null,
+   "id": "13",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0.1\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# The difference between the true solution and measured data\n",
     "# should match the specified noise setting\n",
@@ -298,7 +167,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "a3cdffab",
+   "id": "14",
    "metadata": {},
    "source": [
     "## Recreating the same test problem\n",
@@ -307,8 +176,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
-   "id": "4d836930",
+   "execution_count": null,
+   "id": "15",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -317,49 +186,39 @@
     "num_factors = 3\n",
     "seed = 123\n",
     "missing_params = MissingData()\n",
-    "data_params = DataParams()\n",
     "cp_specific_params = CPProblem(shape, num_factors=num_factors)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
-   "id": "21c10394",
+   "execution_count": null,
+   "id": "16",
    "metadata": {},
    "outputs": [],
    "source": [
     "# Generate the first test problem\n",
     "np.random.seed(seed)\n",
-    "solution_1, data_1 = create_problem(cp_specific_params, missing_params, data_params)"
+    "solution_1, data_1 = create_problem(cp_specific_params, missing_params)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
-   "id": "749f8aae",
+   "execution_count": null,
+   "id": "17",
    "metadata": {},
    "outputs": [],
    "source": [
     "# Generate the second test problem\n",
     "np.random.seed(seed)\n",
-    "solution_2, data_2 = create_problem(cp_specific_params, missing_params, data_params)"
+    "solution_2, data_2 = create_problem(cp_specific_params, missing_params)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
-   "id": "6c6dd4a6",
+   "execution_count": null,
+   "id": "18",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "solution_1.isequal(solution_2)=True\n",
-      "(data_1-data_2).norm()=0.0\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Check that the solutions are identical\n",
     "print(f\"{solution_1.isequal(solution_2)=}\")\n",
@@ -370,7 +229,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "90a399d2",
+   "id": "19",
    "metadata": {},
    "source": [
     "## Options for creating factor matrices, core tensors, and weights\n",
@@ -380,57 +239,35 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
-   "id": "7e20d77a",
+   "execution_count": null,
+   "id": "20",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[1. 1.]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Example custom weight generator for CP Problems\n",
     "cp_specific_params = CPProblem(shape=[5, 4, 3], num_factors=2, weight_generator=np.ones)\n",
-    "solution, _ = create_problem(cp_specific_params, missing_params, data_params)\n",
+    "solution, _ = create_problem(cp_specific_params, missing_params)\n",
     "print(f\"{solution.weights}\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
-   "id": "4f18ec86",
+   "execution_count": null,
+   "id": "21",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor of shape (2, 2, 2) with order F\n",
-      "data[:, :, 0] =\n",
-      "[[1. 1.]\n",
-      " [1. 1.]]\n",
-      "data[:, :, 1] =\n",
-      "[[1. 1.]\n",
-      " [1. 1.]]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Example custom core generator for Tucker\n",
     "tucker_specific_params = TuckerProblem(\n",
     "    shape=[5, 4, 3], num_factors=[2, 2, 2], core_generator=ttb.tenones\n",
     ")\n",
-    "solution, _ = create_problem(tucker_specific_params, missing_params, data_params)\n",
+    "solution, _ = create_problem(tucker_specific_params, missing_params)\n",
     "print(f\"{solution.core}\")"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "40db96b5",
+   "id": "22",
    "metadata": {},
    "source": [
     "## Create dense missing data problems\n",
@@ -439,36 +276,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
-   "id": "e6ceafb2",
+   "execution_count": null,
+   "id": "23",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor of shape (5, 4, 3) with order F\n",
-      "data[:, :, 0] =\n",
-      "[[1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1.]\n",
-      " [1. 0. 0. 1.]\n",
-      " [1. 0. 1. 1.]\n",
-      " [0. 0. 1. 1.]]\n",
-      "data[:, :, 1] =\n",
-      "[[1. 0. 1. 1.]\n",
-      " [0. 1. 1. 1.]\n",
-      " [0. 0. 1. 0.]\n",
-      " [0. 1. 0. 1.]\n",
-      " [0. 1. 1. 1.]]\n",
-      "data[:, :, 2] =\n",
-      "[[1. 1. 1. 1.]\n",
-      " [1. 0. 1. 1.]\n",
-      " [1. 1. 1. 0.]\n",
-      " [1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1.]]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Specify 25% missing data\n",
     "missing_data_params = MissingData(missing_ratio=0.25)\n",
@@ -480,47 +291,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
-   "id": "de646ec4",
+   "execution_count": null,
+   "id": "24",
    "metadata": {},
    "outputs": [],
    "source": [
     "# Generate problem using a newly sampled pattern\n",
-    "solution, data = create_problem(cp_specific_params, missing_data_params, data_params)"
+    "solution, data = create_problem(cp_specific_params, missing_data_params)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
-   "id": "a51a3e70",
+   "execution_count": null,
+   "id": "25",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor of shape (5, 4, 3) with order F\n",
-      "data[:, :, 0] =\n",
-      "[[ 0.26328253 -0.10368023  2.55048192 -3.57426141]\n",
-      " [ 0.94610094  0.         -0.33422528  0.        ]\n",
-      " [-0.92754391  0.06078374 -0.58964057  1.05604786]\n",
-      " [ 0.09245559  0.09024844 -0.30026929  1.37588424]\n",
-      " [ 0.          0.28395231  1.72801315 -0.92447749]]\n",
-      "data[:, :, 1] =\n",
-      "[[ 9.52217582e+00 -0.00000000e+00  0.00000000e+00 -6.69297443e+00]\n",
-      " [ 1.15649571e+00  0.00000000e+00  5.55042375e-01 -1.65046604e+00]\n",
-      " [-4.51899793e+00  0.00000000e+00  5.78509093e-01  0.00000000e+00]\n",
-      " [-2.79055031e+00  0.00000000e+00  4.46173850e-01  2.02037594e+00]\n",
-      " [ 0.00000000e+00 -4.02815924e-01 -7.73108195e-01  8.60303664e-03]]\n",
-      "data[:, :, 2] =\n",
-      "[[ 3.79691232 -0.06051519  0.65215482 -0.        ]\n",
-      " [ 0.88487369 -0.32951914 -0.         -0.4502584 ]\n",
-      " [-2.0738586  -0.1541553  -0.01849825  0.        ]\n",
-      " [-0.88031719  0.          0.          1.15149304]\n",
-      " [-0.26446742 -0.16180758  0.39415731 -0.15164033]]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Show data (including noise) with missing entries zeroed out\n",
     "print(data)"
@@ -528,7 +313,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "b318629f",
+   "id": "26",
    "metadata": {},
    "source": [
     "## Creating sparse missing data problems\n",
@@ -538,29 +323,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "475f352b",
+   "id": "27",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "sparse tensor of shape (5, 4, 3) with 12 nonzeros and order F\n",
-      "[2, 0, 0] = 1.0\n",
-      "[4, 1, 2] = 1.0\n",
-      "[0, 2, 1] = 1.0\n",
-      "[3, 1, 0] = 1.0\n",
-      "[0, 3, 2] = 1.0\n",
-      "[4, 1, 0] = 1.0\n",
-      "[2, 0, 2] = 1.0\n",
-      "[1, 0, 2] = 1.0\n",
-      "[0, 1, 2] = 1.0\n",
-      "[4, 2, 0] = 1.0\n",
-      "[4, 3, 0] = 1.0\n",
-      "[4, 1, 1] = 1.0\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "missing_data_params = MissingData(missing_ratio=0.8, sparse_model=True)\n",
     "\n",
@@ -570,39 +335,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
-   "id": "927d028b",
+   "execution_count": null,
+   "id": "28",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "sparse tensor of shape (5, 4, 3) with 12 nonzeros and order F\n",
-      "[0, 0, 2] = -5.383104265170353\n",
-      "[1, 0, 1] = 1.3205409642301527\n",
-      "[1, 3, 1] = 0.37245008604597707\n",
-      "[2, 1, 0] = 3.4968221275551286\n",
-      "[2, 3, 0] = -0.60505637068868\n",
-      "[3, 1, 0] = 1.2090679007381293\n",
-      "[3, 3, 0] = 0.465905565990883\n",
-      "[3, 3, 1] = -0.4776597676392981\n",
-      "[4, 2, 0] = 1.322753952503849\n",
-      "[4, 2, 2] = 4.164836676033628\n",
-      "[4, 3, 1] = 0.04320152879052623\n",
-      "[4, 3, 2] = 0.5475986467539911\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Here is the data (including noise) with zeros not explicitly represented.\n",
-    "solution, data = create_problem(cp_specific_params, missing_data_params, data_params)\n",
+    "solution, data = create_problem(cp_specific_params, missing_data_params)\n",
     "print(data)"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "b0a4db7a",
+   "id": "29",
    "metadata": {},
    "source": [
     "## Create missing data problems with pre-specified pattern\n",
@@ -611,64 +356,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
-   "id": "499efc37",
+   "execution_count": null,
+   "id": "30",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor of shape (5, 4, 3) with order F\n",
-      "data[:, :, 0] =\n",
-      "[[ 1.12259246 -0.62712395  0.37444797  0.14341225]\n",
-      " [ 0.         -0.23923868 -0.28106573 -0.        ]\n",
-      " [-2.19406735 -0.         -1.26176736 -0.96253911]\n",
-      " [ 1.19096803  0.73586963  0.82194128  0.71532815]\n",
-      " [-0.06070134  0.18508213  0.05135651 -0.09115959]]\n",
-      "data[:, :, 1] =\n",
-      "[[ 0.         -2.17818307  0.00366178  0.        ]\n",
-      " [-0.51123889  0.          0.         -0.30924106]\n",
-      " [-2.75480765 -0.36658613 -1.36684341 -1.02292674]\n",
-      " [ 0.9916353   0.          0.72938433  0.66456863]\n",
-      " [-0.40295989  0.38817973 -0.07536029 -0.03630603]]\n",
-      "data[:, :, 2] =\n",
-      "[[-1.17821661  1.27948531  0.16695706 -0.        ]\n",
-      " [-0.         -0.15915173 -0.17588344  0.02034108]\n",
-      " [-0.          0.          0.         -0.33177688]\n",
-      " [ 0.61206739 -0.17658631  0.1972258   0.        ]\n",
-      " [ 0.         -0.21265941 -0.00546545  0.07131428]]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Grab a pattern from before\n",
     "pattern = MissingData(missing_ratio=0.25).get_pattern([5, 4, 3])\n",
     "missing_data_params = MissingData(missing_pattern=pattern)\n",
-    "solution, data = create_problem(cp_specific_params, missing_data_params, data_params)\n",
+    "solution, data = create_problem(cp_specific_params, missing_data_params)\n",
     "print(data)"
    ]
   }
  ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.12"
-  }
- },
+ "metadata": {},
  "nbformat": 4,
  "nbformat_minor": 5
 }
diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst
index ed373b56..4b1ffbc8 100644
--- a/docs/source/tutorials.rst
+++ b/docs/source/tutorials.rst
@@ -32,6 +32,11 @@ Tucker Decompositions
 Working with Tensors
 ====================
 
+.. toctree::
+   :maxdepth: 1
+
+   Creating Test Problems<tutorial/utility_test_problem.ipynb>   
+
 Converting Between Tensors and Matrices
 ---------------------------------------
 
diff --git a/pyttb/create_problem.py b/pyttb/create_problem.py
index fedaf5d5..f89cde6d 100644
--- a/pyttb/create_problem.py
+++ b/pyttb/create_problem.py
@@ -2,7 +2,7 @@
 
 import logging
 import math
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Callable, Optional, Tuple, Union, cast, overload
 
 import numpy as np
@@ -27,30 +27,99 @@ def randn(shape: Tuple[int, ...]) -> np.ndarray:
 
 @dataclass
 class BaseProblem:
-    """Parameters general to all solutions."""
+    """Parameters general to all solutions.
+
+    Attributes
+    ----------
+    shape:
+        Tensor shape for generated problem.
+    factor_generator:
+        Method to generate factor matrices.
+    symmetric:
+        List of modes that should be symmetric.
+        For instance, `[(1,2), (3,4)]` specifies that
+        modes 1 and 2 have identical factor matrices, and modes 3 and 4
+        also have identical factor matrices.
+    num_factors:
+        Number of factors.
+    noise:
+        Amount of Gaussian noise to add to solution.
+        If data is sparse noise is only added to nonzero entries.
+    """
 
-    shape: Shape
+    shape: Shape = field(metadata={"doc": "A shape"})
     factor_generator: solution_generator = randn
     symmetric: Optional[list[Tuple[int, int]]] = None
     num_factors: Union[int, list[int], None] = None
+    noise: float = 0.10
 
     def __post_init__(self):
         self.shape = ttb.pyttb_utils.parse_shape(self.shape)
+        if not 0.0 <= self.noise <= 1.0:
+            raise ValueError(f"Noise must be in [0,1] but got {self.noise}")
 
 
 @dataclass
 class CPProblem(BaseProblem):
-    """Parameters specifying CP Solutions."""
+    """Parameters specifying CP Solutions.
+
+    Attributes
+    ----------
+    shape:
+        Tensor shape for generated problem.
+    factor_generator:
+        Method to generate factor matrices.
+    symmetric:
+        List of modes that should be symmetric.
+        For instance, `[(1,2), (3,4)]` specifies that
+        modes 1 and 2 have identical factor matrices, and modes 3 and 4
+        also have identical factor matrices.
+    num_factors:
+        Number of factors.
+    noise:
+        Amount of Gaussian noise to add to solution.
+        If data is sparse noise is only added to nonzero entries.
+    weight_generator:
+        Method to generate weights for ktensor solution.
+    sparse_generation:
+        Generate a sparse tensor that can be scaled so that the
+        column factors and weights are stochastic. Provide a number
+        of nonzeros to be inserted. A value in range [0,1) will be
+        interpreted as a ratio.
+    """
+
+    # NOTE inherited attributes are manually copy pasted, keep aligned between problems
 
     num_factors: int = 2
     weight_generator: solution_generator = np.random.random
-    # TODO: This is in DataParams in MATLAB, but only works for CP problems
+    # TODO: This is in DataParams in MATLAB, but only works for CP problems so
+    # feels more reasonable here
     sparse_generation: Optional[float] = None
 
 
 @dataclass
 class TuckerProblem(BaseProblem):
-    """Parameters specifying Tucker Solutions."""
+    """Parameters specifying Tucker Solutions.
+
+    Attributes
+    ----------
+    shape:
+        Tensor shape for generated problem.
+    factor_generator:
+        Method to generate factor matrices.
+    symmetric:
+        List of modes that should be symmetric.
+        For instance, `[(1,2), (3,4)]` specifies that
+        modes 1 and 2 have identical factor matrices, and modes 3 and 4
+        also have identical factor matrices.
+    num_factors:
+        Number of factors.
+    noise:
+        Amount of Gaussian noise to add to solution.
+        If data is sparse noise is only added to nonzero entries.
+    core_generator:
+        Method to generate the core tensor for ttensor solution.
+    """
 
     # TODO post_init set to [2, 2, 2]
     num_factors: Optional[list[int]] = None
@@ -61,26 +130,25 @@ def __post_init__(self):
         self.num_factors = self.num_factors or [2, 2, 2]
 
 
-@dataclass
-class DataParams:
-    """Parameters to control data quality."""
-
-    noise: float = 0.10
-
-    def __post_init__(
-        self,
-    ):
-        if not 0.0 <= self.noise <= 1.0:
-            raise ValueError(f"Noise must be in [0,1] but got {self.noise}")
-
-
 @dataclass
 class MissingData:
-    """Parameters to control missing data."""
+    """Parameters to control missing data.
+
+    Attributes
+    ----------
+    missing_ratio:
+        Proportion of missing data.
+    missing_pattern:
+        An explicit tensor representing missing data locations.
+    sparse_model:
+        Whether to generate sparse rather than dense missing data pattern.
+        Only useful for large tensors that don't easily fit in memory and
+        when missing ratio > 0.8.
+    """
 
     missing_ratio: float = 0.0
-    sparse_model: bool = False
     missing_pattern: Optional[Union[ttb.sptensor, ttb.tensor]] = None
+    sparse_model: bool = False
 
     def __post_init__(self):
         if not 0.0 <= self.missing_ratio <= 1.0:
@@ -208,7 +276,7 @@ def _create_missing_data_pattern(
 
 @overload
 def create_problem(
-    problem_params: CPProblem, missing_params: MissingData, data_params: DataParams
+    problem_params: CPProblem, missing_params: MissingData
 ) -> Tuple[
     ttb.ktensor, Union[ttb.tensor, ttb.sptensor]
 ]: ...  # pragma: no cover see coveragepy/issues/970
@@ -216,16 +284,47 @@ def create_problem(
 
 @overload
 def create_problem(
-    problem_params: TuckerProblem, missing_params: MissingData, data_params: DataParams
+    problem_params: TuckerProblem,
+    missing_params: MissingData,
 ) -> Tuple[ttb.ttensor, ttb.tensor]: ...  # pragma: no cover see coveragepy/issues/970
 
 
 def create_problem(
     problem_params: Union[CPProblem, TuckerProblem],
     missing_params: MissingData,
-    data_params: DataParams,
 ) -> Tuple[Union[ttb.ktensor, ttb.ttensor], Union[ttb.tensor, ttb.sptensor]]:
-    """Generate a problem and solution."""
+    """Generate a problem and solution.
+
+    Arguments
+    ---------
+    problem_params:
+        Parameters related to the problem to generate.
+    missing_params:
+        Parameters to control missing data in the generated data/solution.
+
+    Examples
+    --------
+    Base example params
+
+    >>> shape = (5, 4, 3)
+
+    Generate a CP problem
+
+    >>> cp_specific_params = CPProblem(shape=shape, num_factors=3, noise=0.1)
+    >>> no_missing_data = MissingData()
+    >>> solution, data = create_problem(cp_specific_params, no_missing_data)
+    >>> diff = (solution.full() - data).norm() / solution.full().norm()
+    >>> bool(np.isclose(diff, 0.1))
+    True
+
+    Generate Tucker Problem
+
+    >>> tucker_specific_params = TuckerProblem(shape, num_factors=[3, 3, 2], noise=0.1)
+    >>> solution, data = create_problem(tucker_specific_params, no_missing_data)
+    >>> diff = (solution.full() - data).norm() / solution.full().norm()
+    >>> bool(np.isclose(diff, 0.1))
+    True
+    """
     if problem_params.symmetric is not None:
         missing_params.raise_symmetric()
 
@@ -242,12 +341,12 @@ def create_problem(
                 f" sparse generation {CPProblem.__name__}."
             )
         solution = cast(ttb.ktensor, solution)
-        solution, data = generate_data_sparse(solution, problem_params, data_params)
+        solution, data = generate_data_sparse(solution, problem_params)
     elif missing_params.has_missing():
         pattern = missing_params.get_pattern(solution.shape)
-        data = generate_data(solution, problem_params, data_params, pattern)
+        data = generate_data(solution, problem_params, pattern)
     else:
-        data = generate_data(solution, problem_params, data_params)
+        data = generate_data(solution, problem_params)
     return solution, data
 
 
@@ -316,7 +415,6 @@ def generate_solution(
 def generate_data(
     solution: Union[ttb.ktensor, ttb.ttensor],
     problem_params: BaseProblem,
-    data_params: DataParams,
     pattern: Optional[ttb.tensor] = None,
 ) -> ttb.tensor: ...  # pragma: no cover see coveragepy/issues/970
 
@@ -325,7 +423,6 @@ def generate_data(
 def generate_data(
     solution: Union[ttb.ktensor, ttb.ttensor],
     problem_params: BaseProblem,
-    data_params: DataParams,
     pattern: ttb.sptensor,
 ) -> ttb.sptensor: ...  # pragma: no cover see coveragepy/issues/970
 
@@ -333,7 +430,6 @@ def generate_data(
 def generate_data(
     solution: Union[ttb.ktensor, ttb.ttensor],
     problem_params: BaseProblem,
-    data_params: DataParams,
     pattern: Optional[Union[ttb.tensor, ttb.sptensor]] = None,
 ) -> Union[ttb.tensor, ttb.sptensor]:
     """Generate problem data."""
@@ -356,7 +452,7 @@ def generate_data(
             # TODO Note in MATLAB code to follow up
             Rdm = Rdm.symmetrize(np.array(problem_params.symmetric))
 
-    D = Z + data_params.noise * Z.norm() * Rdm / Rdm.norm()
+    D = Z + problem_params.noise * Z.norm() * Rdm / Rdm.norm()
     # Make sure the final result is definitely symmetric
     if problem_params.symmetric is not None:
         D = D.symmetrize(np.array(problem_params.symmetric))
@@ -372,7 +468,8 @@ def prosample(nsamples: int, prob: np.ndarray) -> np.ndarray:
 
 
 def generate_data_sparse(
-    solution: ttb.ktensor, problem_params: CPProblem, data_params: DataParams
+    solution: ttb.ktensor,
+    problem_params: CPProblem,
 ) -> Tuple[ttb.ktensor, ttb.sptensor]:
     """Generate sparse CP data from a given solution."""
     # Error check on solution
diff --git a/tests/test_create_problem.py b/tests/test_create_problem.py
index d4c202da..9b2637d0 100644
--- a/tests/test_create_problem.py
+++ b/tests/test_create_problem.py
@@ -5,7 +5,6 @@
 from pyttb.create_problem import (
     BaseProblem,
     CPProblem,
-    DataParams,
     MissingData,
     TuckerProblem,
     create_problem,
@@ -15,13 +14,14 @@
 
 
 class TestDataclasses:
-    def test_dataparams(self):
+    def test_problemparams(self):
+        arbitrary_shape = (2, 2, 2)
         with pytest.raises(ValueError):
             number_larger_than_one = 2.0
-            DataParams(noise=number_larger_than_one)
+            BaseProblem(arbitrary_shape, noise=number_larger_than_one)
         with pytest.raises(ValueError):
             number_less_than_zero = -2.0
-            DataParams(noise=number_less_than_zero)
+            BaseProblem(arbitrary_shape, noise=number_less_than_zero)
 
     def test_missingdata(self):
         with pytest.raises(ValueError):
@@ -58,7 +58,7 @@ def test_generate_data_cp():
     shape = (2, 2, 2)
     cp_params = CPProblem(shape)
     model = generate_solution(cp_params)
-    data = generate_data(model, cp_params, data_params=DataParams())
+    data = generate_data(model, cp_params)
     assert isinstance(data, ttb.tensor)
     assert data.shape == model.shape
 
@@ -79,7 +79,7 @@ def test_generate_data_tucker():
     shape = (2, 2, 2)
     tucker_params = TuckerProblem(shape)
     model = generate_solution(tucker_params)
-    data = generate_data(model, tucker_params, data_params=DataParams())
+    data = generate_data(model, tucker_params)
     assert isinstance(data, ttb.tensor)
     assert data.shape == model.shape
 
@@ -87,24 +87,23 @@ def test_generate_data_tucker():
 def test_create_problem_smoke():
     shape = (2, 2, 2)
     cp_params = CPProblem(shape)
-    data_params = DataParams()
     missing_params = MissingData()
-    soln, data = create_problem(cp_params, missing_params, data_params)
+    soln, data = create_problem(cp_params, missing_params)
     assert soln.full().shape == data.shape
 
     cp_params.symmetric = [(0, 1)]
-    soln, data = create_problem(cp_params, missing_params, data_params)
+    soln, data = create_problem(cp_params, missing_params)
     assert soln.full().shape == data.shape
 
     with pytest.raises(ValueError):
         empty_num_factors = BaseProblem(shape)
-        create_problem(empty_num_factors, missing_params, data_params)
+        create_problem(empty_num_factors, missing_params)
     with pytest.raises(ValueError):
         inconsistent_num_factors = BaseProblem(shape, num_factors=[2, 2])
-        create_problem(inconsistent_num_factors, missing_params, data_params)
+        create_problem(inconsistent_num_factors, missing_params)
     with pytest.raises(ValueError):
         bad_problem_type = BaseProblem(shape, num_factors=3)
-        create_problem(bad_problem_type, missing_params, data_params)
+        create_problem(bad_problem_type, missing_params)
 
     # TODO hit edge cases and symmetric
 
@@ -114,35 +113,33 @@ def test_create_problem_smoke_sparse():
     cp_params = CPProblem(
         shape, sparse_generation=0.99, factor_generator=np.random.random
     )
-    data_params = DataParams()
     missing_params = MissingData()
-    soln, data = create_problem(cp_params, missing_params, data_params)
+    soln, data = create_problem(cp_params, missing_params)
     assert soln.full().shape == data.shape
 
     with pytest.raises(ValueError):
         missing_AND_sparse_generation = MissingData(missing_ratio=0.1)
-        create_problem(cp_params, missing_AND_sparse_generation, data_params)
+        create_problem(cp_params, missing_AND_sparse_generation)
     # TODO hit edge cases and symmetric
 
 
 def test_create_problem_smoke_missing():
     shape = (4, 5, 6)
     cp_params = CPProblem(shape, factor_generator=np.random.random)
-    data_params = DataParams()
     missing_params = MissingData(missing_ratio=0.8)
-    soln, data = create_problem(cp_params, missing_params, data_params)
+    soln, data = create_problem(cp_params, missing_params)
     assert soln.full().shape == data.shape
 
     missing_params = MissingData(missing_ratio=0.8, sparse_model=True)
-    soln, data = create_problem(cp_params, missing_params, data_params)
+    soln, data = create_problem(cp_params, missing_params)
     assert soln.full().shape == data.shape
 
     with pytest.raises(ValueError):
         bad_pattern_shape = np.ones([dim + 1 for dim in soln.shape])
         missing_params = MissingData(missing_pattern=bad_pattern_shape)
-        create_problem(cp_params, missing_params, data_params)
+        create_problem(cp_params, missing_params)
 
     with pytest.raises(ValueError):
         bad_pattern_type = np.ones(soln.shape)
         missing_params = MissingData(missing_pattern=bad_pattern_type)
-        create_problem(cp_params, missing_params, data_params)
+        create_problem(cp_params, missing_params)

From 2444a1ad411017a47e3a30e083a9b99db0aef2df Mon Sep 17 00:00:00 2001
From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com>
Date: Sat, 28 Jun 2025 10:54:56 -0400
Subject: [PATCH 17/20] Extend some test coverage

---
 tests/test_create_problem.py | 61 ++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/tests/test_create_problem.py b/tests/test_create_problem.py
index 9b2637d0..ee5e97a5 100644
--- a/tests/test_create_problem.py
+++ b/tests/test_create_problem.py
@@ -9,6 +9,7 @@
     TuckerProblem,
     create_problem,
     generate_data,
+    generate_data_sparse,
     generate_solution,
 )
 
@@ -24,6 +25,7 @@ def test_problemparams(self):
             BaseProblem(arbitrary_shape, noise=number_less_than_zero)
 
     def test_missingdata(self):
+        arbitrary_shape = (2, 2, 2)
         with pytest.raises(ValueError):
             number_larger_than_one = 2.0
             MissingData(missing_ratio=number_larger_than_one)
@@ -31,16 +33,36 @@ def test_missingdata(self):
             number_less_than_zero = -2.0
             MissingData(missing_ratio=number_less_than_zero)
 
+        with pytest.raises(ValueError):
+            number_larger_than_zero = 1.0
+            arbitrary_missing_pattern = ttb.tenones(arbitrary_shape)
+            MissingData(
+                missing_ratio=number_larger_than_zero,
+                missing_pattern=arbitrary_missing_pattern,
+            )
+
         missing_params = MissingData(missing_ratio=0.1)
         assert missing_params.has_missing()
         with pytest.raises(ValueError):
             missing_params.raise_symmetric()
+
+        missing_params = MissingData(sparse_model=True)
         with pytest.raises(ValueError):
             missing_params.raise_symmetric()
+
         missing_params = MissingData()
         assert not missing_params.has_missing()
         missing_params.raise_symmetric()
 
+        missing_params = MissingData()
+        assert missing_params.get_pattern(arbitrary_shape) is None
+
+    def test_missingdata_logging(self, caplog):
+        arbitrary_shape = (2, 2, 2)
+        missing_params = MissingData(missing_ratio=0.1, sparse_model=True)
+        missing_params.get_pattern(arbitrary_shape)
+        assert "missing elements" in caplog.text
+
 
 def test_generate_solution_cp():
     # Smoke test with defaults
@@ -71,6 +93,12 @@ def test_generate_solution_tucker():
     assert isinstance(model, ttb.ttensor)
     assert model.shape == shape
 
+    # Smoke test with a tensor core generator
+    shape = (2, 2, 2)
+    tucker_params = TuckerProblem(shape, core_generator=ttb.tenrand)
+    model = generate_solution(tucker_params)
+    assert isinstance(model, ttb.ttensor)
+    assert model.shape == shape
     # TODO could test with different generators and enforce that they actually get used
 
 
@@ -143,3 +171,36 @@ def test_create_problem_smoke_missing():
         bad_pattern_type = np.ones(soln.shape)
         missing_params = MissingData(missing_pattern=bad_pattern_type)
         create_problem(cp_params, missing_params)
+
+
+def test_generate_data_sparse_value_errors():
+    """Test that generate_data_sparse raises expected ValueErrors."""
+    shape = (3, 3, 3)
+
+    # Test negative weights
+    factor_matrices = [np.random.random((3, 2)) for _ in range(3)]
+    negative_weights = np.array([-1.0, 1.0])  # One negative weight
+    solution = ttb.ktensor(factor_matrices, negative_weights)
+    problem_params = CPProblem(shape, sparse_generation=0.5)
+
+    with pytest.raises(ValueError):
+        generate_data_sparse(solution, problem_params)
+
+    # Test negative factor matrices
+    factor_matrices = [np.random.random((3, 2)) for _ in range(3)]
+    factor_matrices[0][0, 0] = -1.0  # Make one element negative
+    positive_weights = np.array([1.0, 1.0])
+    solution = ttb.ktensor(factor_matrices, positive_weights)
+    problem_params = CPProblem(shape, sparse_generation=0.5)
+
+    with pytest.raises(ValueError):
+        generate_data_sparse(solution, problem_params)
+
+    # Test missing sparse_generation
+    factor_matrices = [np.random.random((3, 2)) for _ in range(3)]
+    positive_weights = np.array([1.0, 1.0])
+    solution = ttb.ktensor(factor_matrices, positive_weights)
+    problem_params = CPProblem(shape, sparse_generation=None)
+
+    with pytest.raises(ValueError):
+        generate_data_sparse(solution, problem_params)

From 2f10f55963886a747dffaee31b49c5118cca326b Mon Sep 17 00:00:00 2001
From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com>
Date: Sat, 28 Jun 2025 12:03:10 -0400
Subject: [PATCH 18/20] Add existing solution support and updated docs

---
 docs/source/create_problem.rst                |  16 ++-
 .../tutorial/utility_test_problem.ipynb       |  68 ++++++++-
 pyttb/create_problem.py                       | 130 +++++++++++++++---
 tests/test_create_problem.py                  |  21 +++
 4 files changed, 214 insertions(+), 21 deletions(-)

diff --git a/docs/source/create_problem.rst b/docs/source/create_problem.rst
index 9b6a99dc..211bbb5d 100644
--- a/docs/source/create_problem.rst
+++ b/docs/source/create_problem.rst
@@ -1,6 +1,8 @@
 Create Test Problems (:obj:`pyttb.create_problem`)
 ---------------------------------------------------
 
+.. autofunction:: pyttb.create_problem.create_problem
+
 .. autoclass:: pyttb.create_problem.BaseProblem
     :exclude-members: __dict__, __weakref__, __slots__
     :show-inheritance:
@@ -13,10 +15,20 @@ Create Test Problems (:obj:`pyttb.create_problem`)
     :exclude-members: __dict__, __weakref__, __slots__
     :show-inheritance:
 
+.. autoclass:: pyttb.create_problem.ExistingSolution
+    :exclude-members: __dict__, __weakref__, __slots__
+    :show-inheritance:
+
+.. autoclass:: pyttb.create_problem.ExistingCPSolution
+    :exclude-members: __dict__, __weakref__, __slots__
+    :show-inheritance:
+
+.. autoclass:: pyttb.create_problem.ExistingTuckerSolution
+    :exclude-members: __dict__, __weakref__, __slots__
+    :show-inheritance:
+
 .. autoclass:: pyttb.create_problem.MissingData
     :members:
     :special-members:
     :exclude-members: __dict__, __weakref__, __slots__
     :show-inheritance:
-
-.. autofunction:: pyttb.create_problem.create_problem
diff --git a/docs/source/tutorial/utility_test_problem.ipynb b/docs/source/tutorial/utility_test_problem.ipynb
index d4fabe8f..92feb77a 100644
--- a/docs/source/tutorial/utility_test_problem.ipynb
+++ b/docs/source/tutorial/utility_test_problem.ipynb
@@ -31,6 +31,7 @@
     "import pyttb as ttb\n",
     "from pyttb.create_problem import (\n",
     "    CPProblem,\n",
+    "    ExistingCPSolution,\n",
     "    TuckerProblem,\n",
     "    MissingData,\n",
     "    create_problem,\n",
@@ -367,9 +368,74 @@
     "solution, data = create_problem(cp_specific_params, missing_data_params)\n",
     "print(data)"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "31",
+   "metadata": {},
+   "source": [
+    "## Creating Sparse Problems (CP only)\n",
+    "If we assume each model parameter is the input to a Poisson process, then we can generate sparse test problems. This requires that all the factor matrices and lambda be nonnegative. The default factor generator ('randn') won't work since it produces both positive and negative values."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "32",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Generate factor matrices with a few large entries in each column\n",
+    "# This will be the basis of our solution\n",
+    "shape = (20, 15, 10)\n",
+    "num_factors = 4\n",
+    "A = []\n",
+    "for n in range(len(shape)):\n",
+    "    A.append(np.random.rand(shape[n], num_factors))\n",
+    "    for r in range(num_factors):\n",
+    "        p = np.random.permutation(np.arange(shape[n]))\n",
+    "        idx = p[1 : round(0.2 * shape[n])]\n",
+    "        A[n][idx, r] *= 10\n",
+    "S = ttb.ktensor(A)\n",
+    "# S.normalize(sort=True);"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "33",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "S.normalize(sort=True).weights"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "34",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create sparse test problem based on the solution.\n",
+    "# `sparse_generation` controls how many insertions to make based on the solution.\n",
+    "# The weight vector of the solution is automatically rescaled to match the number of insertions.\n",
+    "existing_params = ExistingCPSolution(S, noise=0.0, sparse_generation=500)\n",
+    "print(f\"{S.weights=}\")\n",
+    "solution, data = create_problem(existing_params)\n",
+    "print(\n",
+    "    f\"num_nozeros: {data.nnz}\\n\"\n",
+    "    f\"total_insertions: {np.sum(data.vals)}\\n\"\n",
+    "    f\"original weights vs rescaled: {S.weights / solution.weights}\"\n",
+    ")"
+   ]
   }
  ],
- "metadata": {},
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
  "nbformat": 4,
  "nbformat_minor": 5
 }
diff --git a/pyttb/create_problem.py b/pyttb/create_problem.py
index f89cde6d..e10c8bf7 100644
--- a/pyttb/create_problem.py
+++ b/pyttb/create_problem.py
@@ -81,11 +81,6 @@ class CPProblem(BaseProblem):
         If data is sparse noise is only added to nonzero entries.
     weight_generator:
         Method to generate weights for ktensor solution.
-    sparse_generation:
-        Generate a sparse tensor that can be scaled so that the
-        column factors and weights are stochastic. Provide a number
-        of nonzeros to be inserted. A value in range [0,1) will be
-        interpreted as a ratio.
     """
 
     # NOTE inherited attributes are manually copy pasted, keep aligned between problems
@@ -130,6 +125,71 @@ def __post_init__(self):
         self.num_factors = self.num_factors or [2, 2, 2]
 
 
+@dataclass
+class ExistingSolution:
+    """Parameters for using an existing tensor solution.
+
+    Attributes
+    ----------
+    solution:
+        Pre-existing tensor solution (ktensor or ttensor).
+    noise:
+        Amount of Gaussian noise to add to solution.
+        If data is sparse noise is only added to nonzero entries.
+    """
+
+    solution: Union[ttb.ktensor, ttb.ttensor]
+    noise: float = 0.10
+
+    def __post_init__(self):
+        if not 0.0 <= self.noise <= 1.0:
+            raise ValueError(f"Noise must be in [0,1] but got {self.noise}")
+
+    @property
+    def symmetric(self) -> None:
+        """Get the symmetric modes from the solution."""
+        # ExistingSolution doesn't support symmetry constraints
+        return None
+
+
+@dataclass
+class ExistingTuckerSolution(ExistingSolution):
+    """Parameters for using an existing Tucker tensor solution.
+
+    Attributes
+    ----------
+    solution:
+        Pre-existing ttensor solution.
+    noise:
+        Amount of Gaussian noise to add to solution.
+        If data is sparse noise is only added to nonzero entries.
+    """
+
+    solution: ttb.ttensor
+
+
+@dataclass
+class ExistingCPSolution(ExistingSolution):
+    """Parameters for using an existing CP tensor solution.
+
+    Attributes
+    ----------
+    solution:
+        Pre-existing ktensor solution.
+    noise:
+        Amount of Gaussian noise to add to solution.
+        If data is sparse noise is only added to nonzero entries.
+    sparse_generation:
+        Generate a sparse tensor that can be scaled so that the
+        column factors and weights are stochastic. Provide a number
+        of nonzeros to be inserted. A value in range [0,1) will be
+        interpreted as a ratio.
+    """
+
+    solution: ttb.ktensor
+    sparse_generation: Optional[float] = None
+
+
 @dataclass
 class MissingData:
     """Parameters to control missing data.
@@ -276,7 +336,7 @@ def _create_missing_data_pattern(
 
 @overload
 def create_problem(
-    problem_params: CPProblem, missing_params: MissingData
+    problem_params: CPProblem, missing_params: Optional[MissingData] = None
 ) -> Tuple[
     ttb.ktensor, Union[ttb.tensor, ttb.sptensor]
 ]: ...  # pragma: no cover see coveragepy/issues/970
@@ -285,20 +345,29 @@ def create_problem(
 @overload
 def create_problem(
     problem_params: TuckerProblem,
-    missing_params: MissingData,
+    missing_params: Optional[MissingData] = None,
 ) -> Tuple[ttb.ttensor, ttb.tensor]: ...  # pragma: no cover see coveragepy/issues/970
 
 
+@overload
+def create_problem(
+    problem_params: ExistingSolution,
+    missing_params: Optional[MissingData] = None,
+) -> Tuple[
+    Union[ttb.ktensor, ttb.ttensor], Union[ttb.tensor, ttb.sptensor]
+]: ...  # pragma: no cover see coveragepy/issues/970
+
+
 def create_problem(
-    problem_params: Union[CPProblem, TuckerProblem],
-    missing_params: MissingData,
+    problem_params: Union[CPProblem, TuckerProblem, ExistingSolution],
+    missing_params: Optional[MissingData] = None,
 ) -> Tuple[Union[ttb.ktensor, ttb.ttensor], Union[ttb.tensor, ttb.sptensor]]:
     """Generate a problem and solution.
 
     Arguments
     ---------
     problem_params:
-        Parameters related to the problem to generate.
+        Parameters related to the problem to generate, or an existing solution.
     missing_params:
         Parameters to control missing data in the generated data/solution.
 
@@ -324,7 +393,19 @@ def create_problem(
     >>> diff = (solution.full() - data).norm() / solution.full().norm()
     >>> bool(np.isclose(diff, 0.1))
     True
+
+    Use existing solution
+
+    >>> factor_matrices = [np.random.random((dim, 3)) for dim in shape]
+    >>> weights = np.random.random(3)
+    >>> existing_ktensor = ttb.ktensor(factor_matrices, weights)
+    >>> existing_params = ExistingSolution(existing_ktensor, noise=0.1)
+    >>> solution, data = create_problem(existing_params, no_missing_data)
+    >>> assert solution is existing_ktensor
     """
+    if missing_params is None:
+        missing_params = MissingData()
+
     if problem_params.symmetric is not None:
         missing_params.raise_symmetric()
 
@@ -332,7 +413,7 @@ def create_problem(
 
     data: Union[ttb.tensor, ttb.sptensor]
     if (
-        isinstance(problem_params, CPProblem)
+        isinstance(problem_params, (CPProblem, ExistingCPSolution))
         and problem_params.sparse_generation is not None
     ):
         if missing_params.has_missing():
@@ -391,10 +472,18 @@ def generate_solution(
 ) -> ttb.ktensor: ...
 
 
+@overload
+def generate_solution(
+    problem_params: ExistingSolution,
+) -> Union[ttb.ktensor, ttb.ttensor]: ...
+
+
 def generate_solution(
-    problem_params: Union[CPProblem, TuckerProblem],
+    problem_params: Union[CPProblem, TuckerProblem, ExistingSolution],
 ) -> Union[ttb.ktensor, ttb.ttensor]:
     """Generate problem solution."""
+    if isinstance(problem_params, ExistingSolution):
+        return problem_params.solution
     factor_matrices = generate_solution_factors(problem_params)
     # Create final model
     if isinstance(problem_params, TuckerProblem):
@@ -414,7 +503,7 @@ def generate_solution(
 @overload
 def generate_data(
     solution: Union[ttb.ktensor, ttb.ttensor],
-    problem_params: BaseProblem,
+    problem_params: Union[BaseProblem, ExistingSolution],
     pattern: Optional[ttb.tensor] = None,
 ) -> ttb.tensor: ...  # pragma: no cover see coveragepy/issues/970
 
@@ -422,14 +511,14 @@ def generate_data(
 @overload
 def generate_data(
     solution: Union[ttb.ktensor, ttb.ttensor],
-    problem_params: BaseProblem,
+    problem_params: Union[BaseProblem, ExistingSolution],
     pattern: ttb.sptensor,
 ) -> ttb.sptensor: ...  # pragma: no cover see coveragepy/issues/970
 
 
 def generate_data(
     solution: Union[ttb.ktensor, ttb.ttensor],
-    problem_params: BaseProblem,
+    problem_params: Union[BaseProblem, ExistingSolution],
     pattern: Optional[Union[ttb.tensor, ttb.sptensor]] = None,
 ) -> Union[ttb.tensor, ttb.sptensor]:
     """Generate problem data."""
@@ -469,7 +558,7 @@ def prosample(nsamples: int, prob: np.ndarray) -> np.ndarray:
 
 def generate_data_sparse(
     solution: ttb.ktensor,
-    problem_params: CPProblem,
+    problem_params: Union[CPProblem, ExistingCPSolution],
 ) -> Tuple[ttb.ktensor, ttb.sptensor]:
     """Generate sparse CP data from a given solution."""
     # Error check on solution
@@ -483,7 +572,8 @@ def generate_data_sparse(
         raise ValueError("Cannot generate sparse data without sparse_generation set.")
 
     # Convert solution to probability tensor
-    P = solution.normalize(mode=0)
+    # NOTE: Make copy since normalize modifies in place
+    P = solution.copy().normalize(mode=0)
     eta = np.sum(P.weights)
     P.weights /= eta
 
@@ -512,7 +602,7 @@ def generate_data_sparse(
     allsubs = np.vstack(subs)
     # Assemble final tensor. Note that duplicates are summed.
     # TODO should we have sptenones for purposes like this?
-    Z = ttb.sptensor(
+    Z = ttb.sptensor.from_aggregator(
         allsubs,
         np.ones(
             (len(allsubs), 1),
@@ -522,6 +612,10 @@ def generate_data_sparse(
 
     # Rescale S so that it is proportional to the number of edges inserted
     solution = P
+    # raise ValueError(
+    #    f"{nedges=}"
+    #    f"{solution.weights=}"
+    # )
     solution.weights *= nedges
 
     # TODO no noise introduced in this special case in MATLAB
diff --git a/tests/test_create_problem.py b/tests/test_create_problem.py
index ee5e97a5..3e02cf97 100644
--- a/tests/test_create_problem.py
+++ b/tests/test_create_problem.py
@@ -5,6 +5,7 @@
 from pyttb.create_problem import (
     BaseProblem,
     CPProblem,
+    ExistingSolution,
     MissingData,
     TuckerProblem,
     create_problem,
@@ -63,6 +64,20 @@ def test_missingdata_logging(self, caplog):
         missing_params.get_pattern(arbitrary_shape)
         assert "missing elements" in caplog.text
 
+    def test_existing_solution(self, sample_ktensor_2way):
+        solution = sample_ktensor_2way
+        existing_solution = ExistingSolution(solution)
+        assert existing_solution.solution is solution
+        assert existing_solution.noise == 0.1
+
+        with pytest.raises(ValueError):
+            value_less_than_zero = -0.1
+            ExistingSolution(solution, noise=value_less_than_zero)
+
+        with pytest.raises(ValueError):
+            value_greater_than_one = 1.1
+            ExistingSolution(solution, noise=value_greater_than_one)
+
 
 def test_generate_solution_cp():
     # Smoke test with defaults
@@ -119,6 +134,12 @@ def test_create_problem_smoke():
     soln, data = create_problem(cp_params, missing_params)
     assert soln.full().shape == data.shape
 
+    existing_params = ExistingSolution(soln)
+    missing_params = MissingData()
+    soln, data = create_problem(existing_params, missing_params)
+    assert soln.full().shape == data.shape
+    assert soln is existing_params.solution, "Solution should be the same object"
+
     cp_params.symmetric = [(0, 1)]
     soln, data = create_problem(cp_params, missing_params)
     assert soln.full().shape == data.shape

From 9d31b15c9e216fae7af349d21fae802e200ff7c1 Mon Sep 17 00:00:00 2001
From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com>
Date: Sat, 28 Jun 2025 12:14:32 -0400
Subject: [PATCH 19/20] Fix nbstripout

---
 docs/source/tutorial/utility_test_problem.ipynb | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/docs/source/tutorial/utility_test_problem.ipynb b/docs/source/tutorial/utility_test_problem.ipynb
index 92feb77a..583f3b8e 100644
--- a/docs/source/tutorial/utility_test_problem.ipynb
+++ b/docs/source/tutorial/utility_test_problem.ipynb
@@ -431,11 +431,7 @@
    ]
   }
  ],
- "metadata": {
-  "language_info": {
-   "name": "python"
-  }
- },
+ "metadata": {},
  "nbformat": 4,
  "nbformat_minor": 5
 }

From f7ef55a3ac116af1f836eb24907557b6fcd229f6 Mon Sep 17 00:00:00 2001
From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com>
Date: Sat, 28 Jun 2025 12:29:32 -0400
Subject: [PATCH 20/20] Update mypy to grab PR fixing 3.13 dataclass error:
 https://github.com/python/mypy/pull/18464

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 41261b93..ec0e7712 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,7 @@ documentation = "https://pyttb.readthedocs.io"
 
 [project.optional-dependencies]
 dev = [
-    "mypy>=1.10,<1.14.0",
+    "mypy>=1.15,<1.16.0",
     # Also in pre-commit
     "nbstripout>=0.8,<0.9",
     "pytest>8.0",