|
26 | 26 | def gromov_wasserstein(C1, C2, p=None, q=None, loss_fun='square_loss', symmetric=None, log=False, armijo=False, G0=None,
|
27 | 27 | max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs):
|
28 | 28 | r"""
|
29 |
| - Returns the Gromov-Wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` |
| 29 | + Returns the Gromov-Wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`. |
30 | 30 |
|
31 |
| - The function solves the following optimization problem: |
| 31 | + The function solves the following optimization problem using Conditional Gradient: |
32 | 32 |
|
33 | 33 | .. math::
|
34 | 34 | \mathbf{T}^* \in \mathop{\arg \min}_\mathbf{T} \quad \sum_{i,j,k,l}
|
@@ -182,9 +182,10 @@ def line_search(cost, G, deltaG, Mi, cost_G, **kwargs):
|
182 | 182 | def gromov_wasserstein2(C1, C2, p=None, q=None, loss_fun='square_loss', symmetric=None, log=False, armijo=False, G0=None,
|
183 | 183 | max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs):
|
184 | 184 | r"""
|
185 |
| - Returns the Gromov-Wasserstein discrepancy between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` |
| 185 | + Returns the Gromov-Wasserstein loss :math:`\mathbf{GW}` between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`. |
| 186 | + To recover the Gromov-Wasserstein distance as defined in [13] compute :math:`d_{GW} = \frac{1}{2} \sqrt{\mathbf{GW}}`. |
186 | 187 |
|
187 |
| - The function solves the following optimization problem: |
| 188 | + The function solves the following optimization problem using Conditional Gradient: |
188 | 189 |
|
189 | 190 | .. math::
|
190 | 191 | \mathbf{GW} = \min_\mathbf{T} \quad \sum_{i,j,k,l}
|
@@ -308,31 +309,36 @@ def gromov_wasserstein2(C1, C2, p=None, q=None, loss_fun='square_loss', symmetri
|
308 | 309 | def fused_gromov_wasserstein(M, C1, C2, p=None, q=None, loss_fun='square_loss', symmetric=None, alpha=0.5,
|
309 | 310 | armijo=False, G0=None, log=False, max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs):
|
310 | 311 | r"""
|
311 |
| - Computes the FGW transport between two graphs (see :ref:`[24] <references-fused-gromov-wasserstein>`) |
| 312 | + Returns the Fused Gromov-Wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{Y_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{Y_2}, \mathbf{q})` |
| 313 | + with pairwise distance matrix :math:`\mathbf{M}` between node feature matrices :math:`\mathbf{Y_1}` and :math:`\mathbf{Y_2}` (see :ref:`[24] <references-fused-gromov-wasserstein>`). |
| 314 | +
|
| 315 | + The function solves the following optimization problem using Conditional Gradient: |
312 | 316 |
|
313 | 317 | .. math::
|
314 |
| - \mathbf{T}^* \in \mathop{\arg \min}_\mathbf{T} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F + |
| 318 | + \mathbf{T}^* \in \mathop{\arg \min}_\mathbf{T} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F + |
315 | 319 | \alpha \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
|
316 | 320 |
|
317 | 321 | s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
|
318 | 322 |
|
319 | 323 | \mathbf{T}^T \mathbf{1} &= \mathbf{q}
|
320 | 324 |
|
321 | 325 | \mathbf{T} &\geq 0
|
| 326 | + Where : |
322 | 327 |
|
323 |
| - where : |
324 |
| -
|
325 |
| - - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix |
326 |
| - - :math:`\mathbf{p}` and :math:`\mathbf{q}` are source and target weights (sum to 1) |
327 |
| - - `L` is a loss function to account for the misfit between the similarity matrices |
| 328 | + - :math:`\mathbf{M}`: metric cost matrix between features across domains |
| 329 | + - :math:`\mathbf{C_1}`: Metric cost matrix in the source space |
| 330 | + - :math:`\mathbf{C_2}`: Metric cost matrix in the target space |
| 331 | + - :math:`\mathbf{p}`: distribution in the source space |
| 332 | + - :math:`\mathbf{q}`: distribution in the target space |
| 333 | + - `L`: loss function to account for the misfit between the similarity and feature matrices |
| 334 | + - :math:`\alpha`: trade-off parameter |
328 | 335 |
|
329 | 336 | .. note:: This function is backend-compatible and will work on arrays
|
330 | 337 | from all compatible backends. But the algorithm uses the C++ CPU backend
|
331 | 338 | which can lead to copy overhead on GPU arrays.
|
332 | 339 | .. note:: All computations in the conjugate gradient solver are done with
|
333 | 340 | numpy to limit memory overhead.
|
334 | 341 |
|
335 |
| - The algorithm used for solving the problem is conditional gradient as discussed in :ref:`[24] <references-fused-gromov-wasserstein>` |
336 | 342 |
|
337 | 343 | Parameters
|
338 | 344 | ----------
|
@@ -465,36 +471,39 @@ def line_search(cost, G, deltaG, Mi, cost_G, **kwargs):
|
465 | 471 | def fused_gromov_wasserstein2(M, C1, C2, p=None, q=None, loss_fun='square_loss', symmetric=None, alpha=0.5,
|
466 | 472 | armijo=False, G0=None, log=False, max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs):
|
467 | 473 | r"""
|
468 |
| - Computes the FGW distance between two graphs see (see :ref:`[24] <references-fused-gromov-wasserstein2>`) |
| 474 | + Returns the Fused Gromov-Wasserstein distance between :math:`(\mathbf{C_1}, \mathbf{Y_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{Y_2}, \mathbf{q})` |
| 475 | + with pairwise distance matrix :math:`\mathbf{M}` between node feature matrices :math:`\mathbf{Y_1}` and :math:`\mathbf{Y_2}` (see :ref:`[24] <references-fused-gromov-wasserstein2>`). |
469 | 476 |
|
470 |
| - .. math:: |
471 |
| - \mathbf{GW} = \min_\mathbf{T} \quad (1 - \alpha) \langle \mathbf(T), \mathbf{M} \rangle_F + \alpha \sum_{i,j,k,l} |
472 |
| - L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} |
| 477 | + The function solves the following optimization problem using Conditional Gradient: |
473 | 478 |
|
474 |
| - s.t. \ \mathbf(T)\mathbf{1} &= \mathbf{p} |
| 479 | + .. math:: |
| 480 | + \mathbf{FGW} = \mathop{\min}_\mathbf{T} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F + |
| 481 | + \alpha \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} |
475 | 482 |
|
476 |
| - \mathbf(T)^T \mathbf{1} &= \mathbf{q} |
| 483 | + s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p} |
477 | 484 |
|
478 |
| - \mathbf(T) &\geq 0 |
| 485 | + \mathbf{T}^T \mathbf{1} &= \mathbf{q} |
479 | 486 |
|
480 |
| - where : |
| 487 | + \mathbf{T} &\geq 0 |
| 488 | + Where : |
481 | 489 |
|
482 |
| - - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix |
483 |
| - - :math:`\mathbf{p}` and :math:`\mathbf{q}` are source and target weights (sum to 1) |
484 |
| - - `L` is a loss function to account for the misfit between the similarity matrices |
| 490 | + - :math:`\mathbf{M}`: metric cost matrix between features across domains |
| 491 | + - :math:`\mathbf{C_1}`: Metric cost matrix in the source space |
| 492 | + - :math:`\mathbf{C_2}`: Metric cost matrix in the target space |
| 493 | + - :math:`\mathbf{p}`: distribution in the source space |
| 494 | + - :math:`\mathbf{q}`: distribution in the target space |
| 495 | + - `L`: loss function to account for the misfit between the similarity and feature matrices |
| 496 | + - :math:`\alpha`: trade-off parameter |
485 | 497 |
|
486 |
| - The algorithm used for solving the problem is conditional gradient as |
487 |
| - discussed in :ref:`[24] <references-fused-gromov-wasserstein2>` |
| 498 | + Note that when using backends, this loss function is differentiable wrt the |
| 499 | + matrices (C1, C2, M) and weights (p, q) for quadratic loss using the gradients from [38]_. |
488 | 500 |
|
489 | 501 | .. note:: This function is backend-compatible and will work on arrays
|
490 | 502 | from all compatible backends. But the algorithm uses the C++ CPU backend
|
491 | 503 | which can lead to copy overhead on GPU arrays.
|
492 | 504 | .. note:: All computations in the conjugate gradient solver are done with
|
493 | 505 | numpy to limit memory overhead.
|
494 | 506 |
|
495 |
| - Note that when using backends, this loss function is differentiable wrt the |
496 |
| - matrices (C1, C2, M) and weights (p, q) for quadratic loss using the gradients from [38]_. |
497 |
| -
|
498 | 507 | Parameters
|
499 | 508 | ----------
|
500 | 509 | M : array-like, shape (ns, nt)
|
@@ -668,13 +677,13 @@ def gromov_barycenters(
|
668 | 677 | max_iter=1000, tol=1e-9, warmstartT=False, verbose=False, log=False,
|
669 | 678 | init_C=None, random_state=None, **kwargs):
|
670 | 679 | r"""
|
671 |
| - Returns the gromov-wasserstein barycenters of `S` measured similarity matrices :math:`(\mathbf{C}_s)_{1 \leq s \leq S}` |
| 680 | + Returns the Gromov-Wasserstein barycenters of `S` measured similarity matrices :math:`(\mathbf{C}_s)_{1 \leq s \leq S}` |
672 | 681 |
|
673 | 682 | The function solves the following optimization problem with block coordinate descent:
|
674 | 683 |
|
675 | 684 | .. math::
|
676 | 685 |
|
677 |
| - \mathbf{C} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \quad \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s) |
| 686 | + \mathbf{C}^* = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \quad \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s) |
678 | 687 |
|
679 | 688 | Where :
|
680 | 689 |
|
@@ -812,7 +821,21 @@ def fgw_barycenters(
|
812 | 821 | fixed_features=False, p=None, loss_fun='square_loss', armijo=False,
|
813 | 822 | symmetric=True, max_iter=100, tol=1e-9, warmstartT=False, verbose=False,
|
814 | 823 | log=False, init_C=None, init_X=None, random_state=None, **kwargs):
|
815 |
| - r"""Compute the fgw barycenter as presented eq (5) in :ref:`[24] <references-fgw-barycenters>` |
| 824 | + r""" |
| 825 | + Returns the Fused Gromov-Wasserstein barycenters of `S` measured networks with node features :math:`(\mathbf{C}_s, \mathbf{Y}_s, \mathbf{p}_s)_{1 \leq s \leq S}` |
| 826 | + (see eq (5) in :ref:`[24] <references-fgw-barycenters>`), estimated using Fused Gromov-Wasserstein transports from Conditional Gradient solvers. |
| 827 | +
|
| 828 | + The function solves the following optimization problem: |
| 829 | +
|
| 830 | + .. math:: |
| 831 | +
|
| 832 | + \mathbf{C}^*, \mathbf{Y}^* = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}, \mathbf{Y}\in \mathbb{R}^{N \times d}} \quad \sum_s \lambda_s \mathrm{FGW}_{\alpha}(\mathbf{C}, \mathbf{C}_s, \mathbf{Y}, \mathbf{Y}_s, \mathbf{p}, \mathbf{p}_s) |
| 833 | +
|
| 834 | + Where : |
| 835 | +
|
| 836 | + - :math:`\mathbf{Y}_s`: feature matrix |
| 837 | + - :math:`\mathbf{C}_s`: metric cost matrix |
| 838 | + - :math:`\mathbf{p}_s`: distribution |
816 | 839 |
|
817 | 840 | Parameters
|
818 | 841 | ----------
|
|
0 commit comments