From bd700607819cfd1222d42282b50a2235c50e545a Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Sun, 16 Oct 2022 23:52:54 -0700 Subject: [PATCH 01/31] Initial Implementation --- tests/unit/svm/__init__.py | 0 tests/unit/svm/linear_svc_test.py | 34 +++++++++++++ torchml/svm/__init__.py | 1 + torchml/svm/linear_svc.py | 84 +++++++++++++++++++++++++++++++ 4 files changed, 119 insertions(+) create mode 100644 tests/unit/svm/__init__.py create mode 100644 tests/unit/svm/linear_svc_test.py create mode 100644 torchml/svm/__init__.py create mode 100644 torchml/svm/linear_svc.py diff --git a/tests/unit/svm/__init__.py b/tests/unit/svm/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/svm/linear_svc_test.py b/tests/unit/svm/linear_svc_test.py new file mode 100644 index 0000000..c1caf02 --- /dev/null +++ b/tests/unit/svm/linear_svc_test.py @@ -0,0 +1,34 @@ +import unittest +import numpy as np +import torch +from sklearn.datasets import make_classification +import sklearn.svm as svm +import time + +import torchml as ml +from torchml.svm import LinearSVC + +BSZ = 128 +DIM = 5 + + +class TestLinearSVC(unittest.TestCase): + def test_coef(self): + x, y = make_classification(n_samples=500, n_features=10, + n_classes=2) + lsvc = LinearSVC(verbose=0) + start = time.time() + lsvc.fit(torch.from_numpy(x), torch.from_numpy(y)) + end = time.time() + print(end - start) + print("Here") + start = time.time() + reflsvc = svm.LinearSVC() + reflsvc.fit(x, y) + end = time.time() + print(end - start) + print("Here") + self.assertTrue(np.allclose(lsvc.coef_.numpy(), reflsvc.coef_, atol=0.03)) + +if __name__ == "__main__": + unittest.main() diff --git a/torchml/svm/__init__.py b/torchml/svm/__init__.py new file mode 100644 index 0000000..c799ee5 --- /dev/null +++ b/torchml/svm/__init__.py @@ -0,0 +1 @@ +from .linear_svc import LinearSVC diff --git a/torchml/svm/linear_svc.py b/torchml/svm/linear_svc.py new file mode 100644 index 0000000..e4f06f9 --- /dev/null +++ b/torchml/svm/linear_svc.py @@ -0,0 +1,84 @@ +import torch +from sklearn.datasets import make_classification + +import torchml as ml +import cvxpy as cp +from cvxpylayers.torch import CvxpyLayer + +from sklearn import svm + + +class LinearSVC(ml.Model): + + def __init__( + self, + penalty="l2", + loss="squared_hinge", + *, + dual=True, + tol=1e-4, + C=1.0, + multi_class="ovr", + fit_intercept=True, + intercept_scaling=1, + class_weight=None, + verbose=0, + random_state=None, + max_iter=1000, + ): + super(LinearSVC, self).__init__() + self.coef_ = None + self.intercept_ = None + self.classes_ = None + self.y_ = None + self.dual = dual + self.tol = tol + self.C = C + self.multi_class = multi_class + self.fit_intercept = fit_intercept + self.intercept_scaling = intercept_scaling + self.class_weight = class_weight + self.verbose = verbose + self.random_state = random_state + self.max_iter = max_iter + self.penalty = penalty + self.loss = loss + + def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): + if self.C < 0: + raise ValueError( + "Penalty term must be positive; got (C=%r)" % self.C) + self.classes_ = torch.unique(y) + self.y_ = y + assert X.shape[0] == y.shape[0], "Number of X and y rows don't match" + + m, n = X.shape + + w = cp.Variable(n) + if self.fit_intercept: + b = cp.Variable() + + obj = 0 + for i in range(m): + if y[i] == self.classes_[1]: + yi = 1 + else: + yi = -1 + if self.fit_intercept: + obj += cp.square(cp.pos(1 - yi * (w.T @ X[i] + b))) + else: + obj += cp.sqaure(cp.pos(1 - yi * (w.T @ X[i]))) + + obj *= self.C + obj += cp.multiply((1 / 2.0), cp.norm(w, 2)) + + prob = cp.Problem(cp.Minimize(obj), []) + prob.solve() + self.coef_, self.intercept_ = torch.from_numpy(w.value), torch.from_numpy(b.value) + # if self.fit_intercept: + # fit_lr = CvxpyLayer(prob, [], [w, b]) + # else: + # fit_lr = CvxpyLayer(prob, [], [w]) + # + # self.weight, self.intercept = fit_lr() + return self From 252d64148a6acd6b6bf1d0e732c684efb6a76c79 Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Tue, 18 Oct 2022 03:04:22 -0700 Subject: [PATCH 02/31] add tutorials for KNN --- docs/tutorials/neighbors.md | 79 +++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 docs/tutorials/neighbors.md diff --git a/docs/tutorials/neighbors.md b/docs/tutorials/neighbors.md new file mode 100644 index 0000000..0535bc4 --- /dev/null +++ b/docs/tutorials/neighbors.md @@ -0,0 +1,79 @@ +# Nearest Neighbors +`torchml.neighbors` currently supports Unsupervised learnings on classification problem. It currently supports K Nearest Neighbors classification with `torchml.neighbors.NearestNeighbors` that implement `sklearn.neighbors.NearestNeighbors`'s brute force solution with TorchML. + +## K Nearest Neighbors classification +The principle behind Nearest Neighbors algorithms is, given a distance function and a new test point $x$, the algorithm find k closest samples in the known sample set, and use them to estimate the $x$. The number $k$ can be user-defined and tuned according to the particular problem. The distance function can be any arbitrary metric function, and standard Euclidean distance is the most common choice. + +One important thing about this algorithm is that its not based on any probabilistic framework, but the algorithm is able to estimate probability for each class given a test point $x$ and its k neighbors. + +Given a dataset with $n$ samples and $b$ distinct classes, and a new point $x$ we wish to classify: +$\{x_i, y_i\}, i=1,2....n, y_i \in \{c_1, c_2, c_3... c_b\}$ + +We calculate the number of samples that fall into a class for all classes: +$\{n_a, a=1,2,3...b\}, \Sigma_{a=1}^{b}n_a = n$ + +We first find the $k$ nearest neighbors of $x$: +$\{x_j, y_j\}, i=1,2....k, y_j \in \{c_1, c_2, c_3... c_c\}$ + +We then count the number of points in the $k$ neighbors that are in the class $c$: +$\{nk_a, a=1,2,3...b\}, \Sigma_{a=1}^{b}nk_a = k$ + +The probability that $x$ is of class $c_c$ is simply: +$P(c_c | x)= {nk_c\over k}$ + +This estimation is often accurate in practice, even though the algorithm is not built with probability in mind. + +## KNN from a bayesian stand point +Even though the KNN algorithm is not built on top of probabilistic framework, we can gain intuition behind its shockingly good estimation by framing it in the bayesian framework. + +What we want is: +$P(c_c | x), c=1,2,3...b$ +and in bayesian terms, what we need is: +$P(c_c | x) = {{P(x | c_c)*P(c_c)} \over {P(x)}}$ +Given nothing but our samples, $P(c_c)$, or the prior, is simply $n_c \over n$ + +$P(x)$ is the probabilistic density of random variable $x$, and we need to borrow some knowledge from density estimation for this analysis: + +Since we don't know $P(x)$, we need to conduct discrete trials on $P(x)$. Suppose that the density $P(x)$ lies in a D-Dimensional space, and we assume it to be Euclidean. We conduct trials in this space by drawing $n$ points on it according to $P(x)$ (these $n$ points are our samples). By principle of locality, for a given point $x_t$ we've drawn on the space, we can assume that the density have some correlations with points in the small space surrounding it. Let's draw a small sphere around the point, and name the space in the sphere $R$. + +The total probability that a test point can end up inside $R$ is the sum of probability that a point can be in a point in $R$ over all the small points in $R$, or the probability mass of $P(x)$ in $R$: +$P_{in R} = {\int_{R} P(x)dx}$ + +For the $n$ samples we gathered, each sample has a probability $P_{in R}$ of being inside $R$, then the total number of $k$ points that successfully end up in $R$ can be modeled using binomial distribution: +$Bin(k|n,P_{in R}) = {n! \over {k!(n-k)!}}{P_{in R}^k}{(1-P_{in R})}^{n-k}$ + +We also have: +$E(k) = n*P_{in R}$ +$P_{in R} = {{E(k)} \over n}$ + +For our algorithm we supply the parameter $k$, so we can just sub in our well-chosen $k$ instead of the expectation, which gives us: +$k \approx n*P_{in R}$ +$P_{in R} \approx {k \over n}$ + +We further assume that $R$ is quite small, thus $P(x)$ changes very little inside $R$, and we assume $P(x)$ to follow a uniform distribution, then we can derive that: +$P_{in R} \approx P(x)V$ Where $V$ is the volume of $R$. + +Then our final estimation of $P(x)$ will be: +$P(x) = {{k}\over{nV}}$ + +We repeat the process for a specific class $c_c$, and we will get: +$P(x|c_c) = {{nk_c}\over{n_c V}}$ + +substitute both $P(x|c_c) = {{nk_c}\over{n_c V}}$ and $P(x) = {{k}\over{nV}}$ into our bayesian, we will get: +$P(c_c | x)= {nk_c\over k}$ + +## Sources +* Christopher M. Bishop. 2006. Pattern Recognition and Machine Learning (Information Science and Statistics). Springer-Verlag, Berlin, Heidelberg. +* [MIT Lecture on KNN](https://youtu.be/09mb78oiPkA) + + + + + + + + + + + + From fd1304776ead9e9845edd2aae02bf24f567f39f3 Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Tue, 18 Oct 2022 03:08:28 -0700 Subject: [PATCH 03/31] Add link for sources --- docs/tutorials/neighbors.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/neighbors.md b/docs/tutorials/neighbors.md index 0535bc4..c4e3e88 100644 --- a/docs/tutorials/neighbors.md +++ b/docs/tutorials/neighbors.md @@ -63,7 +63,7 @@ substitute both $P(x|c_c) = {{nk_c}\over{n_c V}}$ and $P(x) = {{k}\over{nV}}$ in $P(c_c | x)= {nk_c\over k}$ ## Sources -* Christopher M. Bishop. 2006. Pattern Recognition and Machine Learning (Information Science and Statistics). Springer-Verlag, Berlin, Heidelberg. +* [Christopher M. Bishop. 2006. Pattern Recognition and Machine Learning (Information Science and Statistics). Springer-Verlag, Berlin, Heidelberg.](https://www.microsoft.com/en-us/research/uploads/prod/2006/01/Bishop-Pattern-Recognition-and-Machine-Learning-2006.pdf) * [MIT Lecture on KNN](https://youtu.be/09mb78oiPkA) From d2930838252ef42f8de5bd7baf15814a7737ebb7 Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Tue, 18 Oct 2022 10:47:53 -0700 Subject: [PATCH 04/31] remove doc --- docs/tutorials/neighbors.md | 79 ------------------------------------- torchml/svm/linear_svc.py | 47 +++++++++++----------- 2 files changed, 24 insertions(+), 102 deletions(-) delete mode 100644 docs/tutorials/neighbors.md diff --git a/docs/tutorials/neighbors.md b/docs/tutorials/neighbors.md deleted file mode 100644 index c4e3e88..0000000 --- a/docs/tutorials/neighbors.md +++ /dev/null @@ -1,79 +0,0 @@ -# Nearest Neighbors -`torchml.neighbors` currently supports Unsupervised learnings on classification problem. It currently supports K Nearest Neighbors classification with `torchml.neighbors.NearestNeighbors` that implement `sklearn.neighbors.NearestNeighbors`'s brute force solution with TorchML. - -## K Nearest Neighbors classification -The principle behind Nearest Neighbors algorithms is, given a distance function and a new test point $x$, the algorithm find k closest samples in the known sample set, and use them to estimate the $x$. The number $k$ can be user-defined and tuned according to the particular problem. The distance function can be any arbitrary metric function, and standard Euclidean distance is the most common choice. - -One important thing about this algorithm is that its not based on any probabilistic framework, but the algorithm is able to estimate probability for each class given a test point $x$ and its k neighbors. - -Given a dataset with $n$ samples and $b$ distinct classes, and a new point $x$ we wish to classify: -$\{x_i, y_i\}, i=1,2....n, y_i \in \{c_1, c_2, c_3... c_b\}$ - -We calculate the number of samples that fall into a class for all classes: -$\{n_a, a=1,2,3...b\}, \Sigma_{a=1}^{b}n_a = n$ - -We first find the $k$ nearest neighbors of $x$: -$\{x_j, y_j\}, i=1,2....k, y_j \in \{c_1, c_2, c_3... c_c\}$ - -We then count the number of points in the $k$ neighbors that are in the class $c$: -$\{nk_a, a=1,2,3...b\}, \Sigma_{a=1}^{b}nk_a = k$ - -The probability that $x$ is of class $c_c$ is simply: -$P(c_c | x)= {nk_c\over k}$ - -This estimation is often accurate in practice, even though the algorithm is not built with probability in mind. - -## KNN from a bayesian stand point -Even though the KNN algorithm is not built on top of probabilistic framework, we can gain intuition behind its shockingly good estimation by framing it in the bayesian framework. - -What we want is: -$P(c_c | x), c=1,2,3...b$ -and in bayesian terms, what we need is: -$P(c_c | x) = {{P(x | c_c)*P(c_c)} \over {P(x)}}$ -Given nothing but our samples, $P(c_c)$, or the prior, is simply $n_c \over n$ - -$P(x)$ is the probabilistic density of random variable $x$, and we need to borrow some knowledge from density estimation for this analysis: - -Since we don't know $P(x)$, we need to conduct discrete trials on $P(x)$. Suppose that the density $P(x)$ lies in a D-Dimensional space, and we assume it to be Euclidean. We conduct trials in this space by drawing $n$ points on it according to $P(x)$ (these $n$ points are our samples). By principle of locality, for a given point $x_t$ we've drawn on the space, we can assume that the density have some correlations with points in the small space surrounding it. Let's draw a small sphere around the point, and name the space in the sphere $R$. - -The total probability that a test point can end up inside $R$ is the sum of probability that a point can be in a point in $R$ over all the small points in $R$, or the probability mass of $P(x)$ in $R$: -$P_{in R} = {\int_{R} P(x)dx}$ - -For the $n$ samples we gathered, each sample has a probability $P_{in R}$ of being inside $R$, then the total number of $k$ points that successfully end up in $R$ can be modeled using binomial distribution: -$Bin(k|n,P_{in R}) = {n! \over {k!(n-k)!}}{P_{in R}^k}{(1-P_{in R})}^{n-k}$ - -We also have: -$E(k) = n*P_{in R}$ -$P_{in R} = {{E(k)} \over n}$ - -For our algorithm we supply the parameter $k$, so we can just sub in our well-chosen $k$ instead of the expectation, which gives us: -$k \approx n*P_{in R}$ -$P_{in R} \approx {k \over n}$ - -We further assume that $R$ is quite small, thus $P(x)$ changes very little inside $R$, and we assume $P(x)$ to follow a uniform distribution, then we can derive that: -$P_{in R} \approx P(x)V$ Where $V$ is the volume of $R$. - -Then our final estimation of $P(x)$ will be: -$P(x) = {{k}\over{nV}}$ - -We repeat the process for a specific class $c_c$, and we will get: -$P(x|c_c) = {{nk_c}\over{n_c V}}$ - -substitute both $P(x|c_c) = {{nk_c}\over{n_c V}}$ and $P(x) = {{k}\over{nV}}$ into our bayesian, we will get: -$P(c_c | x)= {nk_c\over k}$ - -## Sources -* [Christopher M. Bishop. 2006. Pattern Recognition and Machine Learning (Information Science and Statistics). Springer-Verlag, Berlin, Heidelberg.](https://www.microsoft.com/en-us/research/uploads/prod/2006/01/Bishop-Pattern-Recognition-and-Machine-Learning-2006.pdf) -* [MIT Lecture on KNN](https://youtu.be/09mb78oiPkA) - - - - - - - - - - - - diff --git a/torchml/svm/linear_svc.py b/torchml/svm/linear_svc.py index e4f06f9..5a1b274 100644 --- a/torchml/svm/linear_svc.py +++ b/torchml/svm/linear_svc.py @@ -52,33 +52,34 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): self.y_ = y assert X.shape[0] == y.shape[0], "Number of X and y rows don't match" - m, n = X.shape - - w = cp.Variable(n) - if self.fit_intercept: - b = cp.Variable() - - obj = 0 - for i in range(m): - if y[i] == self.classes_[1]: - yi = 1 - else: - yi = -1 - if self.fit_intercept: - obj += cp.square(cp.pos(1 - yi * (w.T @ X[i] + b))) - else: - obj += cp.sqaure(cp.pos(1 - yi * (w.T @ X[i]))) - - obj *= self.C - obj += cp.multiply((1 / 2.0), cp.norm(w, 2)) - - prob = cp.Problem(cp.Minimize(obj), []) - prob.solve() - self.coef_, self.intercept_ = torch.from_numpy(w.value), torch.from_numpy(b.value) + # m, n = X.shape + # + # w = cp.Variable(n) + # if self.fit_intercept: + # b = cp.Variable() + # + # obj = 0 + # for i in range(m): + # if y[i] == self.classes_[1]: + # yi = 1 + # else: + # yi = -1 + # if self.fit_intercept: + # obj += cp.square(cp.pos(1 - yi * (w.T @ X[i] + b))) + # else: + # obj += cp.sqaure(cp.pos(1 - yi * (w.T @ X[i]))) + # + # obj *= self.C + # obj += cp.multiply((1 / 2.0), cp.norm(w, 2)) + # + # prob = cp.Problem(cp.Minimize(obj), []) + # prob.solve() + # self.coef_, self.intercept_ = torch.from_numpy(w.value), torch.from_numpy(b.value) # if self.fit_intercept: # fit_lr = CvxpyLayer(prob, [], [w, b]) # else: # fit_lr = CvxpyLayer(prob, [], [w]) # # self.weight, self.intercept = fit_lr() + return self From dcd2a1a44abcc5be01959cfe4778706383904c8a Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Tue, 18 Oct 2022 12:10:54 -0700 Subject: [PATCH 05/31] vectorized LinearSVC --- tests/unit/svm/linear_svc_test.py | 4 +-- torchml/svm/linear_svc.py | 57 +++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/tests/unit/svm/linear_svc_test.py b/tests/unit/svm/linear_svc_test.py index c1caf02..1e5a787 100644 --- a/tests/unit/svm/linear_svc_test.py +++ b/tests/unit/svm/linear_svc_test.py @@ -14,20 +14,18 @@ class TestLinearSVC(unittest.TestCase): def test_coef(self): - x, y = make_classification(n_samples=500, n_features=10, + x, y = make_classification(n_samples=50000, n_features=10, n_classes=2) lsvc = LinearSVC(verbose=0) start = time.time() lsvc.fit(torch.from_numpy(x), torch.from_numpy(y)) end = time.time() print(end - start) - print("Here") start = time.time() reflsvc = svm.LinearSVC() reflsvc.fit(x, y) end = time.time() print(end - start) - print("Here") self.assertTrue(np.allclose(lsvc.coef_.numpy(), reflsvc.coef_, atol=0.03)) if __name__ == "__main__": diff --git a/torchml/svm/linear_svc.py b/torchml/svm/linear_svc.py index 5a1b274..21bd4e3 100644 --- a/torchml/svm/linear_svc.py +++ b/torchml/svm/linear_svc.py @@ -81,5 +81,62 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): # fit_lr = CvxpyLayer(prob, [], [w]) # # self.weight, self.intercept = fit_lr() + y = torch.unsqueeze(y, 1) + y = (y == self.classes_[1]).float() + y *= 2 + y -= 1 + + m, n = X.shape + + w = cp.Variable((n, 1)) + if self.fit_intercept: + b = cp.Variable() + X_param = cp.Parameter((m, n)) + y_param = cp.Parameter((m, 1)) + C_param = cp.Parameter(nonneg=True) + ones = torch.ones((m, 1)) + + # set up objective + if self.fit_intercept: + loss = cp.multiply((1 / 2.0), + cp.norm(w, 2)) + C_param * cp.sum(cp.square(cp.pos(ones - + cp.multiply(y_param, + X_param @ w + b)))) + else: + loss = (1 / (2 * m)) * cp.sum(cp.square(X_param @ w - y_param)) + + objective = loss + + # set up constraints + constraints = [] + + prob = cp.Problem(cp.Minimize(objective), constraints) + X_param.value = X.numpy() + y_param.value = y.numpy() + C_param.value = self.C + prob.solve(solver='ECOS', abstol=self.tol, max_iters=self.max_iter) + + # convert into pytorch layer + # if self.fit_intercept: + # fit_lr = CvxpyLayer(prob, [X_param, y_param, C_param], [w, b]) + # else: + # fit_lr = CvxpyLayer(prob, [X_param, y_param, C_param], [w]) + + # process input data + # if self.require_grad: + # X.requires_grad_(True) + # y.requires_grad_(True) + + # this object is now callable with pytorch tensors + + # if self.fit_intercept: + # self.weight, self.intercept = fit_lr( + # X, y, self.C + # ) + # else: + # self.weight = fit_lr(X, y, torch.tensor( + # self.alpha, dtype=torch.float64)) + self.coef_, self.intercept_ = torch.from_numpy(w.value), torch.from_numpy(b.value) + self.coef_ = torch.t(self.coef_) return self From 7117e9f24d4184cb31aa594fd5567298dfccf7cc Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Tue, 18 Oct 2022 12:20:15 -0700 Subject: [PATCH 06/31] support hinge loss --- torchml/svm/linear_svc.py | 55 ++++++++++++++------------------------- 1 file changed, 19 insertions(+), 36 deletions(-) diff --git a/torchml/svm/linear_svc.py b/torchml/svm/linear_svc.py index 21bd4e3..d897090 100644 --- a/torchml/svm/linear_svc.py +++ b/torchml/svm/linear_svc.py @@ -52,38 +52,9 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): self.y_ = y assert X.shape[0] == y.shape[0], "Number of X and y rows don't match" - # m, n = X.shape - # - # w = cp.Variable(n) - # if self.fit_intercept: - # b = cp.Variable() - # - # obj = 0 - # for i in range(m): - # if y[i] == self.classes_[1]: - # yi = 1 - # else: - # yi = -1 - # if self.fit_intercept: - # obj += cp.square(cp.pos(1 - yi * (w.T @ X[i] + b))) - # else: - # obj += cp.sqaure(cp.pos(1 - yi * (w.T @ X[i]))) - # - # obj *= self.C - # obj += cp.multiply((1 / 2.0), cp.norm(w, 2)) - # - # prob = cp.Problem(cp.Minimize(obj), []) - # prob.solve() - # self.coef_, self.intercept_ = torch.from_numpy(w.value), torch.from_numpy(b.value) - # if self.fit_intercept: - # fit_lr = CvxpyLayer(prob, [], [w, b]) - # else: - # fit_lr = CvxpyLayer(prob, [], [w]) - # - # self.weight, self.intercept = fit_lr() y = torch.unsqueeze(y, 1) - y = (y == self.classes_[1]).float() + y = (y != self.classes_[0]).float() y *= 2 y -= 1 @@ -97,15 +68,27 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): C_param = cp.Parameter(nonneg=True) ones = torch.ones((m, 1)) + loss = cp.multiply((1 / 2.0), cp.norm(w, 2)) + # set up objective if self.fit_intercept: - loss = cp.multiply((1 / 2.0), - cp.norm(w, 2)) + C_param * cp.sum(cp.square(cp.pos(ones - - cp.multiply(y_param, - X_param @ w + b)))) + if self.loss == "squared_hinge": + loss += C_param * cp.sum(cp.square(cp.pos(ones - + cp.multiply(y_param, + X_param @ w + b)))) + elif self.loss == "hinge": + loss += C_param * cp.sum(cp.pos(ones - + cp.multiply(y_param, + X_param @ w + b))) else: - loss = (1 / (2 * m)) * cp.sum(cp.square(X_param @ w - y_param)) - + if self.loss == "squared_hinge": + loss += C_param * cp.sum(cp.square(cp.pos(ones - + cp.multiply(y_param, + X_param @ w)))) + elif self.loss == "hinge": + loss += C_param * cp.sum(cp.pos(ones - + cp.multiply(y_param, + X_param @ w))) objective = loss # set up constraints From c0c52839f3afd70b7dd9b7f761eaf8373a61a00f Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Tue, 18 Oct 2022 18:13:36 -0700 Subject: [PATCH 07/31] fix format --- tests/unit/svm/linear_svc_test.py | 24 +++++-- torchml/neighbors/nearest_centroid.py | 3 +- torchml/svm/linear_svc.py | 96 ++++++++++----------------- 3 files changed, 52 insertions(+), 71 deletions(-) diff --git a/tests/unit/svm/linear_svc_test.py b/tests/unit/svm/linear_svc_test.py index 1e5a787..d110bb7 100644 --- a/tests/unit/svm/linear_svc_test.py +++ b/tests/unit/svm/linear_svc_test.py @@ -8,25 +8,35 @@ import torchml as ml from torchml.svm import LinearSVC -BSZ = 128 -DIM = 5 +n_samples = 5000 +n_features = 10 +n_classes = 2 +n_informative = 8 class TestLinearSVC(unittest.TestCase): - def test_coef(self): - x, y = make_classification(n_samples=50000, n_features=10, - n_classes=2) - lsvc = LinearSVC(verbose=0) + def test_simple(self): + x, y = make_classification( + n_samples=n_samples, + n_features=n_features, + n_classes=n_classes, + n_informative=n_informative, + ) + lsvc = LinearSVC(max_iter=1000) start = time.time() lsvc.fit(torch.from_numpy(x), torch.from_numpy(y)) end = time.time() print(end - start) start = time.time() - reflsvc = svm.LinearSVC() + reflsvc = svm.LinearSVC(max_iter=1000) reflsvc.fit(x, y) end = time.time() print(end - start) self.assertTrue(np.allclose(lsvc.coef_.numpy(), reflsvc.coef_, atol=0.03)) + self.assertTrue( + np.allclose(lsvc.intercept_.numpy(), reflsvc.intercept_, atol=0.03) + ) + if __name__ == "__main__": unittest.main() diff --git a/torchml/neighbors/nearest_centroid.py b/torchml/neighbors/nearest_centroid.py index 96dbf7b..d6d7ae3 100644 --- a/torchml/neighbors/nearest_centroid.py +++ b/torchml/neighbors/nearest_centroid.py @@ -116,8 +116,7 @@ def predict(self, X: torch.tensor) -> torch.tensor: for i in range(X.size(dim=0)): ret[i] = self.classes_[ - torch.argmin(torch.nn.PairwiseDistance(p=2) - (X[i], self.centroids_)) + torch.argmin(torch.nn.PairwiseDistance(p=2)(X[i], self.centroids_)) ] # return ret.to(self.y_type) diff --git a/torchml/svm/linear_svc.py b/torchml/svm/linear_svc.py index d897090..43d9657 100644 --- a/torchml/svm/linear_svc.py +++ b/torchml/svm/linear_svc.py @@ -9,28 +9,26 @@ class LinearSVC(ml.Model): - def __init__( - self, - penalty="l2", - loss="squared_hinge", - *, - dual=True, - tol=1e-4, - C=1.0, - multi_class="ovr", - fit_intercept=True, - intercept_scaling=1, - class_weight=None, - verbose=0, - random_state=None, - max_iter=1000, + self, + penalty="l2", + loss="squared_hinge", + *, + dual=True, + tol=1e-4, + C=1.0, + multi_class="ovr", + fit_intercept=True, + intercept_scaling=1, + class_weight=None, + verbose=0, + random_state=None, + max_iter=1000, ): super(LinearSVC, self).__init__() self.coef_ = None self.intercept_ = None self.classes_ = None - self.y_ = None self.dual = dual self.tol = tol self.C = C @@ -46,20 +44,21 @@ def __init__( def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): if self.C < 0: - raise ValueError( - "Penalty term must be positive; got (C=%r)" % self.C) + raise ValueError("Penalty term must be positive; got (C=%r)" % self.C) self.classes_ = torch.unique(y) - self.y_ = y assert X.shape[0] == y.shape[0], "Number of X and y rows don't match" + m, n = X.shape + + self.coef_ = torch.empty((0, n)) + self.intercept_ = torch.empty((0)) + y = torch.unsqueeze(y, 1) y = (y != self.classes_[0]).float() y *= 2 y -= 1 - m, n = X.shape - w = cp.Variable((n, 1)) if self.fit_intercept: b = cp.Variable() @@ -70,25 +69,16 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): loss = cp.multiply((1 / 2.0), cp.norm(w, 2)) - # set up objective if self.fit_intercept: - if self.loss == "squared_hinge": - loss += C_param * cp.sum(cp.square(cp.pos(ones - - cp.multiply(y_param, - X_param @ w + b)))) - elif self.loss == "hinge": - loss += C_param * cp.sum(cp.pos(ones - - cp.multiply(y_param, - X_param @ w + b))) + hinge = cp.pos(ones - cp.multiply(y_param, X_param @ w + b)) else: - if self.loss == "squared_hinge": - loss += C_param * cp.sum(cp.square(cp.pos(ones - - cp.multiply(y_param, - X_param @ w)))) - elif self.loss == "hinge": - loss += C_param * cp.sum(cp.pos(ones - - cp.multiply(y_param, - X_param @ w))) + hinge = cp.pos(ones - cp.multiply(y_param, X_param @ w)) + + if self.loss == "squared_hinge": + loss += C_param * cp.sum(cp.square(hinge)) + elif self.loss == "hinge": + loss += C_param * cp.sum(hinge) + objective = loss # set up constraints @@ -98,28 +88,10 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): X_param.value = X.numpy() y_param.value = y.numpy() C_param.value = self.C - prob.solve(solver='ECOS', abstol=self.tol, max_iters=self.max_iter) - - # convert into pytorch layer - # if self.fit_intercept: - # fit_lr = CvxpyLayer(prob, [X_param, y_param, C_param], [w, b]) - # else: - # fit_lr = CvxpyLayer(prob, [X_param, y_param, C_param], [w]) - - # process input data - # if self.require_grad: - # X.requires_grad_(True) - # y.requires_grad_(True) - - # this object is now callable with pytorch tensors - - # if self.fit_intercept: - # self.weight, self.intercept = fit_lr( - # X, y, self.C - # ) - # else: - # self.weight = fit_lr(X, y, torch.tensor( - # self.alpha, dtype=torch.float64)) - self.coef_, self.intercept_ = torch.from_numpy(w.value), torch.from_numpy(b.value) - self.coef_ = torch.t(self.coef_) + prob.solve(solver="ECOS", abstol=self.tol, max_iters=self.max_iter) + + self.coef_ = torch.cat((self.coef_, torch.t(torch.from_numpy(w.value)))) + self.intercept_ = torch.cat( + (self.intercept_, torch.unsqueeze(torch.from_numpy(b.value), 0)) + ) return self From 65bbe31ac748506ca7b6aee77b97b33b88aa93a1 Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Fri, 21 Oct 2022 17:25:57 -0700 Subject: [PATCH 08/31] add support for multiclass --- tests/unit/svm/linear_svc_test.py | 19 +++++------ torchml/svm/linear_svc.py | 52 +++++++++++++++++-------------- 2 files changed, 38 insertions(+), 33 deletions(-) diff --git a/tests/unit/svm/linear_svc_test.py b/tests/unit/svm/linear_svc_test.py index d110bb7..2a082df 100644 --- a/tests/unit/svm/linear_svc_test.py +++ b/tests/unit/svm/linear_svc_test.py @@ -5,36 +5,37 @@ import sklearn.svm as svm import time -import torchml as ml from torchml.svm import LinearSVC n_samples = 5000 n_features = 10 -n_classes = 2 -n_informative = 8 +n_classes = 5 +n_informative = 10 class TestLinearSVC(unittest.TestCase): - def test_simple(self): + def test_LinearSVC(self): x, y = make_classification( n_samples=n_samples, n_features=n_features, n_classes=n_classes, n_informative=n_informative, + n_redundant=n_features-n_informative ) lsvc = LinearSVC(max_iter=1000) start = time.time() lsvc.fit(torch.from_numpy(x), torch.from_numpy(y)) end = time.time() - print(end - start) + # print(end - start) start = time.time() - reflsvc = svm.LinearSVC(max_iter=1000) + reflsvc = svm.LinearSVC(max_iter=100000) reflsvc.fit(x, y) end = time.time() - print(end - start) - self.assertTrue(np.allclose(lsvc.coef_.numpy(), reflsvc.coef_, atol=0.03)) + # print(end - start) + self.assertTrue(np.allclose( + lsvc.coef_.numpy(), reflsvc.coef_, atol=1e-2)) self.assertTrue( - np.allclose(lsvc.intercept_.numpy(), reflsvc.intercept_, atol=0.03) + np.allclose(lsvc.intercept_.numpy(), reflsvc.intercept_, atol=1e-2) ) diff --git a/torchml/svm/linear_svc.py b/torchml/svm/linear_svc.py index 43d9657..2da5511 100644 --- a/torchml/svm/linear_svc.py +++ b/torchml/svm/linear_svc.py @@ -1,29 +1,25 @@ import torch -from sklearn.datasets import make_classification import torchml as ml import cvxpy as cp -from cvxpylayers.torch import CvxpyLayer - -from sklearn import svm class LinearSVC(ml.Model): def __init__( - self, - penalty="l2", - loss="squared_hinge", - *, - dual=True, - tol=1e-4, - C=1.0, - multi_class="ovr", - fit_intercept=True, - intercept_scaling=1, - class_weight=None, - verbose=0, - random_state=None, - max_iter=1000, + self, + penalty="l2", + loss="squared_hinge", + *, + dual=True, + tol=1e-4, + C=1.0, + multi_class="ovr", + fit_intercept=True, + intercept_scaling=1, + class_weight=None, + verbose=0, + random_state=None, + max_iter=1000, ): super(LinearSVC, self).__init__() self.coef_ = None @@ -47,15 +43,22 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): raise ValueError("Penalty term must be positive; got (C=%r)" % self.C) self.classes_ = torch.unique(y) assert X.shape[0] == y.shape[0], "Number of X and y rows don't match" - m, n = X.shape - self.coef_ = torch.empty((0, n)) self.intercept_ = torch.empty((0)) + if self.classes_.shape[0] == 2: + self.fit_with_one_class_(X, y, self.classes_[1], sample_weight=sample_weight) + else: + for i, x in enumerate(self.classes_): + self.fit_with_one_class_(X, y, x, sample_weight=sample_weight) + + def fit_with_one_class_(self, X: torch.Tensor, y: torch.Tensor, fitting_class: any, sample_weight=None): + + m, n = X.shape y = torch.unsqueeze(y, 1) - y = (y != self.classes_[0]).float() + y = (y == fitting_class).float() y *= 2 y -= 1 @@ -91,7 +94,8 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): prob.solve(solver="ECOS", abstol=self.tol, max_iters=self.max_iter) self.coef_ = torch.cat((self.coef_, torch.t(torch.from_numpy(w.value)))) - self.intercept_ = torch.cat( - (self.intercept_, torch.unsqueeze(torch.from_numpy(b.value), 0)) - ) + if self.fit_intercept: + self.intercept_ = torch.cat( + (self.intercept_, torch.unsqueeze(torch.from_numpy(b.value), 0)) + ) return self From 97872c467cda0e90b99cbe4a653453765366c780 Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Tue, 25 Oct 2022 02:27:39 -0700 Subject: [PATCH 09/31] change n_informative --- tests/unit/svm/linear_svc_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/svm/linear_svc_test.py b/tests/unit/svm/linear_svc_test.py index 2a082df..7abed19 100644 --- a/tests/unit/svm/linear_svc_test.py +++ b/tests/unit/svm/linear_svc_test.py @@ -10,7 +10,7 @@ n_samples = 5000 n_features = 10 n_classes = 5 -n_informative = 10 +n_informative = 7 class TestLinearSVC(unittest.TestCase): From e1ba7d2ff0bfb7088f8a1f14551e7882a332474c Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Tue, 25 Oct 2022 02:51:47 -0700 Subject: [PATCH 10/31] implemented predict and decision function --- tests/unit/svm/linear_svc_test.py | 9 +++++++- torchml/svm/linear_svc.py | 37 ++++++++++++++++++++++++++----- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/tests/unit/svm/linear_svc_test.py b/tests/unit/svm/linear_svc_test.py index 7abed19..2b4296f 100644 --- a/tests/unit/svm/linear_svc_test.py +++ b/tests/unit/svm/linear_svc_test.py @@ -20,7 +20,7 @@ def test_LinearSVC(self): n_features=n_features, n_classes=n_classes, n_informative=n_informative, - n_redundant=n_features-n_informative + n_redundant=n_features - n_informative ) lsvc = LinearSVC(max_iter=1000) start = time.time() @@ -30,6 +30,7 @@ def test_LinearSVC(self): start = time.time() reflsvc = svm.LinearSVC(max_iter=100000) reflsvc.fit(x, y) + end = time.time() # print(end - start) self.assertTrue(np.allclose( @@ -37,6 +38,12 @@ def test_LinearSVC(self): self.assertTrue( np.allclose(lsvc.intercept_.numpy(), reflsvc.intercept_, atol=1e-2) ) + self.assertTrue( + np.allclose(lsvc.decision_function(torch.from_numpy(x)), reflsvc.decision_function(x), atol=1e-2) + ) + self.assertTrue( + np.allclose(lsvc.predict(torch.from_numpy(x)), reflsvc.predict(x), atol=1e-2) + ) if __name__ == "__main__": diff --git a/torchml/svm/linear_svc.py b/torchml/svm/linear_svc.py index 2da5511..a7c6ebd 100644 --- a/torchml/svm/linear_svc.py +++ b/torchml/svm/linear_svc.py @@ -40,19 +40,45 @@ def __init__( def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): if self.C < 0: - raise ValueError("Penalty term must be positive; got (C=%r)" % self.C) + raise ValueError( + "Penalty term must be positive; got (C=%r)" % self.C) self.classes_ = torch.unique(y) assert X.shape[0] == y.shape[0], "Number of X and y rows don't match" m, n = X.shape self.coef_ = torch.empty((0, n)) self.intercept_ = torch.empty((0)) if self.classes_.shape[0] == 2: - self.fit_with_one_class_(X, y, self.classes_[1], sample_weight=sample_weight) + self._fit_with_one_class( + X, y, self.classes_[1], sample_weight=sample_weight) else: for i, x in enumerate(self.classes_): - self.fit_with_one_class_(X, y, x, sample_weight=sample_weight) + self._fit_with_one_class(X, y, x, sample_weight=sample_weight) + + def decision_function(self, X : torch.Tensor) -> torch.Tensor: + return X @ self.coef_.T + self.intercept_ + + def predict(self, X: torch.Tensor) -> torch.Tensor: + """ + Predict class labels for samples in X. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The data matrix for which we want to get the predictions. + + Returns + ------- + y_pred : ndarray of shape (n_samples,) + Vector containing the class labels for each sample. + """ + scores = self.decision_function(X) + if len(scores.shape) == 1: + indices = (scores > 0).int() + else: + indices = scores.argmax(dim=1) + return self.classes_[indices] - def fit_with_one_class_(self, X: torch.Tensor, y: torch.Tensor, fitting_class: any, sample_weight=None): + def _fit_with_one_class(self, X: torch.Tensor, y: torch.Tensor, fitting_class: any, sample_weight=None): m, n = X.shape @@ -93,7 +119,8 @@ def fit_with_one_class_(self, X: torch.Tensor, y: torch.Tensor, fitting_class: a C_param.value = self.C prob.solve(solver="ECOS", abstol=self.tol, max_iters=self.max_iter) - self.coef_ = torch.cat((self.coef_, torch.t(torch.from_numpy(w.value)))) + self.coef_ = torch.cat( + (self.coef_, torch.t(torch.from_numpy(w.value)))) if self.fit_intercept: self.intercept_ = torch.cat( (self.intercept_, torch.unsqueeze(torch.from_numpy(b.value), 0)) From 8193a94f4a08c1b02c20057e95ca8968f68ed501 Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Thu, 27 Oct 2022 22:09:52 -0700 Subject: [PATCH 11/31] add skeleton --- torchml/svm/__init__.py | 1 + torchml/svm/linear_svr.py | 128 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+) create mode 100644 torchml/svm/linear_svr.py diff --git a/torchml/svm/__init__.py b/torchml/svm/__init__.py index c799ee5..09ebc60 100644 --- a/torchml/svm/__init__.py +++ b/torchml/svm/__init__.py @@ -1 +1,2 @@ from .linear_svc import LinearSVC +from .linear_svr import LinearSVR diff --git a/torchml/svm/linear_svr.py b/torchml/svm/linear_svr.py new file mode 100644 index 0000000..3c7cc07 --- /dev/null +++ b/torchml/svm/linear_svr.py @@ -0,0 +1,128 @@ +import torch + +import torchml as ml +import cvxpy as cp + + +class LinearSVR(ml.Model): + def __init__( + self, + penalty="l2", + loss="squared_hinge", + *, + dual=True, + tol=1e-4, + C=1.0, + multi_class="ovr", + fit_intercept=True, + intercept_scaling=1, + class_weight=None, + verbose=0, + random_state=None, + max_iter=1000, + ): + super(LinearSVC, self).__init__() + self.coef_ = None + self.intercept_ = None + self.classes_ = None + self.dual = dual + self.tol = tol + self.C = C + self.multi_class = multi_class + self.fit_intercept = fit_intercept + self.intercept_scaling = intercept_scaling + self.class_weight = class_weight + self.verbose = verbose + self.random_state = random_state + self.max_iter = max_iter + self.penalty = penalty + self.loss = loss + + def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): + if self.C < 0: + raise ValueError( + "Penalty term must be positive; got (C=%r)" % self.C) + self.classes_ = torch.unique(y) + assert X.shape[0] == y.shape[0], "Number of X and y rows don't match" + m, n = X.shape + self.coef_ = torch.empty((0, n)) + self.intercept_ = torch.empty((0)) + if self.classes_.shape[0] == 2: + self._fit_with_one_class( + X, y, self.classes_[1], sample_weight=sample_weight) + else: + for i, x in enumerate(self.classes_): + self._fit_with_one_class(X, y, x, sample_weight=sample_weight) + + def decision_function(self, X: torch.Tensor) -> torch.Tensor: + return X @ self.coef_.T + self.intercept_ + + def predict(self, X: torch.Tensor) -> torch.Tensor: + """ + Predict class labels for samples in X. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The data matrix for which we want to get the predictions. + + Returns + ------- + y_pred : ndarray of shape (n_samples,) + Vector containing the class labels for each sample. + """ + scores = self.decision_function(X) + if len(scores.shape) == 1: + indices = (scores > 0).int() + else: + indices = scores.argmax(dim=1) + return self.classes_[indices] + + def _fit_with_one_class(self, X: torch.Tensor, y: torch.Tensor, fitting_class: any, sample_weight=None): + + m, n = X.shape + + y = torch.unsqueeze(y, 1) + + y = (y == fitting_class).float() + y *= 2 + y -= 1 + + w = cp.Variable((n, 1)) + if self.fit_intercept: + b = cp.Variable() + X_param = cp.Parameter((m, n)) + y_param = cp.Parameter((m, 1)) + C_param = cp.Parameter(nonneg=True) + ones = torch.ones((m, 1)) + + loss = cp.multiply((1 / 2.0), cp.norm(w, 2)) + + if self.fit_intercept: + hinge = cp.pos(ones - cp.multiply(y_param, X_param @ w + b)) + else: + hinge = cp.pos(ones - cp.multiply(y_param, X_param @ w)) + + if self.loss == "squared_hinge": + loss += C_param * cp.sum(cp.square(hinge)) + elif self.loss == "hinge": + loss += C_param * cp.sum(hinge) + + objective = loss + + # set up constraints + constraints = [] + + prob = cp.Problem(cp.Minimize(objective), constraints) + X_param.value = X.numpy() + y_param.value = y.numpy() + C_param.value = self.C + prob.solve(solver="ECOS", abstol=self.tol, max_iters=self.max_iter) + + self.coef_ = torch.cat( + (self.coef_, torch.t(torch.from_numpy(w.value)))) + if self.fit_intercept: + self.intercept_ = torch.cat( + (self.intercept_, torch.unsqueeze(torch.from_numpy(b.value), 0)) + ) + return self From cbd5d01ebe4fadfbf7704a97c7039797a162e44d Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Thu, 27 Oct 2022 22:51:26 -0700 Subject: [PATCH 12/31] implemented svr --- tests/unit/svm/linear_svr_test.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/unit/svm/linear_svr_test.py diff --git a/tests/unit/svm/linear_svr_test.py b/tests/unit/svm/linear_svr_test.py new file mode 100644 index 0000000..e69de29 From 9eefcdb1ce29788728ac5252ee19cf00130cad86 Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Thu, 27 Oct 2022 22:51:31 -0700 Subject: [PATCH 13/31] implemented svr --- tests/unit/svm/linear_svc_test.py | 15 +++-- tests/unit/svm/linear_svr_test.py | 45 +++++++++++++ torchml/svm/linear_svc.py | 43 +++++++------ torchml/svm/linear_svr.py | 102 +++++++++--------------------- 4 files changed, 106 insertions(+), 99 deletions(-) diff --git a/tests/unit/svm/linear_svc_test.py b/tests/unit/svm/linear_svc_test.py index 2b4296f..2f1826f 100644 --- a/tests/unit/svm/linear_svc_test.py +++ b/tests/unit/svm/linear_svc_test.py @@ -20,7 +20,7 @@ def test_LinearSVC(self): n_features=n_features, n_classes=n_classes, n_informative=n_informative, - n_redundant=n_features - n_informative + n_redundant=n_features - n_informative, ) lsvc = LinearSVC(max_iter=1000) start = time.time() @@ -33,16 +33,21 @@ def test_LinearSVC(self): end = time.time() # print(end - start) - self.assertTrue(np.allclose( - lsvc.coef_.numpy(), reflsvc.coef_, atol=1e-2)) + self.assertTrue(np.allclose(lsvc.coef_.numpy(), reflsvc.coef_, atol=1e-2)) self.assertTrue( np.allclose(lsvc.intercept_.numpy(), reflsvc.intercept_, atol=1e-2) ) self.assertTrue( - np.allclose(lsvc.decision_function(torch.from_numpy(x)), reflsvc.decision_function(x), atol=1e-2) + np.allclose( + lsvc.decision_function(torch.from_numpy(x)), + reflsvc.decision_function(x), + atol=1e-2, + ) ) self.assertTrue( - np.allclose(lsvc.predict(torch.from_numpy(x)), reflsvc.predict(x), atol=1e-2) + np.allclose( + lsvc.predict(torch.from_numpy(x)), reflsvc.predict(x), atol=1e-2 + ) ) diff --git a/tests/unit/svm/linear_svr_test.py b/tests/unit/svm/linear_svr_test.py index e69de29..c5c7a69 100644 --- a/tests/unit/svm/linear_svr_test.py +++ b/tests/unit/svm/linear_svr_test.py @@ -0,0 +1,45 @@ +import unittest +import numpy as np +import torch +from sklearn.datasets import make_regression +import sklearn.svm as svm +import time + +from torchml.svm import LinearSVR + +n_samples = 5000 +n_features = 10 +n_informative = 7 + + +class TestLinearSVR(unittest.TestCase): + def test_LinearSVR(self): + x, y = make_regression( + n_samples=n_samples, + n_features=n_features, + n_informative=n_informative, + ) + lsvr = LinearSVR(max_iter=1000) + start = time.time() + lsvr.fit(torch.from_numpy(x), torch.from_numpy(y)) + end = time.time() + print(end - start) + start = time.time() + reflsvr = svm.LinearSVR(max_iter=100000) + reflsvr.fit(x, y) + + end = time.time() + print(end - start) + self.assertTrue(np.allclose(lsvr.coef_.numpy(), reflsvr.coef_, atol=1e-2)) + self.assertTrue( + np.allclose(lsvr.intercept_.numpy(), reflsvr.intercept_, atol=1e-2) + ) + self.assertTrue( + np.allclose( + lsvr.predict(torch.from_numpy(x)), reflsvr.predict(x), atol=1e-2 + ) + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/torchml/svm/linear_svc.py b/torchml/svm/linear_svc.py index a7c6ebd..79f583d 100644 --- a/torchml/svm/linear_svc.py +++ b/torchml/svm/linear_svc.py @@ -6,20 +6,20 @@ class LinearSVC(ml.Model): def __init__( - self, - penalty="l2", - loss="squared_hinge", - *, - dual=True, - tol=1e-4, - C=1.0, - multi_class="ovr", - fit_intercept=True, - intercept_scaling=1, - class_weight=None, - verbose=0, - random_state=None, - max_iter=1000, + self, + penalty="l2", + loss="squared_hinge", + *, + dual=True, + tol=1e-4, + C=1.0, + multi_class="ovr", + fit_intercept=True, + intercept_scaling=1, + class_weight=None, + verbose=0, + random_state=None, + max_iter=1000, ): super(LinearSVC, self).__init__() self.coef_ = None @@ -40,8 +40,7 @@ def __init__( def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): if self.C < 0: - raise ValueError( - "Penalty term must be positive; got (C=%r)" % self.C) + raise ValueError("Penalty term must be positive; got (C=%r)" % self.C) self.classes_ = torch.unique(y) assert X.shape[0] == y.shape[0], "Number of X and y rows don't match" m, n = X.shape @@ -49,12 +48,13 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): self.intercept_ = torch.empty((0)) if self.classes_.shape[0] == 2: self._fit_with_one_class( - X, y, self.classes_[1], sample_weight=sample_weight) + X, y, self.classes_[1], sample_weight=sample_weight + ) else: for i, x in enumerate(self.classes_): self._fit_with_one_class(X, y, x, sample_weight=sample_weight) - def decision_function(self, X : torch.Tensor) -> torch.Tensor: + def decision_function(self, X: torch.Tensor) -> torch.Tensor: return X @ self.coef_.T + self.intercept_ def predict(self, X: torch.Tensor) -> torch.Tensor: @@ -78,7 +78,9 @@ def predict(self, X: torch.Tensor) -> torch.Tensor: indices = scores.argmax(dim=1) return self.classes_[indices] - def _fit_with_one_class(self, X: torch.Tensor, y: torch.Tensor, fitting_class: any, sample_weight=None): + def _fit_with_one_class( + self, X: torch.Tensor, y: torch.Tensor, fitting_class: any, sample_weight=None + ): m, n = X.shape @@ -119,8 +121,7 @@ def _fit_with_one_class(self, X: torch.Tensor, y: torch.Tensor, fitting_class: a C_param.value = self.C prob.solve(solver="ECOS", abstol=self.tol, max_iters=self.max_iter) - self.coef_ = torch.cat( - (self.coef_, torch.t(torch.from_numpy(w.value)))) + self.coef_ = torch.cat((self.coef_, torch.t(torch.from_numpy(w.value)))) if self.fit_intercept: self.intercept_ = torch.cat( (self.intercept_, torch.unsqueeze(torch.from_numpy(b.value), 0)) diff --git a/torchml/svm/linear_svr.py b/torchml/svm/linear_svr.py index 3c7cc07..454c3c6 100644 --- a/torchml/svm/linear_svr.py +++ b/torchml/svm/linear_svr.py @@ -6,106 +6,61 @@ class LinearSVR(ml.Model): def __init__( - self, - penalty="l2", - loss="squared_hinge", - *, - dual=True, - tol=1e-4, - C=1.0, - multi_class="ovr", - fit_intercept=True, - intercept_scaling=1, - class_weight=None, - verbose=0, - random_state=None, - max_iter=1000, + self, + *, + epsilon=0.0, + tol=1e-4, + C=1.0, + loss="epsilon_insensitive", + fit_intercept=True, + intercept_scaling=1.0, + dual=True, + verbose=0, + random_state=None, + max_iter=1000, ): - super(LinearSVC, self).__init__() - self.coef_ = None + super(LinearSVR, self).__init__() self.intercept_ = None + self.coef_ = None self.classes_ = None - self.dual = dual self.tol = tol self.C = C - self.multi_class = multi_class + self.epsilon = epsilon self.fit_intercept = fit_intercept self.intercept_scaling = intercept_scaling - self.class_weight = class_weight self.verbose = verbose self.random_state = random_state self.max_iter = max_iter - self.penalty = penalty + self.dual = dual self.loss = loss def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): if self.C < 0: - raise ValueError( - "Penalty term must be positive; got (C=%r)" % self.C) - self.classes_ = torch.unique(y) + raise ValueError("Penalty term must be positive; got (C=%r)" % self.C) assert X.shape[0] == y.shape[0], "Number of X and y rows don't match" m, n = X.shape - self.coef_ = torch.empty((0, n)) - self.intercept_ = torch.empty((0)) - if self.classes_.shape[0] == 2: - self._fit_with_one_class( - X, y, self.classes_[1], sample_weight=sample_weight) - else: - for i, x in enumerate(self.classes_): - self._fit_with_one_class(X, y, x, sample_weight=sample_weight) - - def decision_function(self, X: torch.Tensor) -> torch.Tensor: - return X @ self.coef_.T + self.intercept_ - - def predict(self, X: torch.Tensor) -> torch.Tensor: - """ - Predict class labels for samples in X. - - Parameters - ---------- - X : {array-like, sparse matrix} of shape (n_samples, n_features) - The data matrix for which we want to get the predictions. - - Returns - ------- - y_pred : ndarray of shape (n_samples,) - Vector containing the class labels for each sample. - """ - scores = self.decision_function(X) - if len(scores.shape) == 1: - indices = (scores > 0).int() - else: - indices = scores.argmax(dim=1) - return self.classes_[indices] - - def _fit_with_one_class(self, X: torch.Tensor, y: torch.Tensor, fitting_class: any, sample_weight=None): - m, n = X.shape y = torch.unsqueeze(y, 1) - y = (y == fitting_class).float() - y *= 2 - y -= 1 - w = cp.Variable((n, 1)) if self.fit_intercept: b = cp.Variable() X_param = cp.Parameter((m, n)) y_param = cp.Parameter((m, 1)) C_param = cp.Parameter(nonneg=True) - ones = torch.ones((m, 1)) + epi_param = cp.Parameter() loss = cp.multiply((1 / 2.0), cp.norm(w, 2)) if self.fit_intercept: - hinge = cp.pos(ones - cp.multiply(y_param, X_param @ w + b)) + hinge = cp.pos(cp.abs(y_param - (X_param @ w + b)) - epi_param) else: - hinge = cp.pos(ones - cp.multiply(y_param, X_param @ w)) + hinge = cp.pos(cp.abs(y_param - (X_param @ w + b)) - epi_param) - if self.loss == "squared_hinge": + if self.loss == "epsilon_insensitive": loss += C_param * cp.sum(cp.square(hinge)) - elif self.loss == "hinge": + elif self.loss == "squared_epsilon_insensitive": loss += C_param * cp.sum(hinge) objective = loss @@ -117,12 +72,13 @@ def _fit_with_one_class(self, X: torch.Tensor, y: torch.Tensor, fitting_class: a X_param.value = X.numpy() y_param.value = y.numpy() C_param.value = self.C + epi_param.value = self.epsilon prob.solve(solver="ECOS", abstol=self.tol, max_iters=self.max_iter) - self.coef_ = torch.cat( - (self.coef_, torch.t(torch.from_numpy(w.value)))) - if self.fit_intercept: - self.intercept_ = torch.cat( - (self.intercept_, torch.unsqueeze(torch.from_numpy(b.value), 0)) - ) + self.coef_, self.intercept_ = torch.flatten( + torch.from_numpy(w.value) + ), torch.flatten(torch.from_numpy(b.value)) return self + + def predict(self, X: torch.Tensor) -> torch.Tensor: + return X @ self.coef_ + self.intercept_ From 4abcbc9c075e9c216828a781e198712b299bdd8c Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Thu, 27 Oct 2022 22:53:07 -0700 Subject: [PATCH 14/31] black the repo --- tests/unit/svm/linear_svc_test.py | 15 +++++++---- torchml/svm/linear_svc.py | 43 ++++++++++++++++--------------- 2 files changed, 32 insertions(+), 26 deletions(-) diff --git a/tests/unit/svm/linear_svc_test.py b/tests/unit/svm/linear_svc_test.py index 2b4296f..2f1826f 100644 --- a/tests/unit/svm/linear_svc_test.py +++ b/tests/unit/svm/linear_svc_test.py @@ -20,7 +20,7 @@ def test_LinearSVC(self): n_features=n_features, n_classes=n_classes, n_informative=n_informative, - n_redundant=n_features - n_informative + n_redundant=n_features - n_informative, ) lsvc = LinearSVC(max_iter=1000) start = time.time() @@ -33,16 +33,21 @@ def test_LinearSVC(self): end = time.time() # print(end - start) - self.assertTrue(np.allclose( - lsvc.coef_.numpy(), reflsvc.coef_, atol=1e-2)) + self.assertTrue(np.allclose(lsvc.coef_.numpy(), reflsvc.coef_, atol=1e-2)) self.assertTrue( np.allclose(lsvc.intercept_.numpy(), reflsvc.intercept_, atol=1e-2) ) self.assertTrue( - np.allclose(lsvc.decision_function(torch.from_numpy(x)), reflsvc.decision_function(x), atol=1e-2) + np.allclose( + lsvc.decision_function(torch.from_numpy(x)), + reflsvc.decision_function(x), + atol=1e-2, + ) ) self.assertTrue( - np.allclose(lsvc.predict(torch.from_numpy(x)), reflsvc.predict(x), atol=1e-2) + np.allclose( + lsvc.predict(torch.from_numpy(x)), reflsvc.predict(x), atol=1e-2 + ) ) diff --git a/torchml/svm/linear_svc.py b/torchml/svm/linear_svc.py index a7c6ebd..79f583d 100644 --- a/torchml/svm/linear_svc.py +++ b/torchml/svm/linear_svc.py @@ -6,20 +6,20 @@ class LinearSVC(ml.Model): def __init__( - self, - penalty="l2", - loss="squared_hinge", - *, - dual=True, - tol=1e-4, - C=1.0, - multi_class="ovr", - fit_intercept=True, - intercept_scaling=1, - class_weight=None, - verbose=0, - random_state=None, - max_iter=1000, + self, + penalty="l2", + loss="squared_hinge", + *, + dual=True, + tol=1e-4, + C=1.0, + multi_class="ovr", + fit_intercept=True, + intercept_scaling=1, + class_weight=None, + verbose=0, + random_state=None, + max_iter=1000, ): super(LinearSVC, self).__init__() self.coef_ = None @@ -40,8 +40,7 @@ def __init__( def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): if self.C < 0: - raise ValueError( - "Penalty term must be positive; got (C=%r)" % self.C) + raise ValueError("Penalty term must be positive; got (C=%r)" % self.C) self.classes_ = torch.unique(y) assert X.shape[0] == y.shape[0], "Number of X and y rows don't match" m, n = X.shape @@ -49,12 +48,13 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): self.intercept_ = torch.empty((0)) if self.classes_.shape[0] == 2: self._fit_with_one_class( - X, y, self.classes_[1], sample_weight=sample_weight) + X, y, self.classes_[1], sample_weight=sample_weight + ) else: for i, x in enumerate(self.classes_): self._fit_with_one_class(X, y, x, sample_weight=sample_weight) - def decision_function(self, X : torch.Tensor) -> torch.Tensor: + def decision_function(self, X: torch.Tensor) -> torch.Tensor: return X @ self.coef_.T + self.intercept_ def predict(self, X: torch.Tensor) -> torch.Tensor: @@ -78,7 +78,9 @@ def predict(self, X: torch.Tensor) -> torch.Tensor: indices = scores.argmax(dim=1) return self.classes_[indices] - def _fit_with_one_class(self, X: torch.Tensor, y: torch.Tensor, fitting_class: any, sample_weight=None): + def _fit_with_one_class( + self, X: torch.Tensor, y: torch.Tensor, fitting_class: any, sample_weight=None + ): m, n = X.shape @@ -119,8 +121,7 @@ def _fit_with_one_class(self, X: torch.Tensor, y: torch.Tensor, fitting_class: a C_param.value = self.C prob.solve(solver="ECOS", abstol=self.tol, max_iters=self.max_iter) - self.coef_ = torch.cat( - (self.coef_, torch.t(torch.from_numpy(w.value)))) + self.coef_ = torch.cat((self.coef_, torch.t(torch.from_numpy(w.value)))) if self.fit_intercept: self.intercept_ = torch.cat( (self.intercept_, torch.unsqueeze(torch.from_numpy(b.value), 0)) From 7638bed593e5d75777a4136508f2c50d3c9aa156 Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Sun, 30 Oct 2022 18:52:42 -0700 Subject: [PATCH 15/31] add docs for linearSVC --- torchml/svm/linear_svc.py | 104 ++++++++++++++++++++++++++++++++++---- 1 file changed, 95 insertions(+), 9 deletions(-) diff --git a/torchml/svm/linear_svc.py b/torchml/svm/linear_svc.py index 79f583d..8d38b66 100644 --- a/torchml/svm/linear_svc.py +++ b/torchml/svm/linear_svc.py @@ -5,6 +5,77 @@ class LinearSVC(ml.Model): + """ + ## Description + + Unsupervised learner for implementing KNN Classifier. + + ## References + + 1. Bernhard E. Boser, Isabelle M. Guyon, and Vladimir N. Vapnik. 1992. A training algorithm for optimal margin classifiers. In Proceedings of the fifth annual workshop on Computational learning theory (COLT '92). Association for Computing Machinery, New York, NY, USA, 144–152. https://doi.org/10.1145/130385.130401 + 2. MIT 6.034 Artificial Intelligence, Fall 2010, [16. Learning: Support Vector Machines](https://youtu.be/_PwhiWxHK8o) + 3. The scikit-learn [documentation page](https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html) for LinearSVC. + + ## Arguments + + * `penalty` (str {'l1', 'l2'}, default=’l2’): + Specifies the norm used in the penalization. + + * `loss` (str {‘hinge’, ‘squared_hinge’}, default=’squared_hinge’): + Specifies the loss function. ‘hinge’ is the standard SVM loss. + + * `dual` (bool, default=True): + Dummy variable to keep consistency with SKlearn's API, always 'False' for now. + + * `tol` (float, default=1e-4) + Tolerance for stopping criteria. + + * `C` (float, default=1.0): + Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. + + * `multi_class` (str {‘ovr’, ‘crammer_singer’}, default=’ovr’): + Dummy variable, always 'ovr' (one class over all the other as a single class) + + * `fit_intercept` (bool, default=True): + Whether to calculate the intercept for this model. + + * `intercept_scaling` (float, default=1): + Dummy variable to mimic the sklearn API, always 1 for now + + * `class_weight` (dict or str ‘balanced’, default=None): + Dummy variable to mimic the sklearn API, always None for now + + * `verbose` (int, default=0): + Dummy variable to mimic the sklearn API, always 0 for now + + * `random_state` (int, RandomState instance or None, default=None): + Dummy variable to mimic the sklearn API, always None for now + + * `max_iter` (int, default=1000): + The maximum number of iterations to be run for the underneath convex solver. + + + ## Example + + ~~~python + import numpy as np + from torchml.svm import LinearSVC + from sklearn.datasets import make_classification + + x, y = make_classification( + n_samples=n_samples, + n_features=n_features, + n_classes=n_classes, + n_informative=n_informative, + n_redundant=n_features - n_informative, + ) + svc = LinearSVC(max_iter=1000) + svc.fit(torch.from_numpy(x), torch.from_numpy(y)) + svc.decision_function(torch.from_numpy(x) + svc.predict(torch.from_numpy(x)) + ~~~ + """ + def __init__( self, penalty="l2", @@ -39,6 +110,16 @@ def __init__( self.loss = loss def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): + """ + ## Description + + Initialize the class with training sets + + ## Arguments + * `X` (torch.Tensor): the training set + * `y` (torch.Tensor, default=None): the class labels for each sample + + """ if self.C < 0: raise ValueError("Penalty term must be positive; got (C=%r)" % self.C) self.classes_ = torch.unique(y) @@ -55,21 +136,26 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): self._fit_with_one_class(X, y, x, sample_weight=sample_weight) def decision_function(self, X: torch.Tensor) -> torch.Tensor: + """ + ## Description + + Predict confidence scores for samples. + + ## Arguments + * `X` (torch.Tensor): the data set for which we want to get the confidence scores. + + """ return X @ self.coef_.T + self.intercept_ def predict(self, X: torch.Tensor) -> torch.Tensor: """ - Predict class labels for samples in X. + ## Description + + Predict the class labels for the provided data. - Parameters - ---------- - X : {array-like, sparse matrix} of shape (n_samples, n_features) - The data matrix for which we want to get the predictions. + ## Arguments - Returns - ------- - y_pred : ndarray of shape (n_samples,) - Vector containing the class labels for each sample. + * `X` (torch.Tensor): the target point """ scores = self.decision_function(X) if len(scores.shape) == 1: From 9610f737a7a427e5946a7707ce614baea729a11e Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Sun, 30 Oct 2022 18:55:24 -0700 Subject: [PATCH 16/31] fix doc --- torchml/svm/linear_svc.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/torchml/svm/linear_svc.py b/torchml/svm/linear_svc.py index 8d38b66..e491842 100644 --- a/torchml/svm/linear_svc.py +++ b/torchml/svm/linear_svc.py @@ -8,7 +8,7 @@ class LinearSVC(ml.Model): """ ## Description - Unsupervised learner for implementing KNN Classifier. + Support vector classifier with cvxpy ## References @@ -121,7 +121,8 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): """ if self.C < 0: - raise ValueError("Penalty term must be positive; got (C=%r)" % self.C) + raise ValueError( + "Penalty term must be positive; got (C=%r)" % self.C) self.classes_ = torch.unique(y) assert X.shape[0] == y.shape[0], "Number of X and y rows don't match" m, n = X.shape @@ -207,7 +208,8 @@ def _fit_with_one_class( C_param.value = self.C prob.solve(solver="ECOS", abstol=self.tol, max_iters=self.max_iter) - self.coef_ = torch.cat((self.coef_, torch.t(torch.from_numpy(w.value)))) + self.coef_ = torch.cat( + (self.coef_, torch.t(torch.from_numpy(w.value)))) if self.fit_intercept: self.intercept_ = torch.cat( (self.intercept_, torch.unsqueeze(torch.from_numpy(b.value), 0)) From 63cdd9a87beb70ecc1f2ddadc37aaa755b974f36 Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Sun, 30 Oct 2022 19:08:17 -0700 Subject: [PATCH 17/31] add docs for linearSVR --- torchml/svm/linear_svr.py | 82 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/torchml/svm/linear_svr.py b/torchml/svm/linear_svr.py index 454c3c6..301a236 100644 --- a/torchml/svm/linear_svr.py +++ b/torchml/svm/linear_svr.py @@ -5,6 +5,68 @@ class LinearSVR(ml.Model): + """ + ## Description + + Support vector regressor with cvxpy + + ## References + + 1. Bernhard E. Boser, Isabelle M. Guyon, and Vladimir N. Vapnik. 1992. A training algorithm for optimal margin classifiers. In Proceedings of the fifth annual workshop on Computational learning theory (COLT '92). Association for Computing Machinery, New York, NY, USA, 144–152. https://doi.org/10.1145/130385.130401 + 2. MIT 6.034 Artificial Intelligence, Fall 2010, [16. Learning: Support Vector Machines](https://youtu.be/_PwhiWxHK8o) + 3. The scikit-learn [documentation page](https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html) for LinearSVC. + + ## Arguments + + * `loss` (str {‘epsilon_insensitive’, ‘squared_epsilon_insensitive’}, default=’epsilon_insensitive’): + Specifies the loss function. + + * `epsilon` (float, default=0.0): + Epsilon parameter in the epsilon-insensitive loss function. + + * `tol` (float, default=1e-4) + Tolerance for stopping criteria. + + * `C` (float, default=1.0): + Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. + + * `fit_intercept` (bool, default=True): + Whether to calculate the intercept for this model. + + * `intercept_scaling` (float, default=1): + Dummy variable to mimic the sklearn API, always 1 for now + + * `dual` (bool, default=True): + Dummy variable to keep consistency with SKlearn's API, always 'False' for now. + + * `verbose` (int, default=0): + Dummy variable to mimic the sklearn API, always 0 for now + + * `random_state` (int, RandomState instance or None, default=None): + Dummy variable to mimic the sklearn API, always None for now + + * `max_iter` (int, default=1000): + The maximum number of iterations to be run for the underneath convex solver. + + + ## Example + + ~~~python + import numpy as np + from torchml.svm import LinearSVR + from sklearn.datasets import make_regression + + x, y = make_regression( + n_samples=n_samples, + n_features=n_features, + n_informative=n_informative, + ) + svr = LinearSVR(max_iter=1000) + svr.fit(torch.from_numpy(x), torch.from_numpy(y)) + svr.predict(torch.from_numpy(x)) + ~~~ + """ + def __init__( self, *, @@ -35,6 +97,17 @@ def __init__( self.loss = loss def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): + """ + ## Description + + Initialize the class with training sets + + ## Arguments + * `X` (torch.Tensor): the training set + * `y` (torch.Tensor): Target vector relative to X. + * `sample_weight` (default=None): Dummy variable for feature not supported yet. + """ + if self.C < 0: raise ValueError("Penalty term must be positive; got (C=%r)" % self.C) assert X.shape[0] == y.shape[0], "Number of X and y rows don't match" @@ -81,4 +154,13 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): return self def predict(self, X: torch.Tensor) -> torch.Tensor: + """ + ## Description + + Predict using the linear model + + ## Arguments + + * `X` (torch.Tensor): Samples. + """ return X @ self.coef_ + self.intercept_ From 943224aaf25eb658453103b6a38e0a7c3ac82485 Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Fri, 4 Nov 2022 20:53:29 -0700 Subject: [PATCH 18/31] add dpp formulation --- tests/unit/svm/linear_svc_test.py | 4 ++-- torchml/svm/linear_svc.py | 34 ++++++++++++++++++++----------- torchml/svm/linear_svr.py | 3 ++- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/tests/unit/svm/linear_svc_test.py b/tests/unit/svm/linear_svc_test.py index 2f1826f..9172cc4 100644 --- a/tests/unit/svm/linear_svc_test.py +++ b/tests/unit/svm/linear_svc_test.py @@ -8,9 +8,9 @@ from torchml.svm import LinearSVC n_samples = 5000 -n_features = 10 +n_features = 5 n_classes = 5 -n_informative = 7 +n_informative = 5 class TestLinearSVC(unittest.TestCase): diff --git a/torchml/svm/linear_svc.py b/torchml/svm/linear_svc.py index 79f583d..f05ac25 100644 --- a/torchml/svm/linear_svc.py +++ b/torchml/svm/linear_svc.py @@ -2,6 +2,7 @@ import torchml as ml import cvxpy as cp +from cvxpylayers.torch import CvxpyLayer class LinearSVC(ml.Model): @@ -94,21 +95,19 @@ def _fit_with_one_class( if self.fit_intercept: b = cp.Variable() X_param = cp.Parameter((m, n)) - y_param = cp.Parameter((m, 1)) - C_param = cp.Parameter(nonneg=True) ones = torch.ones((m, 1)) loss = cp.multiply((1 / 2.0), cp.norm(w, 2)) if self.fit_intercept: - hinge = cp.pos(ones - cp.multiply(y_param, X_param @ w + b)) + hinge = cp.pos(ones - cp.multiply(y, X_param @ w + b)) else: - hinge = cp.pos(ones - cp.multiply(y_param, X_param @ w)) + hinge = cp.pos(ones - cp.multiply(y, X_param @ w)) if self.loss == "squared_hinge": - loss += C_param * cp.sum(cp.square(hinge)) + loss += cp.multiply(self.C, cp.sum(cp.square(hinge))) elif self.loss == "hinge": - loss += C_param * cp.sum(hinge) + loss += cp.multiply(self.C, cp.sum(hinge)) objective = loss @@ -116,14 +115,25 @@ def _fit_with_one_class( constraints = [] prob = cp.Problem(cp.Minimize(objective), constraints) - X_param.value = X.numpy() - y_param.value = y.numpy() - C_param.value = self.C - prob.solve(solver="ECOS", abstol=self.tol, max_iters=self.max_iter) + assert prob.is_dpp() + + # convert into pytorch layer + if self.fit_intercept: + fit_lr = CvxpyLayer(prob, [X_param], [w, b]) + else: + fit_lr = CvxpyLayer(prob, [X_param], [w]) + + # prob.solve(solver="ECOS", abstol=self.tol, max_iters=self.max_iter) + if self.fit_intercept: + weight, intercept = fit_lr(X, solver_args={"solve_method": "ECOS", "abstol": self.tol, "max_iters": self.max_iter}) + else: + weight = fit_lr(X, solver_args={"solve_method": "ECOS", "abstol": self.tol, "max_iters": self.max_iter}) + + self.coef_ = torch.cat((self.coef_, torch.t(weight))) + - self.coef_ = torch.cat((self.coef_, torch.t(torch.from_numpy(w.value)))) if self.fit_intercept: self.intercept_ = torch.cat( - (self.intercept_, torch.unsqueeze(torch.from_numpy(b.value), 0)) + (self.intercept_, torch.unsqueeze(intercept, 0)) ) return self diff --git a/torchml/svm/linear_svr.py b/torchml/svm/linear_svr.py index 301a236..b24194e 100644 --- a/torchml/svm/linear_svr.py +++ b/torchml/svm/linear_svr.py @@ -109,7 +109,8 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): """ if self.C < 0: - raise ValueError("Penalty term must be positive; got (C=%r)" % self.C) + raise ValueError( + "Penalty term must be positive; got (C=%r)" % self.C) assert X.shape[0] == y.shape[0], "Number of X and y rows don't match" m, n = X.shape m, n = X.shape From 22afc05ddaedf2f0c74b46e444bf9eb29e977b6b Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Fri, 4 Nov 2022 21:22:52 -0700 Subject: [PATCH 19/31] add gradient support --- tests/unit/svm/linear_svc_test.py | 2 +- torchml/svm/linear_svc.py | 41 ++++++++++++++++--------------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/tests/unit/svm/linear_svc_test.py b/tests/unit/svm/linear_svc_test.py index 9172cc4..35c4169 100644 --- a/tests/unit/svm/linear_svc_test.py +++ b/tests/unit/svm/linear_svc_test.py @@ -7,7 +7,7 @@ from torchml.svm import LinearSVC -n_samples = 5000 +n_samples = 1000 n_features = 5 n_classes = 5 n_informative = 5 diff --git a/torchml/svm/linear_svc.py b/torchml/svm/linear_svc.py index f05ac25..18fa800 100644 --- a/torchml/svm/linear_svc.py +++ b/torchml/svm/linear_svc.py @@ -7,20 +7,20 @@ class LinearSVC(ml.Model): def __init__( - self, - penalty="l2", - loss="squared_hinge", - *, - dual=True, - tol=1e-4, - C=1.0, - multi_class="ovr", - fit_intercept=True, - intercept_scaling=1, - class_weight=None, - verbose=0, - random_state=None, - max_iter=1000, + self, + penalty="l2", + loss="squared_hinge", + *, + dual=True, + tol=1e-4, + C=1.0, + multi_class="ovr", + fit_intercept=True, + intercept_scaling=1, + class_weight=None, + verbose=0, + random_state=None, + max_iter=1000, ): super(LinearSVC, self).__init__() self.coef_ = None @@ -56,7 +56,8 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): self._fit_with_one_class(X, y, x, sample_weight=sample_weight) def decision_function(self, X: torch.Tensor) -> torch.Tensor: - return X @ self.coef_.T + self.intercept_ + scores = X @ self.coef_.T + self.intercept_ + return scores.ravel() if scores.shape[1] == 1 else scores def predict(self, X: torch.Tensor) -> torch.Tensor: """ @@ -74,13 +75,13 @@ def predict(self, X: torch.Tensor) -> torch.Tensor: """ scores = self.decision_function(X) if len(scores.shape) == 1: - indices = (scores > 0).int() + indices = (scores > 0).long() else: indices = scores.argmax(dim=1) return self.classes_[indices] def _fit_with_one_class( - self, X: torch.Tensor, y: torch.Tensor, fitting_class: any, sample_weight=None + self, X: torch.Tensor, y: torch.Tensor, fitting_class: any, sample_weight=None ): m, n = X.shape @@ -105,7 +106,7 @@ def _fit_with_one_class( hinge = cp.pos(ones - cp.multiply(y, X_param @ w)) if self.loss == "squared_hinge": - loss += cp.multiply(self.C, cp.sum(cp.square(hinge))) + loss += cp.multiply(self.C, cp.sum(cp.square(hinge))) elif self.loss == "hinge": loss += cp.multiply(self.C, cp.sum(hinge)) @@ -125,13 +126,13 @@ def _fit_with_one_class( # prob.solve(solver="ECOS", abstol=self.tol, max_iters=self.max_iter) if self.fit_intercept: - weight, intercept = fit_lr(X, solver_args={"solve_method": "ECOS", "abstol": self.tol, "max_iters": self.max_iter}) + weight, intercept = fit_lr(X, solver_args={"solve_method": "ECOS", "abstol": self.tol, + "max_iters": self.max_iter}) else: weight = fit_lr(X, solver_args={"solve_method": "ECOS", "abstol": self.tol, "max_iters": self.max_iter}) self.coef_ = torch.cat((self.coef_, torch.t(weight))) - if self.fit_intercept: self.intercept_ = torch.cat( (self.intercept_, torch.unsqueeze(intercept, 0)) From 9364f721167696afb09b3c75de1d9c2862b5e95d Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Fri, 4 Nov 2022 21:25:05 -0700 Subject: [PATCH 20/31] make format --- torchml/svm/linear_svc.py | 24 ++++++++++++++++++------ torchml/svm/linear_svr.py | 3 +-- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/torchml/svm/linear_svc.py b/torchml/svm/linear_svc.py index c35e37d..ff9614e 100644 --- a/torchml/svm/linear_svc.py +++ b/torchml/svm/linear_svc.py @@ -147,7 +147,7 @@ def decision_function(self, X: torch.Tensor) -> torch.Tensor: """ scores = X @ self.coef_.T + self.intercept_ - return scores.ravel() if scores.shape[1] == 1 else scores + return scores.ravel() if scores.shape[1] == 1 else scores def predict(self, X: torch.Tensor) -> torch.Tensor: """ @@ -167,9 +167,8 @@ def predict(self, X: torch.Tensor) -> torch.Tensor: return self.classes_[indices] def _fit_with_one_class( - self, X: torch.Tensor, y: torch.Tensor, fitting_class: any, sample_weight=None + self, X: torch.Tensor, y: torch.Tensor, fitting_class: any, sample_weight=None ): - m, n = X.shape y = torch.unsqueeze(y, 1) @@ -212,10 +211,23 @@ def _fit_with_one_class( # prob.solve(solver="ECOS", abstol=self.tol, max_iters=self.max_iter) if self.fit_intercept: - weight, intercept = fit_lr(X, solver_args={"solve_method": "ECOS", "abstol": self.tol, - "max_iters": self.max_iter}) + weight, intercept = fit_lr( + X, + solver_args={ + "solve_method": "ECOS", + "abstol": self.tol, + "max_iters": self.max_iter, + }, + ) else: - weight = fit_lr(X, solver_args={"solve_method": "ECOS", "abstol": self.tol, "max_iters": self.max_iter}) + weight = fit_lr( + X, + solver_args={ + "solve_method": "ECOS", + "abstol": self.tol, + "max_iters": self.max_iter, + }, + ) self.coef_ = torch.cat((self.coef_, torch.t(weight))) diff --git a/torchml/svm/linear_svr.py b/torchml/svm/linear_svr.py index b24194e..301a236 100644 --- a/torchml/svm/linear_svr.py +++ b/torchml/svm/linear_svr.py @@ -109,8 +109,7 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): """ if self.C < 0: - raise ValueError( - "Penalty term must be positive; got (C=%r)" % self.C) + raise ValueError("Penalty term must be positive; got (C=%r)" % self.C) assert X.shape[0] == y.shape[0], "Number of X and y rows don't match" m, n = X.shape m, n = X.shape From 3c0d3ce1a4c6a91ac5ea006c7413f6644bf2e46c Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Sat, 5 Nov 2022 16:55:01 -0700 Subject: [PATCH 21/31] fix tests --- tests/unit/svm/linear_svc_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/unit/svm/linear_svc_test.py b/tests/unit/svm/linear_svc_test.py index 35c4169..82fd726 100644 --- a/tests/unit/svm/linear_svc_test.py +++ b/tests/unit/svm/linear_svc_test.py @@ -7,7 +7,7 @@ from torchml.svm import LinearSVC -n_samples = 1000 +n_samples = 4000 n_features = 5 n_classes = 5 n_informative = 5 @@ -33,7 +33,8 @@ def test_LinearSVC(self): end = time.time() # print(end - start) - self.assertTrue(np.allclose(lsvc.coef_.numpy(), reflsvc.coef_, atol=1e-2)) + self.assertTrue(np.allclose( + lsvc.coef_.numpy(), reflsvc.coef_, atol=1e-2)) self.assertTrue( np.allclose(lsvc.intercept_.numpy(), reflsvc.intercept_, atol=1e-2) ) From 60f670c3ddcaf0a1520b4a7ae8a627a682b238a8 Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Thu, 10 Nov 2022 18:12:51 -0800 Subject: [PATCH 22/31] add pylayers to linear svr --- .../neighbors/k_neighbors_classifier_test.py | 2 +- .../unit/neighbors/nearest_neighbors_test.py | 2 +- tests/unit/svm/linear_svc_test.py | 2 +- tests/unit/svm/linear_svr_test.py | 4 +- torchml/svm/linear_svr.py | 52 +++++++++++++------ 5 files changed, 41 insertions(+), 21 deletions(-) diff --git a/tests/unit/neighbors/k_neighbors_classifier_test.py b/tests/unit/neighbors/k_neighbors_classifier_test.py index 1db5479..14fc05a 100644 --- a/tests/unit/neighbors/k_neighbors_classifier_test.py +++ b/tests/unit/neighbors/k_neighbors_classifier_test.py @@ -10,7 +10,7 @@ class TestkneighborsClassifier(unittest.TestCase): def test_knn_classifier(self): - for i in range(1, 20, 1): + for i in range(1, 5, 1): X = np.random.randn(BSZ, DIM) y = np.random.randint(low=-100, high=100, size=BSZ) p = np.random.randn(5, DIM) diff --git a/tests/unit/neighbors/nearest_neighbors_test.py b/tests/unit/neighbors/nearest_neighbors_test.py index 28ceb54..c74053a 100644 --- a/tests/unit/neighbors/nearest_neighbors_test.py +++ b/tests/unit/neighbors/nearest_neighbors_test.py @@ -10,7 +10,7 @@ class Testkneighbors(unittest.TestCase): def test_kneighbors(self): - for i in range(1, 200, 1): + for i in range(1, 5, 1): X = np.random.randn(BSZ, DIM) y = np.random.randn(5, DIM) ref = neighbors.NearestNeighbors(p=i) diff --git a/tests/unit/svm/linear_svc_test.py b/tests/unit/svm/linear_svc_test.py index 82fd726..32e8390 100644 --- a/tests/unit/svm/linear_svc_test.py +++ b/tests/unit/svm/linear_svc_test.py @@ -7,7 +7,7 @@ from torchml.svm import LinearSVC -n_samples = 4000 +n_samples = 5000 n_features = 5 n_classes = 5 n_informative = 5 diff --git a/tests/unit/svm/linear_svr_test.py b/tests/unit/svm/linear_svr_test.py index c5c7a69..6a0fa33 100644 --- a/tests/unit/svm/linear_svr_test.py +++ b/tests/unit/svm/linear_svr_test.py @@ -23,13 +23,13 @@ def test_LinearSVR(self): start = time.time() lsvr.fit(torch.from_numpy(x), torch.from_numpy(y)) end = time.time() - print(end - start) + # print(end - start) start = time.time() reflsvr = svm.LinearSVR(max_iter=100000) reflsvr.fit(x, y) end = time.time() - print(end - start) + # print(end - start) self.assertTrue(np.allclose(lsvr.coef_.numpy(), reflsvr.coef_, atol=1e-2)) self.assertTrue( np.allclose(lsvr.intercept_.numpy(), reflsvr.intercept_, atol=1e-2) diff --git a/torchml/svm/linear_svr.py b/torchml/svm/linear_svr.py index 301a236..2805ec7 100644 --- a/torchml/svm/linear_svr.py +++ b/torchml/svm/linear_svr.py @@ -2,6 +2,7 @@ import torchml as ml import cvxpy as cp +from cvxpylayers.torch import CvxpyLayer class LinearSVR(ml.Model): @@ -109,7 +110,8 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): """ if self.C < 0: - raise ValueError("Penalty term must be positive; got (C=%r)" % self.C) + raise ValueError( + "Penalty term must be positive; got (C=%r)" % self.C) assert X.shape[0] == y.shape[0], "Number of X and y rows don't match" m, n = X.shape m, n = X.shape @@ -120,21 +122,18 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): if self.fit_intercept: b = cp.Variable() X_param = cp.Parameter((m, n)) - y_param = cp.Parameter((m, 1)) - C_param = cp.Parameter(nonneg=True) - epi_param = cp.Parameter() loss = cp.multiply((1 / 2.0), cp.norm(w, 2)) if self.fit_intercept: - hinge = cp.pos(cp.abs(y_param - (X_param @ w + b)) - epi_param) + hinge = cp.pos(cp.abs(y - (X_param @ w + b)) - self.epsilon) else: - hinge = cp.pos(cp.abs(y_param - (X_param @ w + b)) - epi_param) + hinge = cp.pos(cp.abs(y - (X_param @ w + b)) - self.epsilon) if self.loss == "epsilon_insensitive": - loss += C_param * cp.sum(cp.square(hinge)) + loss += self.C * cp.sum(cp.square(hinge)) elif self.loss == "squared_epsilon_insensitive": - loss += C_param * cp.sum(hinge) + loss += self.C * cp.sum(hinge) objective = loss @@ -142,15 +141,36 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): constraints = [] prob = cp.Problem(cp.Minimize(objective), constraints) + assert prob.is_dpp() X_param.value = X.numpy() - y_param.value = y.numpy() - C_param.value = self.C - epi_param.value = self.epsilon - prob.solve(solver="ECOS", abstol=self.tol, max_iters=self.max_iter) - - self.coef_, self.intercept_ = torch.flatten( - torch.from_numpy(w.value) - ), torch.flatten(torch.from_numpy(b.value)) + if self.fit_intercept: + fit_lr = CvxpyLayer(prob, [X_param], [w, b]) + else: + fit_lr = CvxpyLayer(prob, [X_param], [w]) + + if self.fit_intercept: + self.coef_, self.intercept_ = fit_lr( + X, + solver_args={ + "solve_method": "ECOS", + "abstol": self.tol, + "max_iters": self.max_iter, + }, + ) + else: + self.coef_, = fit_lr( + X, + solver_args={ + "solve_method": "ECOS", + "abstol": self.tol, + "max_iters": self.max_iter, + }, + ) + + self.coef_ = torch.flatten(self.coef_) + if self.fit_intercept: + self.intercept_ = torch.flatten(self.intercept_) + return self def predict(self, X: torch.Tensor) -> torch.Tensor: From 902e13f2097abe6a0e92fab8022172fcb5e93109 Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Thu, 10 Nov 2022 18:15:04 -0800 Subject: [PATCH 23/31] make format --- tests/unit/svm/linear_svc_test.py | 3 +-- torchml/svm/linear_svr.py | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/unit/svm/linear_svc_test.py b/tests/unit/svm/linear_svc_test.py index 32e8390..9172cc4 100644 --- a/tests/unit/svm/linear_svc_test.py +++ b/tests/unit/svm/linear_svc_test.py @@ -33,8 +33,7 @@ def test_LinearSVC(self): end = time.time() # print(end - start) - self.assertTrue(np.allclose( - lsvc.coef_.numpy(), reflsvc.coef_, atol=1e-2)) + self.assertTrue(np.allclose(lsvc.coef_.numpy(), reflsvc.coef_, atol=1e-2)) self.assertTrue( np.allclose(lsvc.intercept_.numpy(), reflsvc.intercept_, atol=1e-2) ) diff --git a/torchml/svm/linear_svr.py b/torchml/svm/linear_svr.py index 2805ec7..14047e6 100644 --- a/torchml/svm/linear_svr.py +++ b/torchml/svm/linear_svr.py @@ -110,8 +110,7 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): """ if self.C < 0: - raise ValueError( - "Penalty term must be positive; got (C=%r)" % self.C) + raise ValueError("Penalty term must be positive; got (C=%r)" % self.C) assert X.shape[0] == y.shape[0], "Number of X and y rows don't match" m, n = X.shape m, n = X.shape @@ -158,7 +157,7 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): }, ) else: - self.coef_, = fit_lr( + (self.coef_,) = fit_lr( X, solver_args={ "solve_method": "ECOS", From 28840a409e3a160bc9d22dba8d7ee97ec31fa0c6 Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Thu, 10 Nov 2022 18:39:53 -0800 Subject: [PATCH 24/31] add knnclassifier gradcheck --- tests/unit/neighbors/k_neighbors_classifier_test.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/unit/neighbors/k_neighbors_classifier_test.py b/tests/unit/neighbors/k_neighbors_classifier_test.py index 14fc05a..e0075f2 100644 --- a/tests/unit/neighbors/k_neighbors_classifier_test.py +++ b/tests/unit/neighbors/k_neighbors_classifier_test.py @@ -3,6 +3,8 @@ import torch import torchml as ml import sklearn.neighbors as neighbors +from torch.autograd import gradcheck + BSZ = 1000 DIM = 50 @@ -13,7 +15,7 @@ def test_knn_classifier(self): for i in range(1, 5, 1): X = np.random.randn(BSZ, DIM) y = np.random.randint(low=-100, high=100, size=BSZ) - p = np.random.randn(5, DIM) + p = np.random.randn(1, DIM) ref = neighbors.KNeighborsClassifier( weights="distance" if i % 2 else "uniform", p=i @@ -26,13 +28,19 @@ def test_knn_classifier(self): weights="distance" if i % 2 else "uniform", p=i ) test.fit(torch.from_numpy(X), torch.from_numpy(y)) + inputP = torch.from_numpy(p) + inputP.requires_grad = True + testr = test.predict(torch.from_numpy(p)) testp = test.predict_proba(torch.from_numpy(p)) + self.assertTrue(gradcheck(test.predict, inputP, eps=1e-6, atol=1e-3)) + # self.assertTrue(gradcheck(test.predict_proba, inputP, eps=1e-20, atol=1e-3)) self.assertTrue(np.allclose(refr, testr.numpy())) self.assertTrue(np.allclose(refp, testp.numpy())) refr2 = ref.kneighbors(p) testr2 = test.kneighbors(torch.from_numpy(p)) + self.assertTrue(gradcheck(test.kneighbors, inputP, eps=1e-6, atol=1e-3)) self.assertTrue(np.allclose(refr2[0], testr2[0].numpy())) self.assertTrue(np.allclose(refr2[1], testr2[1].numpy())) From 899e6f16eabd95321e9789403e89c95c5981b988 Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Thu, 10 Nov 2022 18:45:58 -0800 Subject: [PATCH 25/31] add gradcheck to neighbors --- tests/unit/neighbors/k_neighbors_classifier_test.py | 2 +- tests/unit/neighbors/nearest_centroids_test.py | 4 ++++ tests/unit/neighbors/nearest_neighbors_test.py | 5 +++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/unit/neighbors/k_neighbors_classifier_test.py b/tests/unit/neighbors/k_neighbors_classifier_test.py index e0075f2..7333fbc 100644 --- a/tests/unit/neighbors/k_neighbors_classifier_test.py +++ b/tests/unit/neighbors/k_neighbors_classifier_test.py @@ -15,7 +15,7 @@ def test_knn_classifier(self): for i in range(1, 5, 1): X = np.random.randn(BSZ, DIM) y = np.random.randint(low=-100, high=100, size=BSZ) - p = np.random.randn(1, DIM) + p = np.random.randn(5, DIM) ref = neighbors.KNeighborsClassifier( weights="distance" if i % 2 else "uniform", p=i diff --git a/tests/unit/neighbors/nearest_centroids_test.py b/tests/unit/neighbors/nearest_centroids_test.py index 4a6dc5e..5f2b35b 100644 --- a/tests/unit/neighbors/nearest_centroids_test.py +++ b/tests/unit/neighbors/nearest_centroids_test.py @@ -3,6 +3,7 @@ import torch import torchml as ml import sklearn.neighbors as neighbors +from torch.autograd import gradcheck # define numbers of classes & features SAMPLES = 10 @@ -26,6 +27,9 @@ def test_kneighbors(self): refres = ref.predict(samp) centres = cent.predict(torch.from_numpy(samp)).numpy() self.assertTrue(np.array_equal(refres, centres)) + inputSamp = torch.from_numpy(samp) + inputSamp.requires_grad = True + self.assertTrue(gradcheck(cent.predict, inputSamp, eps=1e-6, atol=1e-3)) if __name__ == "__main__": diff --git a/tests/unit/neighbors/nearest_neighbors_test.py b/tests/unit/neighbors/nearest_neighbors_test.py index c74053a..ea274fe 100644 --- a/tests/unit/neighbors/nearest_neighbors_test.py +++ b/tests/unit/neighbors/nearest_neighbors_test.py @@ -3,6 +3,7 @@ import torch import torchml as ml import sklearn.neighbors as neighbors +from torch.autograd import gradcheck BSZ = 128 DIM = 5 @@ -24,6 +25,9 @@ def test_kneighbors(self): # return distance is true self.assertTrue(np.allclose(test[0], res[0].numpy())) self.assertTrue(np.allclose(test[1], res[1].numpy())) + inputY = torch.from_numpy(y) + inputY.requires_grad = True + self.assertTrue(gradcheck(model.kneighbors, inputY, eps=1e-6, atol=1e-3)) ref = neighbors.NearestNeighbors(p=i) ref.fit(X) @@ -35,6 +39,7 @@ def test_kneighbors(self): # return distance is false self.assertTrue(np.allclose(test, res.numpy())) + self.assertTrue(gradcheck(model.kneighbors, inputY, eps=1e-6, atol=1e-3)) if __name__ == "__main__": From 688c4395541a8b88dcc974a887802a3b23a9099d Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Thu, 10 Nov 2022 18:58:14 -0800 Subject: [PATCH 26/31] add gradcheck to all --- .../gaussian_naive_bayes/gaussian_nb_test.py | 5 +++++ tests/unit/linear_model/lasso_test.py | 16 ++++++++++++++++ .../unit/linear_model/linear_regression_test.py | 6 ++++++ tests/unit/linear_model/ridge_test.py | 11 +++++++++++ tests/unit/svm/linear_svc_test.py | 7 +++++++ tests/unit/svm/linear_svr_test.py | 5 +++++ torchml/svm/linear_svr.py | 2 +- 7 files changed, 51 insertions(+), 1 deletion(-) diff --git a/tests/unit/gaussian_naive_bayes/gaussian_nb_test.py b/tests/unit/gaussian_naive_bayes/gaussian_nb_test.py index 3c66c09..73b994b 100644 --- a/tests/unit/gaussian_naive_bayes/gaussian_nb_test.py +++ b/tests/unit/gaussian_naive_bayes/gaussian_nb_test.py @@ -3,6 +3,7 @@ import torch import torchml as ml from sklearn.naive_bayes import GaussianNB +from torch.autograd import gradcheck BSZ = 128 @@ -25,6 +26,10 @@ def test_fit(self): self.assertTrue(np.allclose(ref_preds, model_preds.numpy())) self.assertTrue(np.allclose(ref_preds, model_forward.numpy())) + inputX = torch.from_numpy(X) + inputX.requires_grad = True + self.assertTrue(gradcheck(model.predict, inputX, eps=1e-6, atol=1e-3)) + self.assertTrue(gradcheck(model, inputX, eps=1e-6, atol=1e-3)) if __name__ == "__main__": diff --git a/tests/unit/linear_model/lasso_test.py b/tests/unit/linear_model/lasso_test.py index b02a15e..72b525d 100644 --- a/tests/unit/linear_model/lasso_test.py +++ b/tests/unit/linear_model/lasso_test.py @@ -3,6 +3,7 @@ import torch import torchml as ml import sklearn.linear_model as linear_model +from torch.autograd import gradcheck BSZ = 128 @@ -32,6 +33,11 @@ def test_fit(self): ) ) + inputX = torch.from_numpy(X) + inputX.requires_grad = True + self.assertTrue(gradcheck(model.predict, inputX, eps=1e-6, atol=1e-3)) + self.assertTrue(gradcheck(model, inputX, eps=1e-6, atol=1e-3)) + def test_fit_intercept(self): X = np.random.randn(BSZ, DIM) y = np.random.randn(BSZ, 1) @@ -54,6 +60,11 @@ def test_fit_intercept(self): ) ) + inputX = torch.from_numpy(X) + inputX.requires_grad = True + self.assertTrue(gradcheck(model.predict, inputX, eps=1e-6, atol=1e-3)) + self.assertTrue(gradcheck(model, inputX, eps=1e-6, atol=1e-3)) + def test_fit_positive(self): X = np.random.randn(BSZ, DIM) y = np.random.randn(BSZ, 1) @@ -76,6 +87,11 @@ def test_fit_positive(self): ) ) + inputX = torch.from_numpy(X) + inputX.requires_grad = True + self.assertTrue(gradcheck(model.predict, inputX, eps=1e-6, atol=1e-3)) + self.assertTrue(gradcheck(model, inputX, eps=1e-6, atol=1e-3)) + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/linear_model/linear_regression_test.py b/tests/unit/linear_model/linear_regression_test.py index 0d36710..7eab44e 100644 --- a/tests/unit/linear_model/linear_regression_test.py +++ b/tests/unit/linear_model/linear_regression_test.py @@ -3,6 +3,7 @@ import torch import torchml as ml import sklearn.linear_model as linear_model +from torch.autograd import gradcheck BSZ = 128 @@ -26,6 +27,11 @@ def test_fit(self): self.assertTrue(np.allclose(ref_preds, model_preds.numpy())) self.assertTrue(np.allclose(ref_preds, model_forward.numpy())) + inputX = torch.from_numpy(X) + inputX.requires_grad = True + self.assertTrue(gradcheck(model.predict, inputX, eps=1e-6, atol=1e-3)) + self.assertTrue(gradcheck(model, inputX, eps=1e-6, atol=1e-3)) + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/linear_model/ridge_test.py b/tests/unit/linear_model/ridge_test.py index 315275e..30c257e 100644 --- a/tests/unit/linear_model/ridge_test.py +++ b/tests/unit/linear_model/ridge_test.py @@ -3,6 +3,7 @@ import torch import torchml as ml import sklearn.linear_model as linear_model +from torch.autograd import gradcheck BSZ = 128 @@ -26,6 +27,11 @@ def test_fit(self): self.assertTrue(np.allclose(ref_preds, model_preds.numpy())) self.assertTrue(np.allclose(ref_preds, model_forward.numpy())) + inputX = torch.from_numpy(X) + inputX.requires_grad = True + self.assertTrue(gradcheck(model.predict, inputX, eps=1e-6, atol=1e-3)) + self.assertTrue(gradcheck(model, inputX, eps=1e-6, atol=1e-3)) + def test_fit_intercept(self): X = np.random.randn(BSZ, DIM) y = np.random.randn(BSZ, 1) @@ -42,6 +48,11 @@ def test_fit_intercept(self): self.assertTrue(np.allclose(ref_preds, model_preds.numpy())) self.assertTrue(np.allclose(ref_preds, model_forward.numpy())) + inputX = torch.from_numpy(X) + inputX.requires_grad = True + self.assertTrue(gradcheck(model.predict, inputX, eps=1e-6, atol=1e-3)) + self.assertTrue(gradcheck(model, inputX, eps=1e-6, atol=1e-3)) + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/svm/linear_svc_test.py b/tests/unit/svm/linear_svc_test.py index 9172cc4..3561794 100644 --- a/tests/unit/svm/linear_svc_test.py +++ b/tests/unit/svm/linear_svc_test.py @@ -4,6 +4,7 @@ from sklearn.datasets import make_classification import sklearn.svm as svm import time +from torch.autograd import gradcheck from torchml.svm import LinearSVC @@ -44,11 +45,17 @@ def test_LinearSVC(self): atol=1e-2, ) ) + + inputX = torch.from_numpy(x) + inputX.requires_grad = True + self.assertTrue(gradcheck(lsvc.decision_function, inputX, eps=1e-6, atol=1e-3)) + self.assertTrue( np.allclose( lsvc.predict(torch.from_numpy(x)), reflsvc.predict(x), atol=1e-2 ) ) + self.assertTrue(gradcheck(lsvc.predict, inputX, eps=1e-6, atol=1e-3)) if __name__ == "__main__": diff --git a/tests/unit/svm/linear_svr_test.py b/tests/unit/svm/linear_svr_test.py index 6a0fa33..e6b0857 100644 --- a/tests/unit/svm/linear_svr_test.py +++ b/tests/unit/svm/linear_svr_test.py @@ -4,6 +4,7 @@ from sklearn.datasets import make_regression import sklearn.svm as svm import time +from torch.autograd import gradcheck from torchml.svm import LinearSVR @@ -40,6 +41,10 @@ def test_LinearSVR(self): ) ) + inputX = torch.from_numpy(x) + inputX.requires_grad = True + self.assertTrue(gradcheck(lsvr.predict, inputX, eps=1e-6, atol=1e-3)) + if __name__ == "__main__": unittest.main() diff --git a/torchml/svm/linear_svr.py b/torchml/svm/linear_svr.py index 14047e6..913390e 100644 --- a/torchml/svm/linear_svr.py +++ b/torchml/svm/linear_svr.py @@ -141,7 +141,7 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): prob = cp.Problem(cp.Minimize(objective), constraints) assert prob.is_dpp() - X_param.value = X.numpy() + if self.fit_intercept: fit_lr = CvxpyLayer(prob, [X_param], [w, b]) else: From 2a0f810022f94d21d2b8a01dc7f55f15ef5284d7 Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Sun, 13 Nov 2022 20:37:02 -0800 Subject: [PATCH 27/31] add lasso gpu support --- tests/unit/linear_model/lasso_test.py | 145 ++++++++++++++------------ torchml/linear_model/lasso.py | 6 +- 2 files changed, 80 insertions(+), 71 deletions(-) diff --git a/tests/unit/linear_model/lasso_test.py b/tests/unit/linear_model/lasso_test.py index 72b525d..f88ed25 100644 --- a/tests/unit/linear_model/lasso_test.py +++ b/tests/unit/linear_model/lasso_test.py @@ -12,85 +12,92 @@ class TestLasso(unittest.TestCase): def test_fit(self): - X = np.random.randn(BSZ, DIM) - y = np.random.randn(BSZ, 1) - - ref = linear_model.Lasso(fit_intercept=False) - ref.fit(X, y) - ref_preds = ref.predict(X) - - model = ml.linear_model.Lasso() - model.fit(torch.from_numpy(X), torch.from_numpy(y)) - model_preds = model.predict(torch.from_numpy(X)) - model_forward = model(torch.from_numpy(X)) - - self.assertTrue( - np.allclose(ref_preds, model_preds[0].detach().numpy().flatten(), atol=1e-3) - ) - self.assertTrue( - np.allclose( - ref_preds, model_forward[0].detach().numpy().flatten(), atol=1e-3 + for i in range(2): + device = torch.device('cuda' if torch.cuda.is_available() and i else 'cpu') + + X = np.random.randn(BSZ, DIM) + y = np.random.randn(BSZ, 1) + + ref = linear_model.Lasso(fit_intercept=False) + ref.fit(X, y) + ref_preds = ref.predict(X) + + model = ml.linear_model.Lasso() + model.fit(torch.from_numpy(X).to(device), torch.from_numpy(y).to(device)) + model_preds = model.predict(torch.from_numpy(X).to(device)) + model_forward = model(torch.from_numpy(X).to(device)) + + self.assertTrue( + np.allclose(ref_preds, model_preds[0].detach().cpu().numpy().flatten(), atol=1e-3) + ) + self.assertTrue( + np.allclose( + ref_preds, model_forward[0].detach().cpu().numpy().flatten(), atol=1e-3 + ) ) - ) - inputX = torch.from_numpy(X) - inputX.requires_grad = True - self.assertTrue(gradcheck(model.predict, inputX, eps=1e-6, atol=1e-3)) - self.assertTrue(gradcheck(model, inputX, eps=1e-6, atol=1e-3)) + inputX = torch.from_numpy(X).to(device) + inputX.requires_grad = True + self.assertTrue(gradcheck(model.predict, inputX, eps=1e-6, atol=1e-3)) + self.assertTrue(gradcheck(model, inputX, eps=1e-6, atol=1e-3)) def test_fit_intercept(self): - X = np.random.randn(BSZ, DIM) - y = np.random.randn(BSZ, 1) - - ref = linear_model.Lasso(fit_intercept=True) - ref.fit(X, y) - ref_preds = ref.predict(X) - - model = ml.linear_model.Lasso(fit_intercept=True) - model.fit(torch.from_numpy(X), torch.from_numpy(y)) - model_preds = model.predict(torch.from_numpy(X)) - model_forward = model(torch.from_numpy(X)) - - self.assertTrue( - np.allclose(ref_preds, model_preds[0].detach().numpy().flatten(), atol=1e-3) - ) - self.assertTrue( - np.allclose( - ref_preds, model_forward[0].detach().numpy().flatten(), atol=1e-3 + for i in range(2): + device = torch.device('cuda' if torch.cuda.is_available() and i else 'cpu') + X = np.random.randn(BSZ, DIM) + y = np.random.randn(BSZ, 1) + + ref = linear_model.Lasso(fit_intercept=True) + ref.fit(X, y) + ref_preds = ref.predict(X) + + model = ml.linear_model.Lasso(fit_intercept=True) + model.fit(torch.from_numpy(X).to(device), torch.from_numpy(y).to(device)) + model_preds = model.predict(torch.from_numpy(X).to(device)) + model_forward = model(torch.from_numpy(X).to(device)) + + self.assertTrue( + np.allclose(ref_preds, model_preds[0].detach().cpu().numpy().flatten(), atol=1e-3) + ) + self.assertTrue( + np.allclose( + ref_preds, model_forward[0].detach().cpu().numpy().flatten(), atol=1e-3 + ) ) - ) - inputX = torch.from_numpy(X) - inputX.requires_grad = True - self.assertTrue(gradcheck(model.predict, inputX, eps=1e-6, atol=1e-3)) - self.assertTrue(gradcheck(model, inputX, eps=1e-6, atol=1e-3)) + inputX = torch.from_numpy(X).to(device) + inputX.requires_grad = True + self.assertTrue(gradcheck(model.predict, inputX, eps=1e-6, atol=1e-3)) + self.assertTrue(gradcheck(model, inputX, eps=1e-6, atol=1e-3)) def test_fit_positive(self): - X = np.random.randn(BSZ, DIM) - y = np.random.randn(BSZ, 1) - - ref = linear_model.Lasso(fit_intercept=False, positive=True) - ref.fit(X, y) - ref_preds = ref.predict(X) - - model = ml.linear_model.Lasso(fit_intercept=False, positive=True) - model.fit(torch.from_numpy(X), torch.from_numpy(y)) - model_preds = model.predict(torch.from_numpy(X)) - model_forward = model(torch.from_numpy(X)) - - self.assertTrue( - np.allclose(ref_preds, model_preds[0].detach().numpy().flatten(), atol=1e-3) - ) - self.assertTrue( - np.allclose( - ref_preds, model_forward[0].detach().numpy().flatten(), atol=1e-3 + for i in range(2): + device = torch.device('cuda' if torch.cuda.is_available() and i else 'cpu') + X = np.random.randn(BSZ, DIM) + y = np.random.randn(BSZ, 1) + + ref = linear_model.Lasso(fit_intercept=False, positive=True) + ref.fit(X, y) + ref_preds = ref.predict(X) + + model = ml.linear_model.Lasso(fit_intercept=False, positive=True) + model.fit(torch.from_numpy(X).to(device), torch.from_numpy(y).to(device)) + model_preds = model.predict(torch.from_numpy(X).to(device)) + model_forward = model(torch.from_numpy(X).to(device)) + + self.assertTrue( + np.allclose(ref_preds, model_preds[0].detach().cpu().numpy().flatten(), atol=1e-3) + ) + self.assertTrue( + np.allclose( + ref_preds, model_forward[0].detach().cpu().numpy().flatten(), atol=1e-3 + ) ) - ) - inputX = torch.from_numpy(X) - inputX.requires_grad = True - self.assertTrue(gradcheck(model.predict, inputX, eps=1e-6, atol=1e-3)) - self.assertTrue(gradcheck(model, inputX, eps=1e-6, atol=1e-3)) + inputX = torch.from_numpy(X).to(device) + inputX.requires_grad = True + self.assertTrue(gradcheck(model.predict, inputX, eps=1e-6, atol=1e-3)) + self.assertTrue(gradcheck(model, inputX, eps=1e-6, atol=1e-3)) if __name__ == "__main__": diff --git a/torchml/linear_model/lasso.py b/torchml/linear_model/lasso.py index a6ab155..557642b 100644 --- a/torchml/linear_model/lasso.py +++ b/torchml/linear_model/lasso.py @@ -81,6 +81,8 @@ def fit(self, X: torch.Tensor, y: torch.Tensor): assert X.shape[0] == y.shape[0], "Number of X and y rows don't match" + device = X.device + m, n = X.shape w = cp.Variable((n, 1)) @@ -120,10 +122,10 @@ def fit(self, X: torch.Tensor, y: torch.Tensor): # this object is now callable with pytorch tensors if self.fit_intercept: self.weight, self.intercept = fit_lr( - X, y, torch.tensor(self.alpha, dtype=torch.float64) + X, y, torch.tensor(self.alpha, dtype=torch.float64, device=device) ) else: - self.weight = fit_lr(X, y, torch.tensor(self.alpha, dtype=torch.float64)) + self.weight = fit_lr(X, y, torch.tensor(self.alpha, dtype=torch.float64, device=device)) self.weight = torch.stack(list(self.weight), dim=0) def predict(self, X: torch.Tensor): From a5d433623f0205bdf384130754ade025e0ea413b Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Sun, 13 Nov 2022 20:43:07 -0800 Subject: [PATCH 28/31] add gpu support for ridge and linear regression --- .../linear_model/linear_regression_test.py | 40 ++++++----- tests/unit/linear_model/ridge_test.py | 72 ++++++++++--------- torchml/linear_model/ridge.py | 8 ++- 3 files changed, 64 insertions(+), 56 deletions(-) diff --git a/tests/unit/linear_model/linear_regression_test.py b/tests/unit/linear_model/linear_regression_test.py index 7eab44e..c056daa 100644 --- a/tests/unit/linear_model/linear_regression_test.py +++ b/tests/unit/linear_model/linear_regression_test.py @@ -12,25 +12,27 @@ class TestLinearRegression(unittest.TestCase): def test_fit(self): - X = np.random.randn(BSZ, DIM) - y = np.random.randn(BSZ, 1) - - ref = linear_model.LinearRegression(fit_intercept=False) - ref.fit(X, y) - ref_preds = ref.predict(X) - - model = ml.linear_model.LinearRegression(fit_intercept=False) - model.fit(torch.from_numpy(X), torch.from_numpy(y)) - model_preds = model.predict(torch.from_numpy(X)) - model_forward = model(torch.from_numpy(X)) - - self.assertTrue(np.allclose(ref_preds, model_preds.numpy())) - self.assertTrue(np.allclose(ref_preds, model_forward.numpy())) - - inputX = torch.from_numpy(X) - inputX.requires_grad = True - self.assertTrue(gradcheck(model.predict, inputX, eps=1e-6, atol=1e-3)) - self.assertTrue(gradcheck(model, inputX, eps=1e-6, atol=1e-3)) + for i in range(2): + device = torch.device('cuda' if torch.cuda.is_available() and i else 'cpu') + X = np.random.randn(BSZ, DIM) + y = np.random.randn(BSZ, 1) + + ref = linear_model.LinearRegression(fit_intercept=False) + ref.fit(X, y) + ref_preds = ref.predict(X) + + model = ml.linear_model.LinearRegression(fit_intercept=False) + model.fit(torch.from_numpy(X).to(device), torch.from_numpy(y).to(device)) + model_preds = model.predict(torch.from_numpy(X).to(device)) + model_forward = model(torch.from_numpy(X).to(device)) + + self.assertTrue(np.allclose(ref_preds, model_preds.cpu().numpy())) + self.assertTrue(np.allclose(ref_preds, model_forward.cpu().numpy())) + + inputX = torch.from_numpy(X).to(device) + inputX.requires_grad = True + self.assertTrue(gradcheck(model.predict, inputX, eps=1e-6, atol=1e-3)) + self.assertTrue(gradcheck(model, inputX, eps=1e-6, atol=1e-3)) if __name__ == "__main__": diff --git a/tests/unit/linear_model/ridge_test.py b/tests/unit/linear_model/ridge_test.py index 30c257e..41f2c08 100644 --- a/tests/unit/linear_model/ridge_test.py +++ b/tests/unit/linear_model/ridge_test.py @@ -12,46 +12,50 @@ class TestRidge(unittest.TestCase): def test_fit(self): - X = np.random.randn(BSZ, DIM) - y = np.random.randn(BSZ, 1) + for i in range(2): + device = torch.device('cuda' if torch.cuda.is_available() and i else 'cpu') + X = np.random.randn(BSZ, DIM) + y = np.random.randn(BSZ, 1) - ref = linear_model.Ridge(fit_intercept=False) - ref.fit(X, y) - ref_preds = ref.predict(X) + ref = linear_model.Ridge(fit_intercept=False) + ref.fit(X, y) + ref_preds = ref.predict(X) - model = ml.linear_model.Ridge(fit_intercept=False) - model.fit(torch.from_numpy(X), torch.from_numpy(y)) - model_preds = model.predict(torch.from_numpy(X)) - model_forward = model(torch.from_numpy(X)) + model = ml.linear_model.Ridge(fit_intercept=False) + model.fit(torch.from_numpy(X).to(device), torch.from_numpy(y).to(device)) + model_preds = model.predict(torch.from_numpy(X).to(device)) + model_forward = model(torch.from_numpy(X).to(device)) - self.assertTrue(np.allclose(ref_preds, model_preds.numpy())) - self.assertTrue(np.allclose(ref_preds, model_forward.numpy())) + self.assertTrue(np.allclose(ref_preds, model_preds.cpu().numpy())) + self.assertTrue(np.allclose(ref_preds, model_forward.cpu().numpy())) - inputX = torch.from_numpy(X) - inputX.requires_grad = True - self.assertTrue(gradcheck(model.predict, inputX, eps=1e-6, atol=1e-3)) - self.assertTrue(gradcheck(model, inputX, eps=1e-6, atol=1e-3)) + inputX = torch.from_numpy(X).to(device) + inputX.requires_grad = True + self.assertTrue(gradcheck(model.predict, inputX, eps=1e-6, atol=1e-3)) + self.assertTrue(gradcheck(model, inputX, eps=1e-6, atol=1e-3)) def test_fit_intercept(self): - X = np.random.randn(BSZ, DIM) - y = np.random.randn(BSZ, 1) - - ref = linear_model.Ridge(fit_intercept=True) - ref.fit(X, y) - ref_preds = ref.predict(X) - - model = ml.linear_model.Ridge(fit_intercept=True) - model.fit(torch.from_numpy(X), torch.from_numpy(y)) - model_preds = model.predict(torch.from_numpy(X)) - model_forward = model(torch.from_numpy(X)) - - self.assertTrue(np.allclose(ref_preds, model_preds.numpy())) - self.assertTrue(np.allclose(ref_preds, model_forward.numpy())) - - inputX = torch.from_numpy(X) - inputX.requires_grad = True - self.assertTrue(gradcheck(model.predict, inputX, eps=1e-6, atol=1e-3)) - self.assertTrue(gradcheck(model, inputX, eps=1e-6, atol=1e-3)) + for i in range(2): + device = torch.device('cuda' if torch.cuda.is_available() and i else 'cpu') + X = np.random.randn(BSZ, DIM) + y = np.random.randn(BSZ, 1) + + ref = linear_model.Ridge(fit_intercept=True) + ref.fit(X, y) + ref_preds = ref.predict(X) + + model = ml.linear_model.Ridge(fit_intercept=True) + model.fit(torch.from_numpy(X).to(device), torch.from_numpy(y).to(device)) + model_preds = model.predict(torch.from_numpy(X).to(device)) + model_forward = model(torch.from_numpy(X).to(device)) + + self.assertTrue(np.allclose(ref_preds, model_preds.cpu().numpy())) + self.assertTrue(np.allclose(ref_preds, model_forward.cpu().numpy())) + + inputX = torch.from_numpy(X).to(device) + inputX.requires_grad = True + self.assertTrue(gradcheck(model.predict, inputX, eps=1e-6, atol=1e-3)) + self.assertTrue(gradcheck(model, inputX, eps=1e-6, atol=1e-3)) if __name__ == "__main__": diff --git a/torchml/linear_model/ridge.py b/torchml/linear_model/ridge.py index 1c43555..cace70b 100644 --- a/torchml/linear_model/ridge.py +++ b/torchml/linear_model/ridge.py @@ -80,14 +80,16 @@ def fit(self, X: torch.Tensor, y: torch.Tensor): """ assert X.shape[0] == y.shape[0], "Number of X and y rows don't match" + device = X.device + if self.fit_intercept: - X = torch.cat([torch.ones(X.shape[0], 1), X], dim=1) + X = torch.cat([torch.ones(X.shape[0], 1, device=device), X], dim=1) # L2 penalty term will not apply when alpha is 0 if self.alpha == 0: self.weight = torch.pinverse(X.T @ X) @ X.T @ y else: - ridge = self.alpha * torch.eye(X.shape[1]) + ridge = self.alpha * torch.eye(X.shape[1], device=device) # intercept term is not penalized when fit_intercept is true if self.fit_intercept: ridge[0][0] = 0 @@ -112,5 +114,5 @@ def predict(self, X: torch.Tensor): ~~~ """ if self.fit_intercept: - X = torch.cat([torch.ones(X.shape[0], 1), X], dim=1) + X = torch.cat([torch.ones(X.shape[0], 1, device=X.device), X], dim=1) return X @ self.weight From 38b44000022f26028e84f6f2273605262d8a6234 Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Sun, 13 Nov 2022 20:58:54 -0800 Subject: [PATCH 29/31] add gpu for neighbors --- .../neighbors/k_neighbors_classifier_test.py | 64 ++++++++++--------- .../unit/neighbors/nearest_centroids_test.py | 35 +++++----- .../unit/neighbors/nearest_neighbors_test.py | 62 +++++++++--------- torchml/neighbors/k_neighbors_classifier.py | 32 ++++++---- torchml/neighbors/nearest_centroid.py | 7 +- 5 files changed, 108 insertions(+), 92 deletions(-) diff --git a/tests/unit/neighbors/k_neighbors_classifier_test.py b/tests/unit/neighbors/k_neighbors_classifier_test.py index 7333fbc..1ea9782 100644 --- a/tests/unit/neighbors/k_neighbors_classifier_test.py +++ b/tests/unit/neighbors/k_neighbors_classifier_test.py @@ -12,37 +12,39 @@ class TestkneighborsClassifier(unittest.TestCase): def test_knn_classifier(self): - for i in range(1, 5, 1): - X = np.random.randn(BSZ, DIM) - y = np.random.randint(low=-100, high=100, size=BSZ) - p = np.random.randn(5, DIM) - - ref = neighbors.KNeighborsClassifier( - weights="distance" if i % 2 else "uniform", p=i - ) - ref.fit(X, y) - refr = ref.predict(p) - refp = ref.predict_proba(p) - - test = ml.neighbors.KNeighborsClassifier( - weights="distance" if i % 2 else "uniform", p=i - ) - test.fit(torch.from_numpy(X), torch.from_numpy(y)) - inputP = torch.from_numpy(p) - inputP.requires_grad = True - - testr = test.predict(torch.from_numpy(p)) - testp = test.predict_proba(torch.from_numpy(p)) - self.assertTrue(gradcheck(test.predict, inputP, eps=1e-6, atol=1e-3)) - # self.assertTrue(gradcheck(test.predict_proba, inputP, eps=1e-20, atol=1e-3)) - self.assertTrue(np.allclose(refr, testr.numpy())) - self.assertTrue(np.allclose(refp, testp.numpy())) - - refr2 = ref.kneighbors(p) - testr2 = test.kneighbors(torch.from_numpy(p)) - self.assertTrue(gradcheck(test.kneighbors, inputP, eps=1e-6, atol=1e-3)) - self.assertTrue(np.allclose(refr2[0], testr2[0].numpy())) - self.assertTrue(np.allclose(refr2[1], testr2[1].numpy())) + for i in range(2): + device = torch.device('cuda' if torch.cuda.is_available() and i else 'cpu') + for i in range(1, 5, 1): + X = np.random.randn(BSZ, DIM) + y = np.random.randint(low=-100, high=100, size=BSZ) + p = np.random.randn(5, DIM) + + ref = neighbors.KNeighborsClassifier( + weights="distance" if i % 2 else "uniform", p=i + ) + ref.fit(X, y) + refr = ref.predict(p) + refp = ref.predict_proba(p) + + test = ml.neighbors.KNeighborsClassifier( + weights="distance" if i % 2 else "uniform", p=i + ) + test.fit(torch.from_numpy(X).to(device), torch.from_numpy(y).to(device)) + inputP = torch.from_numpy(p).to(device) + inputP.requires_grad = True + + testr = test.predict(torch.from_numpy(p).to(device)) + testp = test.predict_proba(torch.from_numpy(p).to(device)) + self.assertTrue(gradcheck(test.predict, inputP, eps=1e-6, atol=1e-3)) + # self.assertTrue(gradcheck(test.predict_proba, inputP, eps=1e-20, atol=1e-3)) + self.assertTrue(np.allclose(refr, testr.cpu().numpy())) + self.assertTrue(np.allclose(refp, testp.cpu().numpy())) + + refr2 = ref.kneighbors(p) + testr2 = test.kneighbors(torch.from_numpy(p).to(device)) + self.assertTrue(gradcheck(test.kneighbors, inputP, eps=1e-6, atol=1e-3)) + self.assertTrue(np.allclose(refr2[0], testr2[0].cpu().numpy())) + self.assertTrue(np.allclose(refr2[1], testr2[1].cpu().numpy())) if __name__ == "__main__": diff --git a/tests/unit/neighbors/nearest_centroids_test.py b/tests/unit/neighbors/nearest_centroids_test.py index 5f2b35b..6db9653 100644 --- a/tests/unit/neighbors/nearest_centroids_test.py +++ b/tests/unit/neighbors/nearest_centroids_test.py @@ -13,23 +13,24 @@ class Testcentroids(unittest.TestCase): def test_kneighbors(self): - - for i in range(100): - X = np.random.randn(SAMPLES, FEA) - y = np.random.randint(1, CLS, size=SAMPLES) - torchX = torch.from_numpy(X) - torchy = torch.from_numpy(y) - ref = neighbors.NearestCentroid() - cent = ml.neighbors.NearestCentroid() - ref.fit(X, y) - cent.fit(torchX, torchy) - samp = np.random.randn(SAMPLES, FEA) - refres = ref.predict(samp) - centres = cent.predict(torch.from_numpy(samp)).numpy() - self.assertTrue(np.array_equal(refres, centres)) - inputSamp = torch.from_numpy(samp) - inputSamp.requires_grad = True - self.assertTrue(gradcheck(cent.predict, inputSamp, eps=1e-6, atol=1e-3)) + for i in range(2): + device = torch.device('cuda' if torch.cuda.is_available() and i else 'cpu') + for i in range(100): + X = np.random.randn(SAMPLES, FEA) + y = np.random.randint(1, CLS, size=SAMPLES) + torchX = torch.from_numpy(X).to(device) + torchy = torch.from_numpy(y).to(device) + ref = neighbors.NearestCentroid() + cent = ml.neighbors.NearestCentroid() + ref.fit(X, y) + cent.fit(torchX, torchy) + samp = np.random.randn(SAMPLES, FEA) + refres = ref.predict(samp) + centres = cent.predict(torch.from_numpy(samp).to(device)).cpu().numpy() + self.assertTrue(np.array_equal(refres, centres)) + inputSamp = torch.from_numpy(samp).to(device) + inputSamp.requires_grad = True + self.assertTrue(gradcheck(cent.predict, inputSamp, eps=1e-6, atol=1e-3)) if __name__ == "__main__": diff --git a/tests/unit/neighbors/nearest_neighbors_test.py b/tests/unit/neighbors/nearest_neighbors_test.py index ea274fe..13317d5 100644 --- a/tests/unit/neighbors/nearest_neighbors_test.py +++ b/tests/unit/neighbors/nearest_neighbors_test.py @@ -10,36 +10,38 @@ class Testkneighbors(unittest.TestCase): - def test_kneighbors(self): - for i in range(1, 5, 1): - X = np.random.randn(BSZ, DIM) - y = np.random.randn(5, DIM) - ref = neighbors.NearestNeighbors(p=i) - ref.fit(X) - test = ref.kneighbors(y) - - model = ml.neighbors.NearestNeighbors(p=i) - model.fit(torch.from_numpy(X)) - res = model.kneighbors(torch.from_numpy(y)) - - # return distance is true - self.assertTrue(np.allclose(test[0], res[0].numpy())) - self.assertTrue(np.allclose(test[1], res[1].numpy())) - inputY = torch.from_numpy(y) - inputY.requires_grad = True - self.assertTrue(gradcheck(model.kneighbors, inputY, eps=1e-6, atol=1e-3)) - - ref = neighbors.NearestNeighbors(p=i) - ref.fit(X) - test = ref.kneighbors(y, return_distance=False) - - model = ml.neighbors.NearestNeighbors(p=i) - model.fit(torch.from_numpy(X)) - res = model.kneighbors(torch.from_numpy(y), return_distance=False) - - # return distance is false - self.assertTrue(np.allclose(test, res.numpy())) - self.assertTrue(gradcheck(model.kneighbors, inputY, eps=1e-6, atol=1e-3)) + def test_kneighbors_classifier(self): + for i in range(2): + device = torch.device('cuda' if torch.cuda.is_available() and i else 'cpu') + for i in range(1, 5, 1): + X = np.random.randn(BSZ, DIM) + y = np.random.randn(5, DIM) + ref = neighbors.NearestNeighbors(p=i) + ref.fit(X) + test = ref.kneighbors(y) + + model = ml.neighbors.NearestNeighbors(p=i) + model.fit(torch.from_numpy(X).to(device)) + res = model.kneighbors(torch.from_numpy(y).to(device)) + + # return distance is true + self.assertTrue(np.allclose(test[0], res[0].cpu().numpy())) + self.assertTrue(np.allclose(test[1], res[1].cpu().numpy())) + inputY = torch.from_numpy(y).to(device) + inputY.requires_grad = True + self.assertTrue(gradcheck(model.kneighbors, inputY, eps=1e-6, atol=1e-3)) + + ref = neighbors.NearestNeighbors(p=i) + ref.fit(X) + test = ref.kneighbors(y, return_distance=False) + + model = ml.neighbors.NearestNeighbors(p=i) + model.fit(torch.from_numpy(X).to(device)) + res = model.kneighbors(torch.from_numpy(y).to(device), return_distance=False) + + # return distance is false + self.assertTrue(np.allclose(test, res.cpu().numpy())) + self.assertTrue(gradcheck(model.kneighbors, inputY, eps=1e-6, atol=1e-3)) if __name__ == "__main__": diff --git a/torchml/neighbors/k_neighbors_classifier.py b/torchml/neighbors/k_neighbors_classifier.py index ed56256..62d94a9 100644 --- a/torchml/neighbors/k_neighbors_classifier.py +++ b/torchml/neighbors/k_neighbors_classifier.py @@ -1,7 +1,10 @@ import numbers import warnings +from typing import Tuple, Any import torch +from torch import Tensor + import torchml as ml @@ -107,6 +110,7 @@ def fit(self, X: torch.Tensor, y: torch.Tensor): """ self.KNN.fit(X) self.weights = self._check_weights(weights=self.weights) + device = X.device if y.ndim == 1 or y.ndim == 2 and y.shape[1] == 1: if y.ndim != 1: warnings.warn( @@ -122,7 +126,7 @@ def fit(self, X: torch.Tensor, y: torch.Tensor): self.outputs_2d_ = True self.classes_ = [] - self._y = torch.empty(size=y.shape, dtype=torch.long) + self._y = torch.empty(size=y.shape, dtype=torch.long, device=device) for k in range(self._y.shape[1]): classes, self._y[:, k] = torch.unique(y[:, k], return_inverse=True) self.classes_.append(classes) @@ -141,6 +145,7 @@ def predict(self, X: torch.Tensor) -> torch.Tensor: * `X` (torch.Tensor): the target point """ + device = X.device if self.weights == "uniform": neigh_ind = self.KNN.kneighbors(X, return_distance=False) neigh_dist = None @@ -157,7 +162,7 @@ def predict(self, X: torch.Tensor) -> torch.Tensor: n_queries = len(X) weights = self._get_weights(neigh_dist, self.weights) - y_pred = torch.empty((n_queries, n_outputs), dtype=classes_[0].dtype) + y_pred = torch.empty((n_queries, n_outputs), dtype=classes_[0].dtype, device=device) for k, classes_k in enumerate(classes_): if weights is None: @@ -182,6 +187,7 @@ def predict_proba(self, X: torch.Tensor) -> torch.Tensor: * `X` (torch.Tensor): the target point """ + device = X.device if self.weights == "uniform": neigh_ind = self.KNN.kneighbors(X, return_distance=False) neigh_dist = None @@ -198,13 +204,13 @@ def predict_proba(self, X: torch.Tensor) -> torch.Tensor: weights = self._get_weights(neigh_dist, self.weights) if weights is None: - weights = torch.ones_like(neigh_ind) + weights = torch.ones_like(neigh_ind, device=device) all_rows = torch.arange(n_queries) probabilities = [] for k, classes_k in enumerate(classes_): pred_labels = _y[:, k][neigh_ind] - proba_k = torch.zeros((n_queries, len(classes_k))) + proba_k = torch.zeros((n_queries, len(classes_k)), device=device) for i, idx in enumerate(pred_labels.T): proba_k[all_rows, idx] += weights[:, i] @@ -264,21 +270,23 @@ def _get_weights(self, dist: torch.Tensor, weights: str) -> torch.Tensor: "'distance', or a callable function" ) - def _weighted_mode(self, a: torch.Tensor, w: torch.Tensor) -> torch.Tensor: - res = torch.empty(0) - resi = torch.empty(0) + def _weighted_mode(self, a: torch.Tensor, w: torch.Tensor) -> tuple[Tensor | Any, Tensor | Any]: + device = a.device + res = torch.empty(0, device=device) + resi = torch.empty(0, device=device) for i, x in enumerate(a): res1 = self._weighted_mode_util(x, w) - res = torch.cat((res, torch.tensor([res1[0]]))) - resi = torch.cat((resi, torch.tensor([res1[1]]))) + res = torch.cat((res, torch.tensor([res1[0]], device=device))) + resi = torch.cat((resi, torch.tensor([res1[1]], device=device))) return res, resi - def _weighted_mode_util(self, a: torch.Tensor, w: torch.Tensor) -> torch.Tensor: + def _weighted_mode_util(self, a: torch.Tensor, w: torch.Tensor) -> tuple[Any, Tensor]: + device = a.device unique_a = torch.unique(a) - res = torch.empty(0) + res = torch.empty(0, device=device) for i, x in enumerate(unique_a): cleared = (a == x).float() cleared_weights = cleared * w sum = torch.sum(cleared_weights) - res = torch.cat((res, torch.tensor([sum]))) + res = torch.cat((res, torch.tensor([sum], device=device))) return unique_a[torch.argmax(res)], torch.max(res) diff --git a/torchml/neighbors/nearest_centroid.py b/torchml/neighbors/nearest_centroid.py index d6d7ae3..7d7ee23 100644 --- a/torchml/neighbors/nearest_centroid.py +++ b/torchml/neighbors/nearest_centroid.py @@ -64,6 +64,8 @@ def fit(self, X: torch.Tensor, y: torch.Tensor): * `y` (torch.Tensor): array-like of shape (n_samples,) Target values """ + device = X.device + n_samples, n_features = X.shape # y_ind: idx, y_classes: unique tensor @@ -79,7 +81,7 @@ def fit(self, X: torch.Tensor, y: torch.Tensor): # Mask mapping each class to its members. self.centroids_ = torch.empty( - (n_classes, n_features), dtype=X.dtype, device=torch.device("cpu") + (n_classes, n_features), dtype=X.dtype, device=device ) # Number of clusters in each class. @@ -109,10 +111,11 @@ def predict(self, X: torch.tensor) -> torch.tensor: * (torch.Tensor): the predicted classes """ + device = X.device if X is None or X.size(dim=0) < 1: print("Warning: check input size") - ret = torch.empty(X.size(dim=0)) + ret = torch.empty(X.size(dim=0), device=device) for i in range(X.size(dim=0)): ret[i] = self.classes_[ From f02797217a343924a9472938ad05b12ce38868cb Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Sun, 13 Nov 2022 22:46:27 -0800 Subject: [PATCH 30/31] add gpu support --- tests/unit/linear_model/lasso_test.py | 36 ++++++-- .../linear_model/linear_regression_test.py | 2 +- tests/unit/linear_model/ridge_test.py | 4 +- .../neighbors/k_neighbors_classifier_test.py | 2 +- .../unit/neighbors/nearest_centroids_test.py | 2 +- .../unit/neighbors/nearest_neighbors_test.py | 14 ++- tests/unit/svm/linear_svc_test.py | 90 +++++++++++-------- tests/unit/svm/linear_svr_test.py | 66 ++++++++------ torchml/linear_model/lasso.py | 4 +- torchml/neighbors/k_neighbors_classifier.py | 12 ++- torchml/svm/linear_svc.py | 10 ++- torchml/svm/linear_svr.py | 4 +- 12 files changed, 150 insertions(+), 96 deletions(-) diff --git a/tests/unit/linear_model/lasso_test.py b/tests/unit/linear_model/lasso_test.py index f88ed25..4ed6094 100644 --- a/tests/unit/linear_model/lasso_test.py +++ b/tests/unit/linear_model/lasso_test.py @@ -13,7 +13,7 @@ class TestLasso(unittest.TestCase): def test_fit(self): for i in range(2): - device = torch.device('cuda' if torch.cuda.is_available() and i else 'cpu') + device = torch.device("cuda" if torch.cuda.is_available() and i else "cpu") X = np.random.randn(BSZ, DIM) y = np.random.randn(BSZ, 1) @@ -28,11 +28,17 @@ def test_fit(self): model_forward = model(torch.from_numpy(X).to(device)) self.assertTrue( - np.allclose(ref_preds, model_preds[0].detach().cpu().numpy().flatten(), atol=1e-3) + np.allclose( + ref_preds, + model_preds[0].detach().cpu().numpy().flatten(), + atol=1e-3, + ) ) self.assertTrue( np.allclose( - ref_preds, model_forward[0].detach().cpu().numpy().flatten(), atol=1e-3 + ref_preds, + model_forward[0].detach().cpu().numpy().flatten(), + atol=1e-3, ) ) @@ -43,7 +49,7 @@ def test_fit(self): def test_fit_intercept(self): for i in range(2): - device = torch.device('cuda' if torch.cuda.is_available() and i else 'cpu') + device = torch.device("cuda" if torch.cuda.is_available() and i else "cpu") X = np.random.randn(BSZ, DIM) y = np.random.randn(BSZ, 1) @@ -57,11 +63,17 @@ def test_fit_intercept(self): model_forward = model(torch.from_numpy(X).to(device)) self.assertTrue( - np.allclose(ref_preds, model_preds[0].detach().cpu().numpy().flatten(), atol=1e-3) + np.allclose( + ref_preds, + model_preds[0].detach().cpu().numpy().flatten(), + atol=1e-3, + ) ) self.assertTrue( np.allclose( - ref_preds, model_forward[0].detach().cpu().numpy().flatten(), atol=1e-3 + ref_preds, + model_forward[0].detach().cpu().numpy().flatten(), + atol=1e-3, ) ) @@ -72,7 +84,7 @@ def test_fit_intercept(self): def test_fit_positive(self): for i in range(2): - device = torch.device('cuda' if torch.cuda.is_available() and i else 'cpu') + device = torch.device("cuda" if torch.cuda.is_available() and i else "cpu") X = np.random.randn(BSZ, DIM) y = np.random.randn(BSZ, 1) @@ -86,11 +98,17 @@ def test_fit_positive(self): model_forward = model(torch.from_numpy(X).to(device)) self.assertTrue( - np.allclose(ref_preds, model_preds[0].detach().cpu().numpy().flatten(), atol=1e-3) + np.allclose( + ref_preds, + model_preds[0].detach().cpu().numpy().flatten(), + atol=1e-3, + ) ) self.assertTrue( np.allclose( - ref_preds, model_forward[0].detach().cpu().numpy().flatten(), atol=1e-3 + ref_preds, + model_forward[0].detach().cpu().numpy().flatten(), + atol=1e-3, ) ) diff --git a/tests/unit/linear_model/linear_regression_test.py b/tests/unit/linear_model/linear_regression_test.py index c056daa..ed4c201 100644 --- a/tests/unit/linear_model/linear_regression_test.py +++ b/tests/unit/linear_model/linear_regression_test.py @@ -13,7 +13,7 @@ class TestLinearRegression(unittest.TestCase): def test_fit(self): for i in range(2): - device = torch.device('cuda' if torch.cuda.is_available() and i else 'cpu') + device = torch.device("cuda" if torch.cuda.is_available() and i else "cpu") X = np.random.randn(BSZ, DIM) y = np.random.randn(BSZ, 1) diff --git a/tests/unit/linear_model/ridge_test.py b/tests/unit/linear_model/ridge_test.py index 41f2c08..cd78111 100644 --- a/tests/unit/linear_model/ridge_test.py +++ b/tests/unit/linear_model/ridge_test.py @@ -13,7 +13,7 @@ class TestRidge(unittest.TestCase): def test_fit(self): for i in range(2): - device = torch.device('cuda' if torch.cuda.is_available() and i else 'cpu') + device = torch.device("cuda" if torch.cuda.is_available() and i else "cpu") X = np.random.randn(BSZ, DIM) y = np.random.randn(BSZ, 1) @@ -36,7 +36,7 @@ def test_fit(self): def test_fit_intercept(self): for i in range(2): - device = torch.device('cuda' if torch.cuda.is_available() and i else 'cpu') + device = torch.device("cuda" if torch.cuda.is_available() and i else "cpu") X = np.random.randn(BSZ, DIM) y = np.random.randn(BSZ, 1) diff --git a/tests/unit/neighbors/k_neighbors_classifier_test.py b/tests/unit/neighbors/k_neighbors_classifier_test.py index 1ea9782..719cae3 100644 --- a/tests/unit/neighbors/k_neighbors_classifier_test.py +++ b/tests/unit/neighbors/k_neighbors_classifier_test.py @@ -13,7 +13,7 @@ class TestkneighborsClassifier(unittest.TestCase): def test_knn_classifier(self): for i in range(2): - device = torch.device('cuda' if torch.cuda.is_available() and i else 'cpu') + device = torch.device("cuda" if torch.cuda.is_available() and i else "cpu") for i in range(1, 5, 1): X = np.random.randn(BSZ, DIM) y = np.random.randint(low=-100, high=100, size=BSZ) diff --git a/tests/unit/neighbors/nearest_centroids_test.py b/tests/unit/neighbors/nearest_centroids_test.py index 6db9653..bcbe872 100644 --- a/tests/unit/neighbors/nearest_centroids_test.py +++ b/tests/unit/neighbors/nearest_centroids_test.py @@ -14,7 +14,7 @@ class Testcentroids(unittest.TestCase): def test_kneighbors(self): for i in range(2): - device = torch.device('cuda' if torch.cuda.is_available() and i else 'cpu') + device = torch.device("cuda" if torch.cuda.is_available() and i else "cpu") for i in range(100): X = np.random.randn(SAMPLES, FEA) y = np.random.randint(1, CLS, size=SAMPLES) diff --git a/tests/unit/neighbors/nearest_neighbors_test.py b/tests/unit/neighbors/nearest_neighbors_test.py index 13317d5..a0cec86 100644 --- a/tests/unit/neighbors/nearest_neighbors_test.py +++ b/tests/unit/neighbors/nearest_neighbors_test.py @@ -12,7 +12,7 @@ class Testkneighbors(unittest.TestCase): def test_kneighbors_classifier(self): for i in range(2): - device = torch.device('cuda' if torch.cuda.is_available() and i else 'cpu') + device = torch.device("cuda" if torch.cuda.is_available() and i else "cpu") for i in range(1, 5, 1): X = np.random.randn(BSZ, DIM) y = np.random.randn(5, DIM) @@ -29,7 +29,9 @@ def test_kneighbors_classifier(self): self.assertTrue(np.allclose(test[1], res[1].cpu().numpy())) inputY = torch.from_numpy(y).to(device) inputY.requires_grad = True - self.assertTrue(gradcheck(model.kneighbors, inputY, eps=1e-6, atol=1e-3)) + self.assertTrue( + gradcheck(model.kneighbors, inputY, eps=1e-6, atol=1e-3) + ) ref = neighbors.NearestNeighbors(p=i) ref.fit(X) @@ -37,11 +39,15 @@ def test_kneighbors_classifier(self): model = ml.neighbors.NearestNeighbors(p=i) model.fit(torch.from_numpy(X).to(device)) - res = model.kneighbors(torch.from_numpy(y).to(device), return_distance=False) + res = model.kneighbors( + torch.from_numpy(y).to(device), return_distance=False + ) # return distance is false self.assertTrue(np.allclose(test, res.cpu().numpy())) - self.assertTrue(gradcheck(model.kneighbors, inputY, eps=1e-6, atol=1e-3)) + self.assertTrue( + gradcheck(model.kneighbors, inputY, eps=1e-6, atol=1e-3) + ) if __name__ == "__main__": diff --git a/tests/unit/svm/linear_svc_test.py b/tests/unit/svm/linear_svc_test.py index 3561794..93d70a6 100644 --- a/tests/unit/svm/linear_svc_test.py +++ b/tests/unit/svm/linear_svc_test.py @@ -9,53 +9,65 @@ from torchml.svm import LinearSVC n_samples = 5000 -n_features = 5 -n_classes = 5 -n_informative = 5 +n_features = 4 +n_classes = 2 +n_informative = 4 class TestLinearSVC(unittest.TestCase): def test_LinearSVC(self): - x, y = make_classification( - n_samples=n_samples, - n_features=n_features, - n_classes=n_classes, - n_informative=n_informative, - n_redundant=n_features - n_informative, - ) - lsvc = LinearSVC(max_iter=1000) - start = time.time() - lsvc.fit(torch.from_numpy(x), torch.from_numpy(y)) - end = time.time() - # print(end - start) - start = time.time() - reflsvc = svm.LinearSVC(max_iter=100000) - reflsvc.fit(x, y) - - end = time.time() - # print(end - start) - self.assertTrue(np.allclose(lsvc.coef_.numpy(), reflsvc.coef_, atol=1e-2)) - self.assertTrue( - np.allclose(lsvc.intercept_.numpy(), reflsvc.intercept_, atol=1e-2) - ) - self.assertTrue( - np.allclose( - lsvc.decision_function(torch.from_numpy(x)), - reflsvc.decision_function(x), - atol=1e-2, + for i in range(2): + device = torch.device("cuda" if torch.cuda.is_available and i else "cpu") + x, y = make_classification( + n_samples=n_samples, + n_features=n_features, + n_classes=n_classes, + n_informative=n_informative, + n_redundant=n_features - n_informative, ) - ) + lsvc = LinearSVC(max_iter=1000) + start = time.time() + lsvc.fit(torch.from_numpy(x).to(device), torch.from_numpy(y).to(device)) + end = time.time() + # print(end - start) + start = time.time() + reflsvc = svm.LinearSVC(max_iter=100000) + reflsvc.fit(x, y) - inputX = torch.from_numpy(x) - inputX.requires_grad = True - self.assertTrue(gradcheck(lsvc.decision_function, inputX, eps=1e-6, atol=1e-3)) + end = time.time() + # print(end - start) + self.assertTrue( + np.allclose(lsvc.coef_.cpu().numpy(), reflsvc.coef_, atol=1e-2) + ) + self.assertTrue( + np.allclose( + lsvc.intercept_.cpu().numpy(), reflsvc.intercept_, atol=1e-2 + ) + ) + self.assertTrue( + np.allclose( + lsvc.decision_function(torch.from_numpy(x).to(device)) + .cpu() + .numpy(), + reflsvc.decision_function(x), + atol=1e-2, + ) + ) + + inputX = torch.from_numpy(x).to(device) + inputX.requires_grad = True + self.assertTrue( + gradcheck(lsvc.decision_function, inputX, eps=1e-6, atol=1e-3) + ) - self.assertTrue( - np.allclose( - lsvc.predict(torch.from_numpy(x)), reflsvc.predict(x), atol=1e-2 + self.assertTrue( + np.allclose( + lsvc.predict(torch.from_numpy(x).to(device)).cpu().numpy(), + reflsvc.predict(x), + atol=1e-2, + ) ) - ) - self.assertTrue(gradcheck(lsvc.predict, inputX, eps=1e-6, atol=1e-3)) + self.assertTrue(gradcheck(lsvc.predict, inputX, eps=1e-6, atol=1e-3)) if __name__ == "__main__": diff --git a/tests/unit/svm/linear_svr_test.py b/tests/unit/svm/linear_svr_test.py index e6b0857..c8a5e9a 100644 --- a/tests/unit/svm/linear_svr_test.py +++ b/tests/unit/svm/linear_svr_test.py @@ -9,41 +9,49 @@ from torchml.svm import LinearSVR n_samples = 5000 -n_features = 10 -n_informative = 7 +n_features = 4 +n_informative = 3 class TestLinearSVR(unittest.TestCase): def test_LinearSVR(self): - x, y = make_regression( - n_samples=n_samples, - n_features=n_features, - n_informative=n_informative, - ) - lsvr = LinearSVR(max_iter=1000) - start = time.time() - lsvr.fit(torch.from_numpy(x), torch.from_numpy(y)) - end = time.time() - # print(end - start) - start = time.time() - reflsvr = svm.LinearSVR(max_iter=100000) - reflsvr.fit(x, y) - - end = time.time() - # print(end - start) - self.assertTrue(np.allclose(lsvr.coef_.numpy(), reflsvr.coef_, atol=1e-2)) - self.assertTrue( - np.allclose(lsvr.intercept_.numpy(), reflsvr.intercept_, atol=1e-2) - ) - self.assertTrue( - np.allclose( - lsvr.predict(torch.from_numpy(x)), reflsvr.predict(x), atol=1e-2 + for i in range(2): + device = torch.device("cuda" if torch.cuda.is_available and i else "cpu") + x, y = make_regression( + n_samples=n_samples, + n_features=n_features, + n_informative=n_informative, + ) + lsvr = LinearSVR(max_iter=1000) + start = time.time() + lsvr.fit(torch.from_numpy(x).to(device), torch.from_numpy(y).to(device)) + end = time.time() + # print(end - start) + start = time.time() + reflsvr = svm.LinearSVR(max_iter=100000) + reflsvr.fit(x, y) + + end = time.time() + # print(end - start) + self.assertTrue( + np.allclose(lsvr.coef_.cpu().numpy(), reflsvr.coef_, atol=1e-2) + ) + self.assertTrue( + np.allclose( + lsvr.intercept_.cpu().numpy(), reflsvr.intercept_, atol=1e-2 + ) + ) + self.assertTrue( + np.allclose( + lsvr.predict(torch.from_numpy(x).to(device)).cpu().numpy(), + reflsvr.predict(x), + atol=1e-2, + ) ) - ) - inputX = torch.from_numpy(x) - inputX.requires_grad = True - self.assertTrue(gradcheck(lsvr.predict, inputX, eps=1e-6, atol=1e-3)) + inputX = torch.from_numpy(x).to(device) + inputX.requires_grad = True + self.assertTrue(gradcheck(lsvr.predict, inputX, eps=1e-6, atol=1e-3)) if __name__ == "__main__": diff --git a/torchml/linear_model/lasso.py b/torchml/linear_model/lasso.py index 557642b..5ebe4eb 100644 --- a/torchml/linear_model/lasso.py +++ b/torchml/linear_model/lasso.py @@ -125,7 +125,9 @@ def fit(self, X: torch.Tensor, y: torch.Tensor): X, y, torch.tensor(self.alpha, dtype=torch.float64, device=device) ) else: - self.weight = fit_lr(X, y, torch.tensor(self.alpha, dtype=torch.float64, device=device)) + self.weight = fit_lr( + X, y, torch.tensor(self.alpha, dtype=torch.float64, device=device) + ) self.weight = torch.stack(list(self.weight), dim=0) def predict(self, X: torch.Tensor): diff --git a/torchml/neighbors/k_neighbors_classifier.py b/torchml/neighbors/k_neighbors_classifier.py index 62d94a9..9a97053 100644 --- a/torchml/neighbors/k_neighbors_classifier.py +++ b/torchml/neighbors/k_neighbors_classifier.py @@ -162,7 +162,9 @@ def predict(self, X: torch.Tensor) -> torch.Tensor: n_queries = len(X) weights = self._get_weights(neigh_dist, self.weights) - y_pred = torch.empty((n_queries, n_outputs), dtype=classes_[0].dtype, device=device) + y_pred = torch.empty( + (n_queries, n_outputs), dtype=classes_[0].dtype, device=device + ) for k, classes_k in enumerate(classes_): if weights is None: @@ -270,7 +272,9 @@ def _get_weights(self, dist: torch.Tensor, weights: str) -> torch.Tensor: "'distance', or a callable function" ) - def _weighted_mode(self, a: torch.Tensor, w: torch.Tensor) -> tuple[Tensor | Any, Tensor | Any]: + def _weighted_mode( + self, a: torch.Tensor, w: torch.Tensor + ) -> tuple[Tensor | Any, Tensor | Any]: device = a.device res = torch.empty(0, device=device) resi = torch.empty(0, device=device) @@ -280,7 +284,9 @@ def _weighted_mode(self, a: torch.Tensor, w: torch.Tensor) -> tuple[Tensor | Any resi = torch.cat((resi, torch.tensor([res1[1]], device=device))) return res, resi - def _weighted_mode_util(self, a: torch.Tensor, w: torch.Tensor) -> tuple[Any, Tensor]: + def _weighted_mode_util( + self, a: torch.Tensor, w: torch.Tensor + ) -> tuple[Any, Tensor]: device = a.device unique_a = torch.unique(a) res = torch.empty(0, device=device) diff --git a/torchml/svm/linear_svc.py b/torchml/svm/linear_svc.py index ff9614e..127f4f1 100644 --- a/torchml/svm/linear_svc.py +++ b/torchml/svm/linear_svc.py @@ -123,11 +123,12 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): """ if self.C < 0: raise ValueError("Penalty term must be positive; got (C=%r)" % self.C) + device = X.device self.classes_ = torch.unique(y) assert X.shape[0] == y.shape[0], "Number of X and y rows don't match" m, n = X.shape - self.coef_ = torch.empty((0, n)) - self.intercept_ = torch.empty((0)) + self.coef_ = torch.empty((0, n), device=device) + self.intercept_ = torch.empty((0), device=device) if self.classes_.shape[0] == 2: self._fit_with_one_class( X, y, self.classes_[1], sample_weight=sample_weight @@ -169,6 +170,7 @@ def predict(self, X: torch.Tensor) -> torch.Tensor: def _fit_with_one_class( self, X: torch.Tensor, y: torch.Tensor, fitting_class: any, sample_weight=None ): + device = X.device m, n = X.shape y = torch.unsqueeze(y, 1) @@ -186,9 +188,9 @@ def _fit_with_one_class( loss = cp.multiply((1 / 2.0), cp.norm(w, 2)) if self.fit_intercept: - hinge = cp.pos(ones - cp.multiply(y, X_param @ w + b)) + hinge = cp.pos(ones - cp.multiply(y.cpu(), X_param @ w + b)) else: - hinge = cp.pos(ones - cp.multiply(y, X_param @ w)) + hinge = cp.pos(ones - cp.multiply(y.cpu(), X_param @ w)) if self.loss == "squared_hinge": loss += cp.multiply(self.C, cp.sum(cp.square(hinge))) diff --git a/torchml/svm/linear_svr.py b/torchml/svm/linear_svr.py index 913390e..0e2c67b 100644 --- a/torchml/svm/linear_svr.py +++ b/torchml/svm/linear_svr.py @@ -125,9 +125,9 @@ def fit(self, X: torch.Tensor, y: torch.Tensor, sample_weight=None): loss = cp.multiply((1 / 2.0), cp.norm(w, 2)) if self.fit_intercept: - hinge = cp.pos(cp.abs(y - (X_param @ w + b)) - self.epsilon) + hinge = cp.pos(cp.abs(y.cpu() - (X_param @ w + b)) - self.epsilon) else: - hinge = cp.pos(cp.abs(y - (X_param @ w + b)) - self.epsilon) + hinge = cp.pos(cp.abs(y.cpu() - (X_param @ w + b)) - self.epsilon) if self.loss == "epsilon_insensitive": loss += self.C * cp.sum(cp.square(hinge)) From c629c32a4e41c326af55fb2cfa0f295de5809c4c Mon Sep 17 00:00:00 2001 From: David Zhang <210057zzh@gmail.com> Date: Sun, 13 Nov 2022 22:53:07 -0800 Subject: [PATCH 31/31] add gpu support --- tests/unit/neighbors/k_neighbors_classifier_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/unit/neighbors/k_neighbors_classifier_test.py b/tests/unit/neighbors/k_neighbors_classifier_test.py index 719cae3..84f5d13 100644 --- a/tests/unit/neighbors/k_neighbors_classifier_test.py +++ b/tests/unit/neighbors/k_neighbors_classifier_test.py @@ -30,13 +30,12 @@ def test_knn_classifier(self): weights="distance" if i % 2 else "uniform", p=i ) test.fit(torch.from_numpy(X).to(device), torch.from_numpy(y).to(device)) - inputP = torch.from_numpy(p).to(device) + inputP = torch.from_numpy(p).to(device).double() inputP.requires_grad = True testr = test.predict(torch.from_numpy(p).to(device)) testp = test.predict_proba(torch.from_numpy(p).to(device)) self.assertTrue(gradcheck(test.predict, inputP, eps=1e-6, atol=1e-3)) - # self.assertTrue(gradcheck(test.predict_proba, inputP, eps=1e-20, atol=1e-3)) self.assertTrue(np.allclose(refr, testr.cpu().numpy())) self.assertTrue(np.allclose(refp, testp.cpu().numpy()))