From 1064ea0e16ffea65bb4170f888a7bcfdc0c20413 Mon Sep 17 00:00:00 2001
From: Saravana Kumar <saravananbscitm@gmail.com>
Date: Thu, 15 Oct 2020 08:54:27 +0530
Subject: [PATCH 1/3] Add Binarizer

---
 .../Microsoft.Spark/ML/Feature/Binarizer.cs   | 89 +++++++++++++++++++
 1 file changed, 89 insertions(+)
 create mode 100644 src/csharp/Microsoft.Spark/ML/Feature/Binarizer.cs
diff --git a/src/csharp/Microsoft.Spark/ML/Feature/Binarizer.cs b/src/csharp/Microsoft.Spark/ML/Feature/Binarizer.cs
new file mode 100644
index 000000000..d1c13ac5a
--- /dev/null
+++ b/src/csharp/Microsoft.Spark/ML/Feature/Binarizer.cs
@@ -0,0 +1,89 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.Spark.Interop;
+using Microsoft.Spark.Interop.Ipc;
+using Microsoft.Spark.Sql;
+
+namespace Microsoft.Spark.ML.Feature
+{
+    /// <summary>
+    /// A <see cref="Binarizer"/> binarizes a column of continuous features given a threshold.
+    /// </summary>
+    public class Binarizer : FeatureBase<Binarizer>, IJvmObjectReferenceProvider
+    {
+        private static readonly string s_binarizerClassName =
+            "org.apache.spark.ml.feature.Binarizer";
+
+        public Binarizer() : base(s_binarizerClassName)
+        {
+        }
+
+        public Binarizer(string uid) : base(s_binarizerClassName, uid)
+        {
+        }
+
+        internal Binarizer(JvmObjectReference jvmObject) : base(jvmObject)
+        {
+        }
+
+        JvmObjectReference IJvmObjectReferenceProvider.Reference => _jvmObject;
+        
+        /// <summary>
+        /// Gets the column that the <see cref="Binarizer"/> should read from
+        /// </summary>
+        /// <returns>string, input column</returns>
+        public string GetInputCol() => (string)(_jvmObject.Invoke("getInputCol"));
+
+        /// <summary>
+        /// Sets the column that the <see cref="Binarizer"/> should read from
+        /// </summary>
+        /// <param name="value">The name of the column to use as the source</param>
+        /// <returns>New <see cref="Binarizer"/> object</returns>
+        public Binarizer SetInputCol(string value) => 
+            WrapAsBinarizer(_jvmObject.Invoke("setInputCol", value));
+
+        /// <summary>
+        /// The <see cref="Binarizer"/> will create a new column in the DataFrame, this is the
+        /// name of the new column.
+        /// </summary>
+        /// <returns>string, the output column</returns>
+        public string GetOutputCol() => (string)(_jvmObject.Invoke("getOutputCol"));
+
+        /// <summary>
+        /// The <see cref="Binarizer"/> will create a new column in the DataFrame, this is the
+        /// name of the new column.
+        /// </summary>
+        /// <param name="value">The name of the new column</param>
+        /// <returns>New <see cref="Binarizer"/> object</returns>
+        public Binarizer SetOutputCol(string value) => 
+            WrapAsBinarizer(_jvmObject.Invoke("setOutputCol", value));
+
+        /// <summary>
+        /// Executes the <see cref="Binarizer"/> and transforms the DataFrame to include the new
+        /// column
+        /// </summary>
+        /// <param name="source">The DataFrame to transform</param>
+        /// <returns>
+        /// New <see cref="DataFrame"/> object with the source <see cref="DataFrame"/> transformed
+        /// </returns>
+        public DataFrame Transform(DataFrame source) => 
+            new DataFrame((JvmObjectReference)_jvmObject.Invoke("transform", source));
+
+        /// <summary>
+        /// Loads the <see cref="Binarizer"/> that was previously saved using Save
+        /// </summary>
+        /// <param name="path">The path the previous <see cref="Binarizer"/> was saved to</param>
+        /// <returns>New <see cref="Binarizer"/> object, loaded from path</returns>
+        public static Binarizer Load(string path)
+        {
+            return WrapAsBinarizer(
+                SparkEnvironment.JvmBridge.CallStaticJavaMethod(
+                    s_binarizerClassName, "load", path));
+        }
+        
+        private static Binarizer WrapAsBinarizer(object obj) => 
+            new Binarizer((JvmObjectReference)obj);
+    }
+}

From 620ff95e6521e103cef0bd2e7a67c08d1a69a5de Mon Sep 17 00:00:00 2001
From: Saravana Kumar <saravananbscitm@gmail.com>
Date: Sat, 6 Feb 2021 21:52:36 +0530
Subject: [PATCH 2/3] Add test for Binarizer

---
 .../IpcTests/ML/Feature/BinarizerTests.cs     | 59 +++++++++++++++++++
 .../Microsoft.Spark/ML/Feature/Binarizer.cs   | 22 +++++++
 2 files changed, 81 insertions(+)
 create mode 100644 src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/BinarizerTests.cs

diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/BinarizerTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/BinarizerTests.cs
new file mode 100644
index 000000000..571824581
--- /dev/null
+++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/BinarizerTests.cs
@@ -0,0 +1,59 @@
+using System.Collections.Generic;
+using System.IO;
+using Microsoft.Spark.ML.Feature;
+using Microsoft.Spark.Sql;
+using Microsoft.Spark.Sql.Types;
+using Microsoft.Spark.UnitTest.TestUtils;
+using Xunit;
+
+namespace Microsoft.Spark.E2ETest.IpcTests.ML.Feature
+{
+    [Collection("Spark E2E Tests")]
+    public class BinarizerTests : FeatureBaseTests<Binarizer>
+    {
+        private readonly SparkSession _spark;
+
+        public BinarizerTests(SparkFixture fixture) : base(fixture)
+        {
+            _spark = fixture.Spark;
+        }
+
+        [Fact]
+        public void TestBinarizer()
+        {
+            DataFrame input = _spark.CreateDataFrame(
+                new List<GenericRow>
+                {
+                    new GenericRow(new object[] {0, 0.1}),
+                    new GenericRow(new object[] {1, 0.8}),
+                    new GenericRow(new object[] {2, 0.2})
+                },
+                new StructType(new List<StructField>
+                {
+                    new StructField("id", new IntegerType()), new StructField("feature", new DoubleType())
+                }));
+            string expectedUid = "theUid";
+            string outputCol = "binarized_feature";
+            Binarizer binarizer = new Binarizer(expectedUid)
+                .SetInputCol("feature")
+                .SetOutputCol(outputCol)
+                .SetThreshold(0.5);
+            DataFrame output = binarizer
+                .Transform(input);
+            StructType outputSchema = binarizer.TransformSchema(input.Schema());
+            
+            Assert.Contains(output.Schema().Fields, (f => f.Name == outputCol));
+            Assert.Contains(outputSchema.Fields, (f => f.Name == outputCol));
+            
+            using (var tempDirectory = new TemporaryDirectory())
+            {
+                string savePath = Path.Join(tempDirectory.Path, "Binarizer");
+                binarizer.Save(savePath);
+
+                Binarizer loadedBinarizer = Binarizer.Load(savePath);
+                Assert.Equal(loadedBinarizer.Uid(), binarizer.Uid());
+            }
+            Assert.Equal(expectedUid, binarizer.Uid());
+        }
+    }
+}
diff --git a/src/csharp/Microsoft.Spark/ML/Feature/Binarizer.cs b/src/csharp/Microsoft.Spark/ML/Feature/Binarizer.cs
index d1c13ac5a..69b17c7b5 100644
--- a/src/csharp/Microsoft.Spark/ML/Feature/Binarizer.cs
+++ b/src/csharp/Microsoft.Spark/ML/Feature/Binarizer.cs
@@ -5,6 +5,7 @@
 using Microsoft.Spark.Interop;
 using Microsoft.Spark.Interop.Ipc;
 using Microsoft.Spark.Sql;
+using Microsoft.Spark.Sql.Types;
 
 namespace Microsoft.Spark.ML.Feature
 {
@@ -43,6 +44,14 @@ internal Binarizer(JvmObjectReference jvmObject) : base(jvmObject)
         /// <returns>New <see cref="Binarizer"/> object</returns>
         public Binarizer SetInputCol(string value) => 
             WrapAsBinarizer(_jvmObject.Invoke("setInputCol", value));
+        
+        /// <summary>
+        /// Param for threshold used to <see cref="Binarizer"/> continuous features.
+        /// </summary>
+        /// <param name="value">Threshold value</param>
+        /// <returns>New <see cref="Binarizer"/> object</returns>
+        public Binarizer SetThreshold(double value) => 
+            WrapAsBinarizer(_jvmObject.Invoke("setThreshold", value));
 
         /// <summary>
         /// The <see cref="Binarizer"/> will create a new column in the DataFrame, this is the
@@ -70,6 +79,19 @@ public Binarizer SetOutputCol(string value) =>
         /// </returns>
         public DataFrame Transform(DataFrame source) => 
             new DataFrame((JvmObjectReference)_jvmObject.Invoke("transform", source));
+        
+        /// <summary>
+        /// Executes the <see cref="Binarizer"/> and transforms the schema.
+        /// </summary>
+        /// <param name="value">The Schema to be transformed</param>
+        /// <returns>
+        /// New <see cref="StructType"/> object with the schema <see cref="StructType"/> transformed.
+        /// </returns>
+        public StructType TransformSchema(StructType value) =>
+            new StructType(
+                (JvmObjectReference)_jvmObject.Invoke(
+                    "transformSchema",
+                    DataType.FromJson(_jvmObject.Jvm, value.Json)));
 
         /// <summary>
         /// Loads the <see cref="Binarizer"/> that was previously saved using Save

From 8543177a87a0b83d1c4935c973badb909b2f0f62 Mon Sep 17 00:00:00 2001
From: Saravana Kumar <saravananbscitm@gmail.com>
Date: Sun, 7 Feb 2021 13:54:21 +0530
Subject: [PATCH 3/3] Add array of param option

---
 .../IpcTests/ML/Feature/BinarizerTests.cs     | 36 ++++++++++---
 .../Microsoft.Spark/ML/Feature/Binarizer.cs   | 50 +++++++++++++++++++
 2 files changed, 79 insertions(+), 7 deletions(-)

diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/BinarizerTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/BinarizerTests.cs
index 571824581..567674301 100644
--- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/BinarizerTests.cs
+++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/BinarizerTests.cs
@@ -1,3 +1,4 @@
+using System;
 using System.Collections.Generic;
 using System.IO;
 using Microsoft.Spark.ML.Feature;
@@ -21,6 +22,7 @@ public BinarizerTests(SparkFixture fixture) : base(fixture)
         [Fact]
         public void TestBinarizer()
         {
+            string inputCol = "feature";
             DataFrame input = _spark.CreateDataFrame(
                 new List<GenericRow>
                 {
@@ -30,21 +32,24 @@ public void TestBinarizer()
                 },
                 new StructType(new List<StructField>
                 {
-                    new StructField("id", new IntegerType()), new StructField("feature", new DoubleType())
+                    new StructField("id", new IntegerType()), new StructField(inputCol, new DoubleType())
                 }));
             string expectedUid = "theUid";
             string outputCol = "binarized_feature";
+            double threshold = 0.5;
             Binarizer binarizer = new Binarizer(expectedUid)
-                .SetInputCol("feature")
+                .SetInputCol(inputCol)
                 .SetOutputCol(outputCol)
-                .SetThreshold(0.5);
-            DataFrame output = binarizer
-                .Transform(input);
+                .SetThreshold(threshold);
+            DataFrame output = binarizer.Transform(input);
             StructType outputSchema = binarizer.TransformSchema(input.Schema());
-            
+
             Assert.Contains(output.Schema().Fields, (f => f.Name == outputCol));
             Assert.Contains(outputSchema.Fields, (f => f.Name == outputCol));
-            
+            Assert.Equal(inputCol, binarizer.GetInputCol());
+            Assert.Equal(outputCol, binarizer.GetOutputCol());
+            Assert.Equal(threshold, binarizer.GetThreshold());
+
             using (var tempDirectory = new TemporaryDirectory())
             {
                 string savePath = Path.Join(tempDirectory.Path, "Binarizer");
@@ -53,7 +58,24 @@ public void TestBinarizer()
                 Binarizer loadedBinarizer = Binarizer.Load(savePath);
                 Assert.Equal(loadedBinarizer.Uid(), binarizer.Uid());
             }
+
             Assert.Equal(expectedUid, binarizer.Uid());
         }
+
+        [Fact]
+        public void TestBinarizerWithArrayParams()
+        {
+            string[] inputCol = new[] {"col1", "col2"};
+            string[] outputCol = new[] {"feature1", "feature2"};
+            double[] threshold = new[] {0.5, 0.8};
+            Binarizer binarizer = new Binarizer()
+                .SetInputCols(inputCol)
+                .SetOutputCols(outputCol)
+                .SetThresholds(threshold);
+
+            Assert.Equal(inputCol, binarizer.GetInputCols());
+            Assert.Equal(outputCol, binarizer.GetOutputCols());
+            Assert.Equal(threshold, binarizer.GetThresholds());
+        }
     }
 }
diff --git a/src/csharp/Microsoft.Spark/ML/Feature/Binarizer.cs b/src/csharp/Microsoft.Spark/ML/Feature/Binarizer.cs
index 69b17c7b5..d5888752d 100644
--- a/src/csharp/Microsoft.Spark/ML/Feature/Binarizer.cs
+++ b/src/csharp/Microsoft.Spark/ML/Feature/Binarizer.cs
@@ -45,6 +45,20 @@ internal Binarizer(JvmObjectReference jvmObject) : base(jvmObject)
         public Binarizer SetInputCol(string value) => 
             WrapAsBinarizer(_jvmObject.Invoke("setInputCol", value));
         
+        /// <summary>
+        /// Gets the columns that the <see cref="Binarizer"/> should read from
+        /// </summary>
+        /// <returns>array of strings, the input columns</returns>
+        public string[] GetInputCols() => (string[])(_jvmObject.Invoke("getInputCols"));
+
+        /// <summary>
+        /// Sets the columns that the <see cref="Binarizer"/> should read from
+        /// </summary>
+        /// <param name="value">The names of the columns to use as the source</param>
+        /// <returns>New <see cref="Binarizer"/> object</returns>
+        public Binarizer SetInputCols(string[] value) => 
+            WrapAsBinarizer(_jvmObject.Invoke("setInputCols", value));
+        
         /// <summary>
         /// Param for threshold used to <see cref="Binarizer"/> continuous features.
         /// </summary>
@@ -52,6 +66,26 @@ public Binarizer SetInputCol(string value) =>
         /// <returns>New <see cref="Binarizer"/> object</returns>
         public Binarizer SetThreshold(double value) => 
             WrapAsBinarizer(_jvmObject.Invoke("setThreshold", value));
+        
+        /// <summary>
+        /// Gets threshold used to <see cref="Binarizer"/> continuous features.
+        /// </summary>
+        /// <returns>double, the threshold</returns>
+        public double GetThreshold() => (double)(_jvmObject.Invoke("getThreshold"));
+        
+        /// <summary>
+        /// Sets the thresholds the <see cref="Binarizer"/> uses to binarize continuous features.
+        /// </summary>
+        /// <param name="value">Threshold values</param>
+        /// <returns>New <see cref="Binarizer"/> object</returns>
+        public Binarizer SetThresholds(double[] value) => 
+            WrapAsBinarizer(_jvmObject.Invoke("setThresholds", value));
+        
+        /// <summary>
+        /// Gets the thresholds the <see cref="Binarizer"/> uses to binarize continuous features.
+        /// </summary>
+        /// <returns>array of double, the thresholds</returns>
+        public double[] GetThresholds() => (double[])(_jvmObject.Invoke("getThresholds"));
 
         /// <summary>
         /// The <see cref="Binarizer"/> will create a new column in the DataFrame, this is the
@@ -68,6 +102,22 @@ public Binarizer SetThreshold(double value) =>
         /// <returns>New <see cref="Binarizer"/> object</returns>
         public Binarizer SetOutputCol(string value) => 
             WrapAsBinarizer(_jvmObject.Invoke("setOutputCol", value));
+        
+        /// <summary>
+        /// The <see cref="Binarizer"/> will create new columns in the DataFrame; these are the
+        /// names of the new columns.
+        /// </summary>
+        /// <returns>array of strings, the output columns</returns>
+        public string[] GetOutputCols() => (string[])(_jvmObject.Invoke("getOutputCols"));
+
+        /// <summary>
+        /// The <see cref="Binarizer"/> will create new columns in the DataFrame; these are the
+        /// names of the new columns.
+        /// </summary>
+        /// <param name="value">The names of the new columns</param>
+        /// <returns>New <see cref="Binarizer"/> object</returns>
+        public Binarizer SetOutputCols(string[] value) => 
+            WrapAsBinarizer(_jvmObject.Invoke("setOutputCols", value));
 
         /// <summary>
         /// Executes the <see cref="Binarizer"/> and transforms the DataFrame to include the new