Hooray, finally made AdamOptimizer work as expected.

Oceania2018 · Oceania2018 · commit bf3d1e969c4a · 2019-07-06T10:10:12.000-05:00
diff --git a/src/TensorFlowNET.Core/APIs/tf.nn.cs b/src/TensorFlowNET.Core/APIs/tf.nn.cs
@@ -3,6 +3,7 @@
 using System.Text;
 using Tensorflow.Operations;
 using Tensorflow.Operations.Activation;
+using static Tensorflow.Python;
 
 namespace Tensorflow
 {
@@ -101,6 +102,25 @@ public static Tensor sparse_softmax_cross_entropy_with_logits(Tensor labels = nu
             Tensor logits = null, string name = null)
                 => nn_ops.sparse_softmax_cross_entropy_with_logits(labels: labels, logits: logits, name: name);
 
+            /// <summary>
+            /// Computes softmax cross entropy between `logits` and `labels`.
+            /// </summary>
+            /// <param name="labels"></param>
+            /// <param name="logits"></param>
+            /// <param name="dim"></param>
+            /// <param name="name"></param>
+            /// <returns></returns>
+            public static Tensor softmax_cross_entropy_with_logits(Tensor labels, Tensor logits, int dim = -1, string name = null)
+            {
+                with(ops.name_scope(name, "softmax_cross_entropy_with_logits_sg", new { logits, labels }), scope =>
+                {
+                    name = scope;
+                    labels = array_ops.stop_gradient(labels, name: "labels_stop_gradient");
+                });
+
+                return softmax_cross_entropy_with_logits_v2(labels, logits, axis: dim, name: name);
+            }
+
             public static Tensor softmax_cross_entropy_with_logits_v2(Tensor labels, Tensor logits, int axis = -1, string name = null)
                 => nn_ops.softmax_cross_entropy_with_logits_v2_helper(labels, logits, axis: axis, name: name);
         }
diff --git a/src/TensorFlowNET.Core/Operations/OpDefLibrary.cs b/src/TensorFlowNET.Core/Operations/OpDefLibrary.cs
@@ -94,14 +94,28 @@ public Operation _apply_op_helper(string op_type_name, string name = null, Dicti
                             if (attrs.ContainsKey(input_arg.TypeAttr))
                                 dtype = (DataType)attrs[input_arg.TypeAttr];
                             else
-                                if (values is Tensor[] values1)
-                                    dtype = values1[0].dtype.as_datatype_enum();
+                                switch (values)
+                                {
+                                    case Tensor[] values1:
+                                        dtype = values1[0].dtype.as_datatype_enum();
+                                        break;
+                                    case object[] values1:
+                                        foreach(var t in values1)
+                                            if(t is Tensor tensor)
+                                            {
+                                                dtype = tensor.dtype.as_datatype_enum();
+                                                break;
+                                            }
+                                        break;
+                                    default:
+                                        throw new NotImplementedException($"can't infer the dtype for {values.GetType()}");
+                                }
 
                             if (dtype == DataType.DtInvalid && default_type_attr_map.ContainsKey(input_arg.TypeAttr))
                                 default_dtype = (DataType)default_type_attr_map[input_arg.TypeAttr];
                         }
 
-                        if(input_arg.IsRef && dtype != DataType.DtInvalid)
+                        if(!input_arg.IsRef && dtype != DataType.DtInvalid)
                             dtype = dtype.as_base_dtype();
 
                         values = ops.internal_convert_n_to_tensor(values, 
diff --git a/src/TensorFlowNET.Core/Operations/Operation.Output.cs b/src/TensorFlowNET.Core/Operations/Operation.Output.cs
@@ -17,9 +17,7 @@ public partial class Operation
 
         private Tensor[] _outputs;
         public Tensor[] outputs => _outputs;
-#if GRAPH_SERIALIZE
-        [JsonIgnore]
-#endif
+
         public Tensor output => _outputs.FirstOrDefault();
 
         public int NumControlOutputs => c_api.TF_OperationNumControlOutputs(_handle);
diff --git a/src/TensorFlowNET.Core/Operations/Operation.cs b/src/TensorFlowNET.Core/Operations/Operation.cs
@@ -1,7 +1,4 @@
 ﻿using Google.Protobuf.Collections;
-#if GRAPH_SERIALIZE
-using Newtonsoft.Json;
-#endif
 using System;
 using System.Collections.Generic;
 using System.Linq;
@@ -37,21 +34,11 @@ public partial class Operation : ITensorOrOperation
         private Graph _graph;
         public string type => OpType;
 
-#if GRAPH_SERIALIZE
-        [JsonIgnore]
         public Graph graph => _graph;
-        [JsonIgnore]
         public int _id => _id_value;
-        [JsonIgnore]
         public int _id_value;
-        [JsonIgnore]
         public Operation op => this;
-#else
-        public Graph graph => _graph;
-        public int _id => _id_value;
-        public int _id_value;
-        public Operation op => this;
-#endif
+
         public TF_DataType dtype => TF_DataType.DtInvalid;
         private Status status = new Status();
 
@@ -60,9 +47,6 @@ public partial class Operation : ITensorOrOperation
         public string Device => c_api.StringPiece(c_api.TF_OperationDevice(_handle));
 
         private NodeDef _node_def;
-#if GRAPH_SERIALIZE
-        [JsonIgnore]
-#endif
         public NodeDef node_def
         {
             get
diff --git a/src/TensorFlowNET.Core/Operations/array_ops.py.cs b/src/TensorFlowNET.Core/Operations/array_ops.py.cs
@@ -492,13 +492,18 @@ public static Tensor concat(Tensor[] values, int axis, string name = "concat")
             {
                 return with(ops.name_scope(name), scope => {
                     var t = ops.convert_to_tensor(axis, name: "concat_dim", dtype: TF_DataType.TF_INT32);
-                    return identity(values[0], name = scope);
+                    return identity(values[0], name: scope);
                 });
             }
 
             return gen_array_ops.concat_v2(values, axis, name: name);
         }
 
+        public static Tensor concat(object[] values, int axis, string name = "concat")
+        {
+            return gen_array_ops.concat_v2(values, axis, name: name);
+        }
+
         public static Tensor gather(Tensor @params, Tensor indices, string name = null, int axis = 0)
             => gen_array_ops.gather_v2(@params, indices, axis, name: name);
 
diff --git a/src/TensorFlowNET.Core/Operations/gen_array_ops.cs b/src/TensorFlowNET.Core/Operations/gen_array_ops.cs
@@ -19,7 +19,7 @@ public static class gen_array_ops
         /// <param name="axis"></param>
         /// <param name="name"></param>
         /// <returns></returns>
-        public static Tensor concat_v2(Tensor[] values, int axis, string name = null)
+        public static Tensor concat_v2<T>(T[] values, int axis, string name = null)
         {
             var _op = _op_def_lib._apply_op_helper("ConcatV2", name: name, args: new { values, axis });
 
diff --git a/src/TensorFlowNET.Core/Operations/nn_ops.cs b/src/TensorFlowNET.Core/Operations/nn_ops.cs
@@ -1,5 +1,6 @@
 ﻿using System;
 using System.Collections.Generic;
+using System.Linq;
 using System.Text;
 using Tensorflow.Operations;
 using static Tensorflow.Python;
@@ -159,8 +160,9 @@ public static Tensor softmax_cross_entropy_with_logits_v2_helper(Tensor labels,
             int axis = -1,
             string name = null)
         {
-            return Python.with(ops.name_scope(name, "softmax_cross_entropy_with_logits", new { }), scope =>
+            return with(ops.name_scope(name, "softmax_cross_entropy_with_logits", new { logits, labels }), scope =>
             {
+                name = scope;
                 var precise_logits = logits;
                 var input_rank = array_ops.rank(precise_logits);
                 var shape = logits.TensorShape;
@@ -170,6 +172,10 @@ public static Tensor softmax_cross_entropy_with_logits_v2_helper(Tensor labels,
 
                 var input_shape = array_ops.shape(precise_logits);
 
+                // Make precise_logits and labels into matrices.
+                precise_logits = _flatten_outer_dims(precise_logits);
+                labels = _flatten_outer_dims(labels);
+
                 // Do the actual op computation.
                 // The second output tensor contains the gradients.  We use it in
                 // _CrossEntropyGrad() in nn_grad but not here.
@@ -186,5 +192,50 @@ public static Tensor softmax_cross_entropy_with_logits_v2_helper(Tensor labels,
                 return cost;
             });
         }
+
+        /// <summary>
+        /// Flattens the outer dimensions of logits and keeps its last dimension.
+        /// </summary>
+        /// <param name="logits"></param>
+        /// <returns></returns>
+        private static Tensor _flatten_outer_dims(Tensor logits)
+        {
+            var rank = array_ops.rank(logits);
+            var last_dim_size = array_ops.slice(array_ops.shape(logits),
+                new[] { math_ops.subtract(rank, 1) },
+                new[] { 1 });
+
+            var ops = array_ops.concat(new[] { new[] { -1 }, (object)last_dim_size }, 0);
+            var output = array_ops.reshape(logits, ops);
+
+            // Set output shape if known.
+            // if not context.executing_eagerly():
+            var shape = logits.TensorShape;
+            if(shape != null && shape.NDim > 0)
+            {
+                var product = 1;
+                var product_valid = true;
+                foreach(var d in shape.Dimensions.Take(shape.NDim - 1))
+                {
+                    if(d == -1)
+                    {
+                        product_valid = false;
+                        break;
+                    }
+                    else
+                    {
+                        product *= d;
+                    }
+                }
+
+                if (product_valid)
+                {
+                    var output_shape = new[] { product };
+                    throw new NotImplementedException("_flatten_outer_dims product_valid");
+                }
+            }
+
+            return output;
+        }
     }
 }
diff --git a/src/TensorFlowNET.Core/Tensors/Tensor.cs b/src/TensorFlowNET.Core/Tensors/Tensor.cs
@@ -22,21 +22,11 @@ public partial class Tensor : IDisposable, ITensorOrOperation
 
         private int _id;
         private Operation _op;
-#if GRAPH_SERIALIZE
-        [JsonIgnore]
-        public int Id => _id;
-        [JsonIgnore]
-        public Graph graph => op?.graph;
-        [JsonIgnore]
-        public Operation op => _op;
-        [JsonIgnore]
-        public Tensor[] outputs => op.outputs;
-#else
+
         public int Id => _id;
         public Graph graph => op?.graph;
         public Operation op => _op;
         public Tensor[] outputs => op.outputs;
-#endif
 
         /// <summary>
         /// The string name of this tensor.
@@ -50,18 +40,12 @@ public partial class Tensor : IDisposable, ITensorOrOperation
 
         private TF_DataType _dtype = TF_DataType.DtInvalid;
         public TF_DataType dtype => _handle == IntPtr.Zero ? _dtype : c_api.TF_TensorType(_handle);
-#if GRAPH_SERIALIZE
-        [JsonIgnore]
-#endif
+
         public ulong bytesize => _handle == IntPtr.Zero ? 0 : c_api.TF_TensorByteSize(_handle);
-#if GRAPH_SERIALIZE
-        [JsonIgnore]
-#endif
+
         public ulong itemsize => _handle == IntPtr.Zero ? 0 : c_api.TF_DataTypeSize(dtype);
         public ulong size => _handle == IntPtr.Zero ? 0 : bytesize / itemsize;
-#if GRAPH_SERIALIZE
-        [JsonIgnore]
-#endif
+
         public IntPtr buffer => _handle == IntPtr.Zero ? IntPtr.Zero : c_api.TF_TensorData(_handle);
         public int num_consumers(TF_Output oper_out) => _handle == IntPtr.Zero ? 0 : c_api.TF_OperationOutputNumConsumers(oper_out);
 
@@ -70,9 +54,6 @@ public partial class Tensor : IDisposable, ITensorOrOperation
         /// <summary>
         /// used for keep other pointer when do implicit operating
         /// </summary>
-#if GRAPH_SERIALIZE
-        [JsonIgnore]
-#endif
         public object Tag { get; set; }
 
         public int[] shape
@@ -140,9 +121,7 @@ public int rank
                 }
             }
         }
-#if GRAPH_SERIALIZE
-        [JsonIgnore]
-#endif
+
         public int NDims => rank;
 
         public string Device => op.Device;
diff --git a/src/TensorFlowNET.Core/Train/AdamOptimizer.cs b/src/TensorFlowNET.Core/Train/AdamOptimizer.cs
@@ -110,7 +110,7 @@ public override Operation _finish(Operation[] update_ops, string name_scope)
                 var update_beta2 = beta2_power.assign(beta2_power * _beta2_t, use_locking: _use_locking);
 
                 operations.Add(update_beta1);
-                operations.Add(update_beta1);
+                operations.Add(update_beta2);
             });
 
             return control_flow_ops.group(operations.ToArray(), name: name_scope);
diff --git a/test/TensorFlowNET.Examples/ImageProcess/DigitRecognitionNN.cs b/test/TensorFlowNET.Examples/ImageProcess/DigitRecognitionNN.cs
@@ -49,8 +49,6 @@ public bool Run()
 
         public Graph BuildGraph()
         {
-            var g = tf.Graph();
-
             // Placeholders for inputs (x) and outputs(y)
             x = tf.placeholder(tf.float32, shape: (-1, img_size_flat), name: "X");
             y = tf.placeholder(tf.float32, shape: (-1, n_classes), name: "Y");
@@ -60,15 +58,16 @@ public Graph BuildGraph()
             // Create a fully-connected layer with n_classes nodes as output layer
             var output_logits = fc_layer(fc1, n_classes, "OUT", use_relu: false);
             // Define the loss function, optimizer, and accuracy
-            loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels: y, logits: output_logits), name: "loss");
+            var logits = tf.nn.softmax_cross_entropy_with_logits(labels: y, logits: output_logits);
+            loss = tf.reduce_mean(logits, name: "loss");
             optimizer = tf.train.AdamOptimizer(learning_rate: learning_rate, name: "Adam-op").minimize(loss);
             var correct_prediction = tf.equal(tf.argmax(output_logits, 1), tf.argmax(y, 1), name: "correct_pred");
             accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name: "accuracy");
 
             // Network predictions
             var cls_prediction = tf.argmax(output_logits, axis: 1, name: "predictions");
 
-            return g;
+            return tf.get_default_graph();
         }
 
         private Tensor fc_layer(Tensor x, int num_units, string name, bool use_relu = true)
@@ -93,16 +92,10 @@ private Tensor fc_layer(Tensor x, int num_units, string name, bool use_relu = tr
             return layer;
         } 
 
-        public Graph ImportGraph()
-        {
-            throw new NotImplementedException();
-        }
-
-        public bool Predict()
-        {
-            throw new NotImplementedException();
-        }
+        public Graph ImportGraph() => throw new NotImplementedException();
 
+        public bool Predict() => throw new NotImplementedException();
+            
         public void PrepareData()
         {
             mnist = MnistDataSet.read_data_sets("mnist", one_hot: true);
@@ -112,7 +105,6 @@ public bool Train()
         {
             // Number of training iterations in each epoch
             var num_tr_iter = mnist.train.labels.len / batch_size;
-
             return with(tf.Session(), sess =>
             {
                 var init = tf.global_variables_initializer();
@@ -153,10 +145,9 @@ public bool Train()
                     print("---------------------------------------------------------");
                     print($"Epoch: {epoch + 1}, validation loss: {loss_val.ToString("0.0000")}, validation accuracy: {accuracy_val.ToString("P")}");
                     print("---------------------------------------------------------");
-
                 }
 
-                return accuracy_val > 0.9;
+                return accuracy_val > 0.95;
             });
         }
 
diff --git a/test/TensorFlowNET.Examples/python/neural_network.py b/test/TensorFlowNET.Examples/python/neural_network.py

Original file line number	Diff line number	Diff line change
`@@ -492,13 +492,18 @@ public static Tensor concat(Tensor[] values, int axis, string name = "concat")`
`492`	`492`	`{`
`493`	`493`	`return with(ops.name_scope(name), scope => {`
`494`	`494`	`var t = ops.convert_to_tensor(axis, name: "concat_dim", dtype: TF_DataType.TF_INT32);`
`495`		`- return identity(values[0], name = scope);`
	`495`	`+ return identity(values[0], name: scope);`
`496`	`496`	`});`
`497`	`497`	`}`
`498`	`498`
`499`	`499`	`return gen_array_ops.concat_v2(values, axis, name: name);`
`500`	`500`	`}`
`501`	`501`
	`502`	`+ public static Tensor concat(object[] values, int axis, string name = "concat")`
	`503`	`+ {`
	`504`	`+ return gen_array_ops.concat_v2(values, axis, name: name);`
	`505`	`+ }`
	`506`	`+`
`502`	`507`	`public static Tensor gather(Tensor @params, Tensor indices, string name = null, int axis = 0)`
`503`	`508`	`=> gen_array_ops.gather_v2(@params, indices, axis, name: name);`
`504`	`509`
Original file line number	Diff line number	Diff line change
`@@ -19,7 +19,7 @@ public static class gen_array_ops`
`19`	`19`	`/// <param name="axis"></param>`
`20`	`20`	`/// <param name="name"></param>`
`21`	`21`	`/// <returns></returns>`
`22`		`- public static Tensor concat_v2(Tensor[] values, int axis, string name = null)`
	`22`	`+ public static Tensor concat_v2<T>(T[] values, int axis, string name = null)`
`23`	`23`	`{`
`24`	`24`	`var _op = _op_def_lib._apply_op_helper("ConcatV2", name: name, args: new { values, axis });`
`25`	`25`