I’ve been doing a little testing with Tensorflow.Net in Unity and I thought I’d share my experiences here.
Installation
Versions used:
- Unity 2018.3.0f1
- Tensorflow 1.15.0
- Tensorflow.Net 0.15.1
I’ve added Tensorflow.Net by using NuGet2Unity, which allows you to convert a NuGet package into a Unity package. The full list of DLLs in the package is:
- Google.Protobuf.dll (3.11.4.0)
- NumSharp.Lite.dll (0.1.7.0)
- Protobuf.Text.dll (0.4.0.0)
- System.Buffers.dll (4.6.25519.3)
- System.Memory.dll (4.6.27129.4)
- System.Numerics.Vectors.dll (4.6.25519.3)
- System.Runtime.CompilerServices.Unsafe.dll (4.6.26919.2)
- TensorFlow.NET.dll (0.15.1.0)
I’ve attached the package to this post for a quick start.
The native TensorFlow DLL itself can simply be added to the Plugins folder after being downloaded here:
In Player Settings, both Scripting Runtime Version and Api Compatibility Level should be set to .NET 4.x.
Test 1 - Linear Regression
The first test, to see whether things were working at all, was a linear regression fitting the formula y = 4x - 10. The code for this, to get you started, is here:
TensorflowRegression.cs
using NumSharp;
using System;
using System.Threading.Tasks;
using Tensorflow;
using UnityEngine;
using static Tensorflow.Binding;
public class TensorflowRegression : TensorflowExample
{
private static readonly System.Random random = new System.Random(12345);
[Header("Training settings")]
public float learningRate = 0.01f;
public int trainingEpochs = 100;
public int displayStep = 10;
[Header("Training dataset")]
public TensorflowRegressionDataset train;
[Header("Testing dataset")]
public TensorflowRegressionDataset test;
protected override void ExecuteMainThread()
{
}
protected override async Task ExecuteAsync()
{
try
{
Debug.Log("Initialize");
// Start generating samples
Task trainTask = train.GenerateAsync();
Task testTask = test.GenerateAsync();
// Placeholders
Tensor placeholderX = tf.placeholder(tf.float32, name: "x");
Tensor placeholderY = tf.placeholder(tf.float32, name: "y");
// We can set a fixed init value in order to debug
RefVariable weight = tf.Variable(-0.06f, name: "weight");
RefVariable bias = tf.Variable(-0.73f, name: "bias");
// Construct a linear model
Tensor layerOutput = tf.add(tf.multiply(placeholderX, weight), bias);
// Mean squared error
Tensor cost = tf.reduce_sum(tf.pow(layerOutput - placeholderY, 2.0f)) / (2.0f * train.samples);
// Gradient descent
Optimizer optimizer = tf.train.GradientDescentOptimizer(learningRate);
Operation trainStep = optimizer.minimize(cost);
// Wait for training samples to be ready
await trainTask;
// Start training
Debug.Log("Start training");
using (Session session = tf.Session())
{
// Run the initializer
session.run(tf.global_variables_initializer());
// Fit all training data
for (int epoch = 0; epoch < trainingEpochs; epoch++)
{
foreach ((float x, float y) in zip<float>(train.X, train.Y))
{
session.run(trainStep, (placeholderX, x), (placeholderY, y));
}
// Display logs per epoch step or if the last epoch
if (((epoch + 1) % displayStep == 0) || (epoch == trainingEpochs - 1))
{
float costValue = session.run(cost, (placeholderX, train.X), (placeholderY, train.Y));
Debug.Log($"Epoch: {epoch + 1}, cost={costValue} " + $", weight={session.run(weight)} , bias={session.run(bias)}");
}
}
Debug.Log("Finished training");
// Wait for testing samples to be ready
await testTask;
foreach (var (x, y) in zip<float>(test.X, test.Y))
{
float testingOutput = session.run(layerOutput, (placeholderX, x), (placeholderY, y));
Debug.Log($"Test input: {x} correct output={y} network output={testingOutput} difference={Mathf.Abs(y - testingOutput)}");
}
}
}
catch (Exception exception)
{
Debug.LogError(exception);
}
}
private static float Formula(float x)
{
return 4.0f * x - 10.0f;
}
[Serializable]
public class TensorflowRegressionDataset
{
public int samples = 10;
public float minimumX = -10.0f;
public float maximumX = 10.0f;
public NDArray X { get; private set; }
public NDArray Y { get; private set; }
public Task GenerateAsync()
{
return Task.Run(Generate);
}
public void Generate()
{
float[] x = new float[samples];
float[] y = new float[samples];
for (int index = 0; index < x.Length; index++)
{
x[index] = (float)(minimumX + (maximumX - minimumX) * random.NextDouble());
y[index] = Formula(x[index]);
}
X = np.array(x);
Y = np.array(y);
}
}
}
TensorflowExample.cs
using NumSharp;
using System;
using System.Drawing;
using System.Threading.Tasks;
using UnityEngine;
public abstract class TensorflowExample : MonoBehaviour
{
protected abstract void ExecuteMainThread();
protected abstract Task ExecuteAsync();
protected async void Start()
{
ExecuteMainThread();
await Task.Run(ExecuteAsyncTimed);
}
private async Task ExecuteAsyncTimed()
{
    long time = DateTime.Now.Ticks;
    // Await the example so the measured time covers the complete async run
    await ExecuteAsync();
    time = DateTime.Now.Ticks - time;
    Debug.Log(FriendlyTime(time));
}
protected static Bitmap CreateImage(NDArray array, int width, int height)
{
Bitmap result = new Bitmap(width, height);
int index = 0;
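// Note: with x as the outer loop, consecutive array values fill the bitmap column by column (column-major)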
for (int x = 0; x < width; x++)
{
for (int y = 0; y < height; y++)
{
float valueFloat = array.GetSingle(0, index++);
int valueInt = Mathf.RoundToInt(255.0f * Mathf.Clamp01(valueFloat));
System.Drawing.Color color = System.Drawing.Color.FromArgb(255, valueInt, valueInt, valueInt);
result.SetPixel(x, y, color);
}
}
return result;
}
private static string FriendlyTime(long ticks)
{
long ms = ticks / 10000;
if (ms < 10000L) return ms + " ms";
if (ms < 100000L) return (ms / 1000L) + "." + ZeroPad(ms / 10L % 100L, 2) + "s";
if (ms < 1000000L) return (ms / 1000L) + "." + ZeroPad(ms / 100L % 10L, 1) + "s";
long s = ms / 1000L;
return s + " s";
}
private static string ZeroPad(long number, int digits)
{
string result = number.ToString();
while (result.Length < digits) result = "0" + result;
return result;
}
}
Test 2 - Classification of the MNIST dataset using convolution
The second test involved the MNIST dataset (handwritten digits from 0 to 9 as 28x28 grayscale images with labels). The dataset consists of 60,000 training examples and 10,000 test cases. It’s a great starting point since the images are not too big, which keeps the training time down a little. This implementation was also fairly straightforward.
My fairly simple/standard network got 9,912 out of the 10,000 test cases correct, an accuracy of 99.12%. (The test cases were of course not used during training.)
The complete network settings are:
General:
- Learning rate 0.001
- Training epochs 100
- Batch size 100
Data shape 28x28x1 (image)
Layer 1, convolution:
- Filter size 5x5
- Filter count 16
- Stride 1
- Relu activation
Data shape 24x24x16
Layer 2, max pooling:
- Size 2
- Stride 2
Data shape 12x12x16
Layer 3, convolution:
- Filter size 5x5
- Filter count 32
- Stride 1
- Relu activation
Data shape 8x8x32
Layer 4, max pooling:
- Size 2
- Stride 2
Data shape 4x4x32
Layer 5, fully connected:
- Outputs 128
- Relu activation
Data shape 1x1x128
Layer 6, fully connected:
- Outputs 10
Data shape 1x1x10 (labels)
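To make the mapping from these settings to code concrete, here is a minimal sketch of the graph construction, assuming the same TF 1.x-style tf.nn API as in the regression example above; the variable names and random-normal initialization are illustrative, and biases are omitted for brevity:
Tensor x = tf.placeholder(tf.float32, new TensorShape(-1, 28, 28, 1), name: "x");
// Layer 1: 5x5 convolution, 16 filters, stride 1, valid padding -> 24x24x16
RefVariable w1 = tf.Variable(tf.random_normal(new int[] { 5, 5, 1, 16 }, stddev: 0.1f), name: "w1");
Tensor conv1 = tf.nn.relu(tf.nn.conv2d(x, w1, new int[] { 1, 1, 1, 1 }, "VALID"));
// Layer 2: 2x2 max pooling, stride 2 -> 12x12x16
Tensor pool1 = tf.nn.max_pool(conv1, new int[] { 1, 2, 2, 1 }, new int[] { 1, 2, 2, 1 }, "VALID");
// Layer 3: 5x5 convolution, 32 filters, stride 1, valid padding -> 8x8x32
RefVariable w2 = tf.Variable(tf.random_normal(new int[] { 5, 5, 16, 32 }, stddev: 0.1f), name: "w2");
Tensor conv2 = tf.nn.relu(tf.nn.conv2d(pool1, w2, new int[] { 1, 1, 1, 1 }, "VALID"));
// Layer 4: 2x2 max pooling, stride 2 -> 4x4x32
Tensor pool2 = tf.nn.max_pool(conv2, new int[] { 1, 2, 2, 1 }, new int[] { 1, 2, 2, 1 }, "VALID");
// Layers 5 and 6: flatten, then two fully connected layers -> 10 logits
Tensor flat = tf.reshape(pool2, new int[] { -1, 4 * 4 * 32 });
RefVariable w3 = tf.Variable(tf.random_normal(new int[] { 4 * 4 * 32, 128 }, stddev: 0.1f), name: "w3");
Tensor dense = tf.nn.relu(tf.matmul(flat, w3));
RefVariable w4 = tf.Variable(tf.random_normal(new int[] { 128, 10 }, stddev: 0.1f), name: "w4");
Tensor logits = tf.matmul(dense, w4);
With valid padding each 5x5 convolution trims 4 pixels off each dimension (28 → 24, 12 → 8), which matches the data shapes listed above.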
A set of some mislabeled images:
At the top are the outputs from the network and at the bottom the actual labels.
Test 3 - Generation of the MNIST dataset using GAN
The next step was to build a GAN to generate images. I chose to base this on the MNIST dataset as well, in this case using only the digit 8 from the training set, again because the MNIST images are small at only 28x28 pixels.
Implementing a GAN comes with some challenges, since three paths run through the network at the same time:
- Noise → generator → discriminator → 1.0, to train the generator on what the discriminator wants to see
- Noise → generator → discriminator → 0.0, to train the discriminator on fake images
- Image → discriminator → 1.0, to train the discriminator on real images
This means:
- Setting up two discriminators that share the same trainable variables
- Keeping track of the trainable variables to specify which should be trained by which optimizer
The basic steps for this (a code sketch follows the list):
- Create a generator converting noise into an image and keep track of its variables
- Create a discriminator based on the generator output (discriminatorFake) and keep track of its variables
- Create a discriminator based on the real images (discriminatorReal) with its variables shared with discriminatorFake.
- Create two optimizers:
tf.train.AdamOptimizer(learningRate).minimize(-tf.reduce_mean(tf.log(discriminatorFake)), var_list: generatorVariables);
tf.train.AdamOptimizer(learningRate).minimize(-tf.reduce_mean(tf.log(discriminatorReal) + tf.log(1.0f - discriminatorFake)), var_list: discriminatorVariables);
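A minimal sketch of that variable bookkeeping, again assuming the API from the examples above; the Weight helper, the placeholders noisePlaceholder and imagePlaceholder, the layer sizes, and the initialization are illustrative, with biases omitted for brevity:
List<RefVariable> generatorVariables = new List<RefVariable>();
List<RefVariable> discriminatorVariables = new List<RefVariable>();
// Create a weight matrix and record it in the given variable list
RefVariable Weight(string name, int inputs, int outputs, List<RefVariable> variables)
{
    RefVariable weight = tf.Variable(tf.random_normal(new int[] { inputs, outputs }, stddev: 0.1f), name: name);
    variables.Add(weight);
    return weight;
}
// Generator: noise (100) -> 256 relu -> 784 sigmoid
RefVariable gw1 = Weight("g_w1", 100, 256, generatorVariables);
RefVariable gw2 = Weight("g_w2", 256, 784, generatorVariables);
Tensor Generator(Tensor noise) => tf.sigmoid(tf.matmul(tf.nn.relu(tf.matmul(noise, gw1)), gw2));
// Discriminator: image (784) -> 256 relu -> 1 sigmoid
RefVariable dw1 = Weight("d_w1", 784, 256, discriminatorVariables);
RefVariable dw2 = Weight("d_w2", 256, 1, discriminatorVariables);
Tensor Discriminator(Tensor image) => tf.sigmoid(tf.matmul(tf.nn.relu(tf.matmul(image, dw1)), dw2));
Tensor discriminatorFake = Discriminator(Generator(noisePlaceholder));
Tensor discriminatorReal = Discriminator(imagePlaceholder);
Because Discriminator captures the same RefVariable instances on both calls, the real and fake paths share their weights automatically, and the two lists give each optimizer exactly the variables it should train.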
The complete network settings are:
General:
- Learning rate 0.0001
- Training epochs 5000
- Batch size 100
- Noise samples 100
Data shape 1x1x100 (noise)
Generator, layer 1, fully connected:
- Size 256
- Relu activation
Data shape 1x1x256
Generator, layer 2, fully connected:
- Size 784
- Sigmoid activation
Data shape 28x28x1 (image)
Discriminator, layer 1, fully connected:
- Size 256
- Relu activation
Data shape 1x1x256
Discriminator, layer 2, fully connected:
- Size 1
- Sigmoid activation
Data shape 1x1x1 (validity)
A set of 10 resulting generated images:
I’m very happy with that result: besides looking like handwritten 8s, the set shows good variation across the generated images.
Test 4 - Generation of the MNIST dataset using DCGAN
In order to improve the generation process, I wanted to use convolution and deconvolution instead of fully connected layers. The deconvolution (or transposed convolution) seems to be difficult to implement using Tensorflow.Net. More to follow if this succeeds…
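For reference, this is the call I’d expect based on the TF 1.x Python op tf.nn.conv2d_transpose; whether Tensorflow.Net 0.15.1 actually exposes it, and with which signature, is exactly the open question, so treat this as hypothetical (input, filter and batchSize are placeholders):
// Hypothetical: upsample a (batch, 7, 7, 32) tensor to (batch, 14, 14, 16)
Tensor deconv = tf.nn.conv2d_transpose(
    input,                                    // (batch, 7, 7, 32)
    filter,                                   // (5, 5, 16, 32) = height, width, output channels, input channels
    output_shape: new int[] { batchSize, 14, 14, 16 },
    strides: new int[] { 1, 2, 2, 1 });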