Actually, running the same IWorker multiple times in a row doesn’t work properly. It appears to succeed, but the output differs from what I get when I run the same input tensors on separate workers. This is why I split things into a pool of workers in the first place: to isolate the issues I was seeing. Now that I’ve folded it back into a single worker, it’s broken again.
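To make the symptom concrete, here's roughly the comparison I mean (a minimal sketch, not my real code: the tensor names, shape, and values are placeholders, and it assumes model was already loaded):
using Unity.Sentis;

// Placeholder repro: run the same input twice on ONE worker, then once each on TWO workers.
float[] RunOnce(IWorker w, TensorFloat input)
{
    w.SetInput("input", input);                 // "input"/"output" are placeholder tensor names
    w.Execute();
    TensorFloat output = (TensorFloat)w.PeekOutput("output");
    output.MakeReadable();                      // synchronous readback is fine for a repro
    return output.ToReadOnlyArray();            // copies the data out before the next Execute()
}

// Assumes "model" was loaded earlier via ModelLoader.Load(...).
TensorFloat input = new TensorFloat(new TensorShape(1, 4), new float[] { 1f, 2f, 3f, 4f });

IWorker shared = WorkerFactory.CreateWorker(BackendType.GPUCompute, model);
float[] a = RunOnce(shared, input);
float[] b = RunOnce(shared, input);             // second run on the SAME worker
shared.Dispose();

IWorker w1 = WorkerFactory.CreateWorker(BackendType.GPUCompute, model);
IWorker w2 = WorkerFactory.CreateWorker(BackendType.GPUCompute, model);
float[] c = RunOnce(w1, input);
float[] d = RunOnce(w2, input);                 // same input on a FRESH worker
w1.Dispose();
w2.Dispose();
input.Dispose();

// What I see: c and d agree, but a and b sometimes don't.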
So… let me just paste my code here and see if you can spot anything wrong with my approach:
using System.Collections.Generic;
using System.Diagnostics;
using UnityEngine;
using Unity.Sentis;
using System;
namespace Inferencer
{
public class SentisInferencer : IInferencer
{
private int _inferenceCounter;
private Dictionary<string, Model> _models = new Dictionary<string, Model>();
private Dictionary<string, IWorker> _workers = new Dictionary<string, IWorker>(); // it's apparently legal to reuse the same worker for multiple executions. I assume at .Execute() it generates the output tensors.
private Dictionary<int, (string, IWorker, Tensor, List<Tensor>)> _idToOutputsAndInputs = new Dictionary<int, (string, IWorker, Tensor, List<Tensor>)>(); // id -> (model, engine, outputTensor, inputTensors)
public void LoadModel(string name, string modelPath, bool useCPU=false) // note: useCPU is currently unused in this implementation
{
try
{
ModelAsset modelAsset = Resources.Load<ModelAsset>($"Models/{modelPath}");
Model model = ModelLoader.Load(modelAsset);
_models.Add(name, model);
IWorker engine = ProduceWorker(name);
if (engine != null) // don't cache a null worker if every backend failed
{
_workers.Add(name, engine);
}
}
catch (Exception e)
{
UnityEngine.Debug.LogException(e);
}
}
public int Inference(string name, string outputName, Dictionary<string, (float[], int[], DataType)> nameAndData)
{
if (_workers.TryGetValue(name, out IWorker engine))
{
// put placeholder in the inference outputs dictionary
_inferenceCounter++;
List<Tensor> inputTensors = new List<Tensor>();
foreach (KeyValuePair<string, (float[], int[], DataType)> kvp in nameAndData)
{
float[] srcData = kvp.Value.Item1;
int[] srcDims = kvp.Value.Item2;
DataType dtype = kvp.Value.Item3;
switch (dtype)
{
case Inferencer.DataType.Float:
{
// Sentis treats a single value as a scalar (rank-0) tensor, not a dimensioned one. Weird.
if (srcDims==null)
{
TensorFloat tensorFloat = new TensorFloat(srcData[0]);
inputTensors.Add(tensorFloat);
engine.SetInput(kvp.Key, tensorFloat);
}
else
{
TensorFloat tensorFloat = new TensorFloat(new TensorShape(srcDims), srcData);
inputTensors.Add(tensorFloat);
engine.SetInput(kvp.Key, tensorFloat);
}
break;
}
case Inferencer.DataType.Integer:
{
if (srcDims==null)
{
TensorInt tensorInt = new TensorInt((int)srcData[0]);
inputTensors.Add(tensorInt);
engine.SetInput(kvp.Key, tensorInt);
}
else
{
int[] data = new int[srcData.Length];
for (int i=0; i<data.Length; i++)
{
data[i] = (int)srcData[i];
}
TensorShape inputDataShape = new TensorShape(srcDims);
TensorInt tensorInt = new TensorInt(inputDataShape, data);
inputTensors.Add(tensorInt);
engine.SetInput(kvp.Key, tensorInt);
}
break;
}
default:
UnityEngine.Debug.Log($"Unrecognized input type for {name} = {kvp.Value.Item2}");
break;
}
}
Stopwatch stopWatchInf = Stopwatch.StartNew();
// Start the inference. Note: on the GPU backends Execute() only schedules the work, so this timing doesn't include the GPU execution itself.
engine.Execute();
UnityEngine.Debug.Log($"{name} Inference execute {stopWatchInf.ElapsedMilliseconds}ms");
Tensor outputTensor = engine.PeekOutput(outputName); // Grab a reference to the output so we can stash it
outputTensor.AsyncReadbackRequest(null); // Kick off the async GPU->CPU readback so GetData() can poll for completion
// Store off the unfinished inference for later
_idToOutputsAndInputs.Add(_inferenceCounter, (name, engine, outputTensor, inputTensors));
return _inferenceCounter;
}
return -1;
}
// Returns null if the data isn't present yet.
public (float[], int[]) GetData(int outputID)
{
if (_idToOutputsAndInputs.TryGetValue(outputID, out (string modelName, IWorker engine, Tensor output, List<Tensor> inputs) x))
{
(float[] data, int[] dims) results = (null, null);
if (x.output.IsAsyncReadbackRequestDone())
{
_idToOutputsAndInputs.Remove(outputID);
x.output.MakeReadable();
// Read back the output data and shape for this inference.
results.dims = x.output.shape.ToArray();
results.data = ((TensorFloat)x.output).ToReadOnlyArray(); // only support a single TensorFloat as the outputs for now.
// Dispose the input tensors
foreach (Tensor t in x.inputs)
{
t.Dispose();
}
}
return results;
}
throw new Exception($"Caller requested inference results from an invalid ID {outputID}");
}
public void Shutdown()
{
// Make sure we dispose of all the outstanding input/output tensors, as they may be hanging onto GPU resources.
foreach (KeyValuePair<int, (string modelName, IWorker engine, Tensor output, List<Tensor> inputs)> kvp in _idToOutputsAndInputs)
{
kvp.Value.output.Dispose();
foreach (Tensor t in kvp.Value.inputs)
{
t.Dispose();
}
}
_idToOutputsAndInputs.Clear();
// Release all the workers
foreach (KeyValuePair<string, IWorker> kvp in _workers)
{
kvp.Value.Dispose();
}
_workers.Clear();
_models.Clear();
}
private IWorker ProduceWorker(string modelName)
{
// Find the model first, then spin up a worker for it, trying backends from most to least preferred.
if (!_models.TryGetValue(modelName, out Model model))
{
UnityEngine.Debug.Log($"SentisInferencer model {modelName} not loaded");
return null;
}
BackendType[] backends = { BackendType.GPUCompute, BackendType.GPUCommandBuffer, BackendType.CPU, BackendType.GPUPixel };
foreach (BackendType backend in backends)
{
try
{
IWorker engine = WorkerFactory.CreateWorker(backend, model);
UnityEngine.Debug.Log($"{modelName} is connected to the {backend} backend");
return engine;
}
catch
{
// This backend isn't available; fall through to the next one.
}
}
UnityEngine.Debug.Log($"SentisInferencer failed to create a worker for {modelName}");
return null;
}
}
}
Generally what happens is: I call LoadModel and pass in a name, and my code loads the model and produces a worker. When I want to run inference, I call Inference with the model name, the name of the output tensor, and a dictionary of the inputs. That gets translated into what Sentis wants, and I get back an int handle that lets the caller poll GetData() from Update every frame until the data is ready.
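For reference, the caller side looks roughly like this (a sketch: the model/tensor names and the features/featureCount variables are placeholders, not from my real project):
using System.Collections.Generic;
using Inferencer;

// Hypothetical caller: kick off one inference, then poll for the result every frame.
// "classifier", "input", "output", features, and featureCount are all placeholders.
IInferencer inferencer = new SentisInferencer();
inferencer.LoadModel("classifier", "classifier_v1");

var inputs = new Dictionary<string, (float[], int[], Inferencer.DataType)>
{
    { "input", (features, new int[] { 1, featureCount }, Inferencer.DataType.Float) }
};
int pendingID = inferencer.Inference("classifier", "output", inputs);

// Then in Update(), once per frame until the data shows up:
(float[] data, int[] dims) = inferencer.GetData(pendingID);
if (data != null)
{
    // Inference finished; consume data/dims and stop polling this ID.
}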
This same interface has been implemented with DirectML and directly with OnnxRuntime, among others. It helps isolate the issues with a specific provider and keep the caller code agnostic.
And I should point out: when I had a pool of IWorker objects instead of just one, I would take a worker off the list, use it for a single inference, then return it to the pool when the GetData() call succeeded. That version produced correct output. This one does not.
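The pool version was essentially this (a sketch from memory, simplified to one model; CheckOutWorker/ReturnWorker are hypothetical names, not my exact code):
// Sketch of the worker-pool variant that gave correct output:
private Stack<IWorker> _pool = new Stack<IWorker>();

private IWorker CheckOutWorker(string modelName)
{
    // Reuse an idle worker if one is available, otherwise spin up a new one.
    return _pool.Count > 0 ? _pool.Pop() : ProduceWorker(modelName);
}

private void ReturnWorker(IWorker engine)
{
    // Called once GetData() has successfully read the output back.
    _pool.Push(engine);
}
Same interface on the outside; the only difference was one worker per in-flight inference instead of a single shared one, and that version's outputs matched the separate-workers baseline.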