I'm applying a neural style transfer model from the ONNX model zoo (https://github.com/onnx/models/tree/main/vision/style_transfer/fast_neural_style) to webcam input in Unity, using Sentis.
The output of the model is mostly white and primary colors - very different from what I’d expect.
Running the model in Python works well.
Am I doing something wrong? I'm proficient with Python and PyTorch, but not at all experienced with .NET or Unity. What's the best way to test or log statistics about the tensors? For example, I'd like to check a tensor's min and max, since I know the model expects input values between 0 and 255.
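This is the kind of helper I have in mind. It's an untested sketch: I'm assuming TensorFloat.MakeReadable() and ToReadOnlyArray() behave the way they do in the Sentis samples, and LogTensorStats is just my own made-up name:

void LogTensorStats(string label, TensorFloat tensor)
{
    tensor.MakeReadable();                   // download GPU data back to the CPU
    float[] data = tensor.ToReadOnlyArray(); // flat copy of every value in the tensor
    float min = float.MaxValue, max = float.MinValue;
    foreach (float v in data)
    {
        if (v < min) min = v;
        if (v > max) max = v;
    }
    Debug.Log($"{label}: shape={tensor.shape}, min={min}, max={max}");
}

Is reading a tensor back to the CPU like this the right approach, or does Sentis have something built in for inspecting tensors?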
The model inference code looks like this:
using System.Collections;
using Unity.Sentis;
using UnityEngine;

public class StyleTransfer : MonoBehaviour
{
    public ModelAsset modelAsset;
    public WebcamController webcamController;

    Model runtimeModel;
    IWorker worker;

    // Start is called before the first frame update
    void Start()
    {
        runtimeModel = ModelLoader.Load(modelAsset);
        worker = WorkerFactory.CreateWorker(BackendType.GPUCompute, runtimeModel);

        // Start the coroutine to run the model every second
        StartCoroutine(RunModelEverySecond());
    }

    IEnumerator RunModelEverySecond()
    {
        while (true)
        {
            if (webcamController.capturedFrame != null)
            {
                RunInference(webcamController.capturedFrame);
            }

            // Wait before running the model again
            yield return new WaitForSeconds(1f);
        }
    }

    public void RunInference(Texture image)
    {
        using var ops = new CPUOps();

        // Convert the webcam frame to a 224x224 RGB input tensor
        using (TensorFloat inputTensor = TextureConverter.ToTensor(image, width: 224, height: 224, channels: 3))
        {
            // Run the model; the input should be in the range 0..255
            worker.Execute(inputTensor);

            using (TensorFloat outputTensor = worker.PeekOutput() as TensorFloat)
            {
                outputTensor.MakeReadable();

                // Clamp the output to 0..255 and display it on this object's material
                GetComponent<Renderer>().material.mainTexture =
                    TextureConverter.ToTexture(ops.Clip(outputTensor, 0f, 255f));
            }
        }
    }

    void OnDestroy()
    {
        // Release the worker's resources when this component is destroyed
        worker?.Dispose();
    }
}
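With that helper, I'd instrument RunInference roughly like this (again just a sketch, using the hypothetical LogTensorStats from above):

    LogTensorStats("input", inputTensor);   // expecting roughly 0..255
    worker.Execute(inputTensor);
    using (TensorFloat outputTensor = worker.PeekOutput() as TensorFloat)
    {
        LogTensorStats("output", outputTensor);
        // ...
    }

If the input min/max came back as 0..1 instead of 0..255, I assume that would explain the output I'm seeing.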