Hello,
I am trying to implement voice recognition in a game using Kinect. As far as I can tell, the only way to do this is with plugins. Is that true? I have been writing a C# script based on this Kinect SDK example. I am using the free version of Unity. Here is what I have:
using UnityEngine;
using System;
using System.Collections;
using System.IO;
using System.Linq;
using Microsoft.Kinect;
using Microsoft.Speech.AudioFormat;
using Microsoft.Speech.Recognition;
public class VoiceRecognition : MonoBehaviour {

    private SpeechRecognitionEngine speechRecognizer;
    // public KinectSensor CurrentSensor;

    // Find the Kinect-provided en-US recognizer among the installed recognizers.
    private static RecognizerInfo GetKinectRecognizer()
    {
        foreach (RecognizerInfo recognizer in SpeechRecognitionEngine.InstalledRecognizers())
        {
            string value;
            recognizer.AdditionalInfo.TryGetValue("Kinect", out value);
            if ("True".Equals(value, StringComparison.OrdinalIgnoreCase) && "en-US".Equals(recognizer.Culture.Name, StringComparison.OrdinalIgnoreCase))
            {
                return recognizer;
            }
        }
        return null;
    }

    private SpeechRecognitionEngine CreateSpeechRecognizer() {
        RecognizerInfo ri = GetKinectRecognizer();
        if (ri == null)
        {
            Debug.LogError("No Kinect speech recognizer found.");
            return null;
        }

        SpeechRecognitionEngine sre = new SpeechRecognitionEngine(ri.Id);

        // Simple grammar with the phrases to listen for.
        var grammar = new Choices();
        grammar.Add("hello");
        grammar.Add("goodbye");

        var gb = new GrammarBuilder { Culture = ri.Culture };
        gb.Append(grammar);
        var g = new Grammar(gb);
        sre.LoadGrammar(g);

        sre.SpeechRecognized += SpeechRecognized;
        sre.SpeechHypothesized += SpeechHypothesized;
        sre.SpeechRecognitionRejected += SpeechRejected;
        return sre;
    }
    private void SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
    {
        // Treat low-confidence results as rejections instead of falling through to the switch.
        if (e.Result.Confidence < 0.4)
        {
            RejectSpeech(e.Result);
            return;
        }

        switch (e.Result.Text.ToUpperInvariant())
        {
            case "HELLO":
                Debug.Log("Hi there.");
                break;
            case "GOODBYE":
                Debug.Log("Goodbye then.");
                break;
            default:
                break;
        }
    }
    private void SpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
    {
        Debug.Log("Hypothesized: " + e.Result.Text + " " + e.Result.Confidence);
    }

    private void SpeechRejected(object sender, SpeechRecognitionRejectedEventArgs e)
    {
        RejectSpeech(e.Result);
    }

    private void RejectSpeech(RecognitionResult result)
    {
        Debug.Log("Excuse Me?");
    }
    /*
    private KinectSensor InitializeKinect()
    {
        CurrentSensor = KinectSensor.KinectSensors.FirstOrDefault(s => s.Status == KinectStatus.Connected);
        speechRecognizer = CreateSpeechRecognizer();
        CurrentSensor.Start();
        Start();
        return CurrentSensor;
    }

    public VoiceRecognition() {
        InitializeKinect();
    }
    */

    void Start () {
        /*
        var audioSource = CurrentSensor.AudioSource;
        audioSource.BeamAngleMode = BeamAngleMode.Adaptive;
        var kinectStream = audioSource.Start();
        speechRecognizer.SetInputToAudioStream(kinectStream, new SpeechAudioFormatInfo(EncodingFormat.Pcm, 16000, 16, 1, 32000, 2, null));
        speechRecognizer.RecognizeAsync(RecognizeMode.Multiple);
        CurrentSensor.AudioSource.EchoCancellationMode = EchoCancellationMode.None;
        CurrentSensor.AudioSource.AutomaticGainControlEnabled = false;
        */
    }

    void Update () {
    }
}
The script compiles with no errors until I uncomment the commented code. I have already referenced the Microsoft.Kinect and Microsoft.Speech DLLs, so I'm not sure what the problem is. The error I get is:
Internal compiler error. See the console log for more information. output was:
Unhandled Exception: System.TypeLoadException: Could not load type 'Microsoft.Kinect.SkeletonStream' from assembly 'Microsoft.Kinect, Version=1.7.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35'.
at (wrapper managed-to-native) System.Reflection.MonoMethodInfo:get_method_info (intptr,System.Reflection.MonoMethodInfo&)
at System.Reflection.MonoMethodInfo.GetMethodInfo (IntPtr handle) [0x00000] in <filename unknown>:0
at System.Reflection.MonoMethodInfo.GetAttributes (IntPtr handle) [0x00000] in <filename unknown>:0
at System.Reflection.MonoMethod.get_Attributes () [0x00000] in :0
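In case it's useful, one thing I'm considering as a sanity check (just an untested sketch of my own; the class name KinectTypeCheck and the log messages are made up) is a tiny script that tries to resolve the Microsoft.Kinect assembly and the SkeletonStream type through reflection at runtime, to see whether Unity's Mono can load them at all:

using System;
using System.Reflection;
using UnityEngine;

public class KinectTypeCheck : MonoBehaviour
{
    void Start()
    {
        try
        {
            // Try to resolve the assembly and the exact type named in the error.
            Assembly asm = Assembly.Load("Microsoft.Kinect");
            Type skeletonStreamType = asm.GetType("Microsoft.Kinect.SkeletonStream", true);
            Debug.Log("Resolved " + skeletonStreamType.FullName + " from " + asm.FullName);
        }
        catch (Exception ex)
        {
            Debug.LogError("Failed to resolve Microsoft.Kinect.SkeletonStream: " + ex);
        }
    }
}

If that also fails, I suppose the problem is with Mono loading the assembly itself rather than with anything in my VoiceRecognition script.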
Any ideas would be great. Thanks.