I am developing the following mechanic for a Unity 2D mobile game. The player speaks words into the phone's microphone.
The words the player says appear on the screen.
The game has a vocabulary (a list of accepted words).
If a word the player says matches a word in the vocabulary, the player's score increases by 1.
(For example: the vocabulary contains Apple, Pear, Banana…
If the player says Apple, the score increases by 1. If the player says words that are not in the vocabulary, such as Appe, Me, food, mother…, the score neither increases nor decreases.)
How can I create this system? I experimented with the Google Speech-to-Text API and wrote a few drafts of code, but they didn't work. Are there any videos or other resources you can recommend? I am currently pasting the API key I got from Google into my code, but it does not pick up my voice at all: it does not show the words I say in the UI, and it does not check them against the vocabulary.
<using System.Collections;
using UnityEngine;
using UnityEngine.Networking;
using TMPro;
using System.IO;
public class VoiceManager : MonoBehaviour
{
// UI label that shows the most recently recognized transcript (assign it in the Inspector).
public TextMeshProUGUI outputText;
// Microphone recording buffer; assigned from Microphone.Start in StartMicrophone.
private AudioClip audioClip;
// Google Cloud Speech-to-Text API key. Set this in the Inspector rather than hard-coding it:
// a key committed to source (or pasted publicly) can be used by anyone and should be revoked
// and replaced. The previous literal also used typographic quotes, which do not compile.
[SerializeField] private string apiKey = "";
// Unity lifecycle callback: launches the microphone capture / recognition coroutine.
void Start()
{
    IEnumerator captureRoutine = StartMicrophone();
    StartCoroutine(captureRoutine);
}
/// <summary>
/// Starts a looping 10-second, 44.1 kHz recording on the default microphone and then,
/// every 2 seconds, sends the recording buffer to the speech API via RecognizeSpeech.
/// </summary>
/// <remarks>
/// NOTE(review): each request uploads the whole looping buffer, not only the newest audio,
/// so earlier speech is re-sent on every cycle — confirm this is intended.
/// NOTE(review): on mobile the app presumably needs microphone permission before recording
/// can start — verify against the target platform's permission flow.
/// </remarks>
private IEnumerator StartMicrophone()
{
    // Start the default microphone (device name null) in loop mode.
    audioClip = Microphone.Start(null, true, 10, 44100);

    // Microphone.Start returns null when recording could not be started (for example, no
    // device available). Stop here instead of polling forever with a null clip.
    if (audioClip == null)
    {
        Debug.LogError("Microphone could not be started. Speech recognition is disabled.");
        yield break;
    }

    // Give the device a moment to begin delivering samples.
    yield return new WaitForSeconds(1f);

    while (true)
    {
        yield return new WaitForSeconds(2f);
        // Send the recorded audio to the API and wait for the request to finish
        // before scheduling the next one.
        yield return StartCoroutine(RecognizeSpeech());
    }
}
/// <summary>
/// Converts the current microphone clip to WAV, posts it to the Google Cloud
/// Speech-to-Text <c>speech:recognize</c> endpoint and, when a transcript comes back,
/// shows the first alternative of the first result through UpdateUI.
/// </summary>
/// <returns>A coroutine enumerator that completes when the web request has been handled.</returns>
private IEnumerator RecognizeSpeech()
{
    if (string.IsNullOrEmpty(apiKey))
    {
        Debug.LogError("Speech API key is not set. Skipping the recognition request.");
        yield break;
    }

    // Convert the microphone recording to WAV bytes; a missing clip is treated like empty audio.
    byte[] audioData = audioClip != null ? WavUtility.FromAudioClip(audioClip) : null;
    if (audioData == null || audioData.Length == 0)
    {
        Debug.LogError("Ses verisi boş. API isteği yapılmayacak.");
        yield break; // Skip the API request.
    }

    string url = $"https://speech.googleapis.com/v1/speech:recognize?key={apiKey}";
    string json = BuildRecognizeRequestJson(
        System.Convert.ToBase64String(audioData),
        audioClip.frequency);

    using (UnityWebRequest www = new UnityWebRequest(url, "POST"))
    {
        www.uploadHandler = new UploadHandlerRaw(System.Text.Encoding.UTF8.GetBytes(json));
        www.downloadHandler = new DownloadHandlerBuffer();
        www.SetRequestHeader("Content-Type", "application/json");

        yield return www.SendWebRequest();

        // Anything other than Success (connection, protocol or data-processing error) is a failure.
        if (www.result != UnityWebRequest.Result.Success)
        {
            Debug.LogError($"Error: {www.error}"); // Transport / HTTP error message.
            Debug.LogError($"Response: {www.downloadHandler.text}"); // Body returned by the API.
            yield break;
        }

        // When nothing was recognized the parsed response has no results array,
        // so every level is checked before indexing.
        GoogleResponse response = JsonUtility.FromJson<GoogleResponse>(www.downloadHandler.text);
        if (response != null && response.results != null && response.results.Length > 0)
        {
            Alternative[] alternatives = response.results[0].alternatives;
            if (alternatives != null && alternatives.Length > 0)
            {
                UpdateUI(alternatives[0].transcript);
            }
        }
    }
}

// Builds the speech:recognize request body by hand. JsonUtility.ToJson cannot serialize
// anonymous types (it produces "{}"), which previously resulted in an empty request.
// Base64 text contains no characters that need JSON escaping, so plain concatenation is safe.
private static string BuildRecognizeRequestJson(string base64Audio, int sampleRateHertz)
{
    string rate = sampleRateHertz.ToString(System.Globalization.CultureInfo.InvariantCulture);
    return "{\"config\":{"
        + "\"encoding\":\"LINEAR16\","
        + "\"sampleRateHertz\":" + rate + ","
        + "\"languageCode\":\"tr-TR\""
        + "},\"audio\":{"
        + "\"content\":\"" + base64Audio + "\""
        + "}}";
}
// Displays the recognized text in the output label.
private void UpdateUI(string recognizedWord) => outputText.text = recognizedWord;
// Top-level object that RecognizeSpeech parses the API response into with JsonUtility.FromJson.
// Field names must stay exactly as they are: JsonUtility binds JSON keys by field name.
[System.Serializable]
public class GoogleResponse
{
// Recognition results; RecognizeSpeech reads only the first entry.
public Result[] results;
}
[System.Serializable]
<using System;
using System.IO;
using UnityEngine;
/// <summary>
/// Converts a Unity <see cref="AudioClip"/> into an in-memory 16-bit PCM WAV file.
/// </summary>
/// <remarks>
/// Only static members are used; the MonoBehaviour base is kept so existing scenes or
/// prefabs that reference this component keep working.
/// </remarks>
public class WavUtility : MonoBehaviour
{
    // Size in bytes of the canonical RIFF/WAVE header for PCM data.
    private const int HeaderSize = 44;

    // 16-bit PCM: two bytes per sample value.
    private const int BytesPerSample = 2;

    /// <summary>
    /// Converts an AudioClip to a byte array in WAV format.
    /// </summary>
    /// <param name="clip">Clip to convert.</param>
    /// <returns>
    /// Header plus little-endian 16-bit PCM data, or <c>null</c> when <paramref name="clip"/> is null.
    /// </returns>
    public static byte[] FromAudioClip(AudioClip clip)
    {
        // The return type was previously declared as a single byte, which did not compile.
        if (clip == null)
        {
            return null;
        }

        return ConvertToWav(clip);
    }

    // Reads every sample of the clip and lays out header + PCM data in one buffer.
    private static byte[] ConvertToWav(AudioClip clip)
    {
        // AudioClip.samples is the length per channel; the interleaved buffer holds all channels.
        int sampleCount = clip.samples * clip.channels;
        float[] samples = new float[sampleCount];
        clip.GetData(samples, 0);

        byte[] wav = new byte[HeaderSize + sampleCount * BytesPerSample];

        byte[] header = WriteWavHeader(clip, sampleCount * BytesPerSample);
        Buffer.BlockCopy(header, 0, wav, 0, header.Length);

        for (int i = 0; i < sampleCount; i++)
        {
            // Clamp first so out-of-range floats cannot wrap around when cast to short.
            float clamped = Mathf.Clamp(samples[i], -1f, 1f);
            short pcm = (short)(clamped * 32767f);

            // WAV stores samples little-endian; write the bytes explicitly rather than
            // depending on the platform byte order used by BitConverter.
            int offset = HeaderSize + i * BytesPerSample;
            wav[offset] = (byte)(pcm & 0xFF);
            wav[offset + 1] = (byte)((pcm >> 8) & 0xFF);
        }

        return wav;
    }

    // Builds the 44-byte RIFF/WAVE header describing dataSize bytes of 16-bit PCM audio.
    private static byte[] WriteWavHeader(AudioClip clip, int dataSize)
    {
        int frequency = clip.frequency;
        int channels = clip.channels;

        using (MemoryStream headerStream = new MemoryStream(HeaderSize))
        using (BinaryWriter writer = new BinaryWriter(headerStream))
        {
            writer.Write("RIFF".ToCharArray());
            writer.Write(36 + dataSize);                           // RIFF chunk size: file size minus 8
            writer.Write("WAVE".ToCharArray());
            writer.Write("fmt ".ToCharArray());
            writer.Write(16);                                      // fmt chunk size for PCM
            writer.Write((short)1);                                // audio format: PCM
            writer.Write((short)channels);
            writer.Write(frequency);
            writer.Write(frequency * channels * BytesPerSample);   // byte rate
            writer.Write((short)(channels * BytesPerSample));      // block align
            writer.Write((short)16);                               // bits per sample
            writer.Write("data".ToCharArray());
            writer.Write(dataSize);
            writer.Flush();
            return headerStream.ToArray();
        }
    }
}
// One entry of GoogleResponse.results, filled in by JsonUtility when the response is parsed.
// NOTE(review): JsonUtility presumably needs [System.Serializable] on this type; confirm the
// attribute sits directly above this class in the real file.
public class Result
{
// Candidate transcriptions for this result; RecognizeSpeech reads only the first entry.
public Alternative[] alternatives;
}
// One candidate transcription inside a Result.
[System.Serializable]
public class Alternative
{
// Recognized text; this is the string RecognizeSpeech passes to UpdateUI.
public string transcript;
}
}