code for Real Time Audio analysis in Unity

hi all,
so I’ve been looking around for a way to do real-time audio analysis for a visualizer in Unity, and there seem to be a lot of questions and no good answers.

I haven’t got a really good answer either, but I came up with a workable, if high-latency (by audio-analysis standards), solution.
I’m posting this code here in hopes of flushing out some better ideas on how to do this, and for some critique on the (admittedly rickety) code that I’ve come up with to solve this.

Explanation:
I use two audio buffers. I begin recording into the first one, and after a 50ms delay I begin playing that same buffer back while it is still being recorded into. The playback audio source is told to play at 98 or 99% speed (a pitch of 0.98 in the code below), which is fast enough to stay close to the live signal, while also slow enough that playback cannot catch up to the record position.

This is ended after 2 seconds (so that the live audio and the playback audio do not fall too far out of sync), and the process is repeated on a second audio buffer so that the first can be cleared out.

Now, for what I’m trying to do, this seems like a horrible workaround, but in practice it works just fine.
AS LONG AS you don’t need to actually HEAR that audio back! If you want to hear the audio playback, it’s awful and pops every 2 seconds and occasionally gets crunchy. But for the purposes of visuals, where the audio is being played back on a separate system and also being piped into Unity, this works reasonably well.

I’d love to hear if anyone has any ideas on how this can be improved, or has alternative methods to suggest.

using UnityEngine;
using System.Collections;


/// <summary>
/// Near-real-time microphone spectrum analysis for visualizers.
/// Records the microphone into one of two alternating clips, plays that clip
/// back (slightly slowed) while it is still being recorded, and samples the
/// AudioListener spectrum 15 times per second into <see cref="output"/>.
/// </summary>
public class FFTSubmit : MonoBehaviour {
	
	#region vars

	//struct for containing alternating audio buffers
	private struct AudioObj
	{
		public GameObject player;
		//cached so we do not depend on the GameObject.audio shortcut (removed in Unity 5)
		public AudioSource source;
		public AudioClip clip;

		/// <summary>Assigns <paramref name="c"/> (may be null) to this slot and its AudioSource.</summary>
		public void SetClip(AudioClip c)
		{
			clip = c;
			source.clip = c;
			//slow down playback speed slightly
			//this prevents the playback from overtaking analysis / recording
			source.pitch = PLAYBACK_PITCH;
		}
	}
	
	private AudioObj[] audioObj = new AudioObj[2];
	
	private const int BANDS = 4;
	//number of spectrum samples requested from AudioListener.GetSpectrumData
	private const int SPECTRUM_SIZE = 8192;
	//each band is closed this many samples (minus one) before its crossover index
	private const int EDGE_GUARD = 10;
	//length of each recording buffer, in seconds
	private const int BUFFER_SECONDS = 2;
	private const int SAMPLE_RATE = 48000;
	//delay between starting the recording and starting playback of the same clip
	private const float PLAYBACK_DELAY = .05f;
	private const float PLAYBACK_PITCH = .98f;
	private const float CHECK_INTERVAL = 1.0f/15.0f;
	
	public float[] curve = new float[BANDS];
	//peak spectrum value of each band, refreshed by Check()
	public float[] output = new float[BANDS];
	
	public string[] inputDevices;
	private int[] crossovers = new int[BANDS];
	private float[] freqData = new float[SPECTRUM_SIZE];
	
	public GameObject playerPrefab;
	//slot (0 or 1) that the next recording cycle will use
	private int index = 0;
	
	
	private bool doSound = true;
	private int deviceNum= 0;
	//device currently being recorded, or null when no recording is running
	private string activeDevice;
	#endregion

	#region Unity Methods
	void Start () 
	{
		crossovers[0] = 30;
		crossovers[1] = 50;
		crossovers[2] = 600;
		crossovers[3] = freqData.Length;
		
		output = new float[BANDS];
		
		if(playerPrefab == null)
		{
			Debug.LogError("FFTSubmit: playerPrefab is not assigned, audio analysis disabled");
			enabled = false;
			return;
		}
		
		for(int i = 0; i < audioObj.Length; i ++)
		{
			GameObject player = (GameObject)Instantiate(playerPrefab);
			player.transform.parent = transform;
			player.transform.position = Vector3.zero;
			
			AudioSource source = player.GetComponent<AudioSource>();
			if(source == null)
				source = player.AddComponent<AudioSource>();
			
			audioObj[i].player = player;
			audioObj[i].source = source;
			//clips are supplied by Microphone.Start, nothing to allocate here
		}
		
		inputDevices = Microphone.devices;
		
		InvokeRepeating("Check", 0, CHECK_INTERVAL);
		StartCoroutine(StartRecord());
	}
	
	void Update()
	{
		KeyInput();
	}
	
	void OnDestroy()
	{
		//the record coroutine dies with this object, so stop the microphone ourselves
		if(activeDevice != null)
		{
			Microphone.End(activeDevice);
			activeDevice = null;
		}
	}
	#endregion

	#region Actions
	
	/// <summary>
	/// Reads the current listener spectrum and stores the peak value of each band in output.
	/// Band k ends at sample index crossovers[k] - EDGE_GUARD + 1, so the last few
	/// samples of the spectrum are never analyzed.
	/// </summary>
	private void Check()
	{
		if(!doSound)
			return;
		
		AudioListener.GetSpectrumData(freqData, 0, FFTWindow.Hamming);
		
		int k = 0;
		float peak = 0;
		for(int i = 0; i < freqData.Length && k < BANDS; i ++)
		{
			float d = freqData[i];
			if(d > peak)
				peak = d;
			
			//encountered a problem with noise / clips at the end of the sample array
			//closing each band slightly early prevents analyzing those samples
			if(i > crossovers[k] - EDGE_GUARD)
			{
				output[k] = peak;
				peak = 0;
				k++;
			}
		}
	}
	
	/// <summary>
	/// Endless record loop: records BUFFER_SECONDS of microphone input into the
	/// current slot, starts delayed playback of it, then repeats on the other slot.
	/// </summary>
	private IEnumerator StartRecord()
	{
		while(true)
		{
			string[] devices = Microphone.devices;
			if(devices.Length == 0)
			{
				Debug.LogWarning("FFTSubmit: no microphone found, retrying");
				yield return new WaitForSeconds(BUFFER_SECONDS);
				continue;
			}
			
			//the device list may have shrunk since deviceNum was chosen
			if(deviceNum < 0 || deviceNum > devices.Length - 1)
				deviceNum = 0;
			
			//remember the device by name: deviceNum can change during the wait below,
			//and Microphone.End must stop the device that was actually started
			string device = devices[deviceNum];
			
			//clear audio clip and begin recording
			audioObj[index].SetClip(null);
			AudioClip recording = Microphone.Start(device, false, BUFFER_SECONDS, SAMPLE_RATE);
			if(recording == null)
			{
				Debug.LogWarning("FFTSubmit: could not start recording on " + device);
				yield return new WaitForSeconds(BUFFER_SECONDS);
				continue;
			}
			activeDevice = device;
			print ("recording to audioObj " + index);

			StartCoroutine(StartPlay (recording));
			yield return new WaitForSeconds(BUFFER_SECONDS);
			Microphone.End(device);
			activeDevice = null;
			
			//loop around and repeat the whole process on the other slot
		}
	}
	
	/// <summary>
	/// Assigns <paramref name="buffer"/> to the current slot, waits PLAYBACK_DELAY,
	/// starts playing it, silences the other slot and makes that one the next to record.
	/// </summary>
	private IEnumerator StartPlay(AudioClip buffer)
	{	
		//capture the slot before yielding so both halves of this routine agree on it
		int slot = index;
		int previous = 1 - slot;
		
		audioObj[slot].SetClip(buffer);
		
		//50 ms delay time was the minimum I was able to reach on my system without horrible noise artifacts
		yield return new WaitForSeconds(PLAYBACK_DELAY);
		audioObj[slot].player.SetActive(true);
		audioObj[slot].source.Play();
		
		//stop playing the previous audio buffer, prepare for it to begin recording
		audioObj[previous].source.Stop();
		audioObj[previous].player.SetActive(false);
		
		index = previous;
	}
	
	/// <summary>A toggles the analysis on/off, = cycles to the next input device.</summary>
	private void KeyInput()
	{
		if(Input.GetKeyDown(KeyCode.A))
		{
			doSound = !doSound;
		}
		if(Input.GetKeyDown(KeyCode.Equals))
		{
			//takes effect at the start of the next recording cycle
			deviceNum ++;
			if(deviceNum > Microphone.devices.Length - 1)
				deviceNum = 0;
		}
	}
	#endregion
	
	
}

this script could potentially analyze the sound of crickets, of which there are a lot right now