using Steamworks;
using UnityEngine;
using UnityEngine.Networking;
/// <summary>
/// Proximity push-to-talk voice chat built on Steam voice capture and UNET messaging.
/// The local player records while V is held and sends compressed voice to the server;
/// the server relays it to every object with a NetworkIdentity found inside the hearing radius.
/// </summary>
public class VoiceChat : NetworkBehaviour
{
    // Sample rate (Hz) requested from Steam and used for the playback clips.
    private const int SampleRate = 22050;
    // Size of one compressed chunk pulled from Steam and sent over the network.
    private const int CompressedChunkSize = 1024;
    // Radius of the overlap sphere used to pick the receivers of a voice packet.
    private const float HearingRadius = 50f;
    // Divisor that maps signed 16-bit PCM onto Unity's -1..1 float range.
    private const float PcmFullScale = 32768.0f;
    // Uncompressed output is never requested from GetVoice, so one shared empty buffer is enough.
    private static readonly byte[] s_noUncompressedBuffer = new byte[0];

    public LayerMask PlayerMask;
    public AudioSource audioSource;

    // Last clip created by Target_PlaySound; tracked so it can be destroyed when replaced.
    private AudioClip m_generatedClip;

    /// <summary>
    /// Handles the push-to-talk key and forwards captured voice to the server.
    /// </summary>
    void Update ()
    {
        if (!isLocalPlayer)
        {
            return;
        }

        // Push-to-talk: record while V is held. The original started recording on
        // key-up and stopped on key-down, which is the reverse of push-to-talk.
        if (Input.GetKeyDown(KeyCode.V))
        {
            SteamUser.StartVoiceRecording();
        }
        else if (Input.GetKeyUp(KeyCode.V))
        {
            SteamUser.StopVoiceRecording();
        }

        uint Compressed;
        uint Uncompressed;
        EVoiceResult ret = SteamUser.GetAvailableVoice(out Compressed, out Uncompressed, 0);
        if (ret != EVoiceResult.k_EVoiceResultOK || Compressed <= CompressedChunkSize)
        {
            return;
        }

        Debug.Log(Compressed);
        // A fresh buffer per packet: it is handed to the Command as the payload.
        byte[] DestBuffer = new byte[CompressedChunkSize];
        uint BytesWritten;
        uint uncompressedBytesWritten;
        ret = SteamUser.GetVoice(true, DestBuffer, CompressedChunkSize, out BytesWritten, false, s_noUncompressedBuffer, 0, out uncompressedBytesWritten, SampleRate);
        if (ret == EVoiceResult.k_EVoiceResultOK && BytesWritten > 0)
        {
            Cmd_SendData(DestBuffer, BytesWritten);
        }
    }

    /// <summary>
    /// Server side: relays a compressed voice packet to nearby networked objects.
    /// </summary>
    /// <param name="data">Buffer holding the compressed voice.</param>
    /// <param name="size">Number of valid bytes in <paramref name="data"/>.</param>
    [Command (channel = 2)]
    void Cmd_SendData(byte[] data, uint size)
    {
        Debug.Log("Command");

        // The payload comes from a client: reject sizes that do not fit the buffer.
        if (data == null || size == 0 || size > data.Length)
        {
            return;
        }

        Collider[] cols = Physics.OverlapSphere(transform.position, HearingRadius, PlayerMask);
        for (int i = 0; i < cols.Length; i++)
        {
            NetworkIdentity identity = cols[i].GetComponent<NetworkIdentity>();
            // Skip colliders without an identity or without a client connection to target.
            if (identity == null || identity.connectionToClient == null)
            {
                continue;
            }
            Target_PlaySound(identity.connectionToClient, data, size);
        }
    }

    /// <summary>
    /// Client side: decompresses a voice packet and plays it through <see cref="audioSource"/>.
    /// </summary>
    /// <param name="connection">Connection of the client that should play the sound.</param>
    /// <param name="DestBuffer">Buffer holding the compressed voice.</param>
    /// <param name="BytesWritten">Number of valid bytes in <paramref name="DestBuffer"/>.</param>
    [TargetRpc (channel = 2)]
    void Target_PlaySound(NetworkConnection connection, byte[] DestBuffer, uint BytesWritten)
    {
        Debug.Log("TARGET");

        if (DestBuffer == null || BytesWritten == 0 || BytesWritten > DestBuffer.Length)
        {
            return;
        }

        // Room for one second of 16-bit mono PCM.
        byte[] pcm = new byte[SampleRate * 2];
        uint pcmBytes;
        EVoiceResult ret = SteamUser.DecompressVoice(DestBuffer, BytesWritten, pcm, (uint)pcm.Length, out pcmBytes, SampleRate);
        if (ret != EVoiceResult.k_EVoiceResultOK || pcmBytes < 2)
        {
            return;
        }

        // Only convert the samples that were actually decoded, so the clip is exactly
        // as long as the received audio instead of being padded to a full second of silence.
        int sampleCount = (int)(pcmBytes / 2);
        float[] samples = new float[sampleCount];
        for (int i = 0; i < sampleCount; ++i)
        {
            // Little-endian signed 16-bit sample -> float.
            samples[i] = (short)(pcm[i * 2] | pcm[i * 2 + 1] << 8) / PcmFullScale;
        }

        // Clips made with AudioClip.Create are not released automatically; drop the previous one.
        if (m_generatedClip != null)
        {
            Destroy(m_generatedClip);
        }

        m_generatedClip = AudioClip.Create(UnityEngine.Random.Range(100,1000000).ToString(), sampleCount, 1, SampleRate, false);
        m_generatedClip.SetData(samples, 0);
        audioSource.clip = m_generatedClip;
        audioSource.Play();
    }
}
This code will only send the voice data to close players. Might not be what you desire. If not, you can simply change that bit out.
It's NOT flawless, but it might help you get started.
I hooked up something very similar with Steamworks a few months ago, but the playback was a little garbled. I haven't had a chance to revisit it. I had a pretty hard time ensuring that it was writing the data to the audio buffer in exactly the correct spot.
For some reason there is a split second of silence making a stutter between the AudioClips. I have been trying to remove it. Some sort of backlog of AudioClips was my initial idea, but it seems that simply switching clips causes a stutter, and I have yet to figure out a smooth way to do it. Try it if you like. I would love to know how/if you get it working.
I think the approach I was taking was to have a single AudioClip with a bigger buffer than the typical audio sample that is sent down the wire, say 2 seconds' worth of audio. From there:
When you start collecting samples, you paste them into the correct spot in the buffer (being careful when pasting a sample near the end of the buffer, of course). After a small amount of time collecting samples (to ensure the playhead always has some audio to play), start looping playback. You also need to ensure that you're filling any "holes" with silence so that you don't accidentally hear old data from previous playbacks. I'd love to figure out an easier way, because this approach was pretty complicated, I thought!
That might be the answer. I'll probably try to get that working when I have time.
So basically you will never reach the end of the clip. You could have a clip x times the size of one received clip. Then fill it and start playing. Once the first part has been played, you could replace that part with a new one. So you always have a clip with a few clips in it.
One other thing that's worth noting here: if you're just using UNet out of the box, I'm not sure that it's feasible to send voice data like this. Passing it through the relay server might blow out your bandwidth usage.
I'm personally sending this data via Steamworks' P2P layer to avoid all that mess.
Glad you pointed that out!
Personally I am running dedicated servers only, without the relay, so for me it's fine. But feel free to let me know your results if you get back to it.
Hey, I've been looking at this voice chat stuff again today and I got it sounding much better thanks to a tip on the Steamworks forum. It's not perfect, but it sounds way better than my other approach.
I tried to clean up the code a bit into something that's easy to follow. It probably won't compile for you - this is more to demonstrate the basic playback approach.
You're going to need to connect some dots to get this working on your end, though. Notes to follow…
/// <summary>
/// Plays queued voice packets through a looping, streaming AudioClip.
/// Packets are kept in a SortedList keyed by packet id and are copied into the
/// clip's PCM read callback one sample at a time; silence is written whenever
/// no packet is available.
/// </summary>
public class SteamVoiceChatPeer : MonoBehaviour
{
    // Packets waiting to be played, ordered by packet id.
    public SortedList<ulong, VoiceChat.VoiceChatPacket> packetsToPlay = new SortedList<ulong, VoiceChatPacket>();
    private AudioSource m_audioSource;
    // Playhead position reported by / advanced alongside the streaming clip.
    int position = 0;
    // Packet currently being copied into the stream, or null when idle.
    VoiceChat.VoiceChatPacket currentPacket;
    // Index of the next sample to read from currentPacket.DecodedData.
    int currentPacketSampleIndex = 0;

    /// <summary>
    /// Creates the looping streaming clip and starts playback immediately.
    /// </summary>
    void Start()
    {
        int size = VoiceChat.VoiceChatSettings.Instance.Frequency * 10;// bigger size seems to help with popping a little, but i might be making that up.
        m_audioSource = GetComponent<AudioSource>();
        m_audioSource.loop = true;
        m_audioSource.clip = AudioClip.Create ("VoiceChat", size, 1, VoiceChatSettings.Instance.Frequency, true, OnAudioRead, OnAudioSetPosition);
        m_audioSource.Play ();
    }

    /// <summary>
    /// PCM read callback: fills <paramref name="data"/> with samples from the queued
    /// packets, writing silence when nothing is queued.
    /// </summary>
    /// <param name="data">Buffer supplied by the AudioClip; every element is overwritten.</param>
    void OnAudioRead(float[] data)
    {
        // fresh start?
        if (currentPacket == null) {
            AdvanceToNextPacket ();
        }

        for (int i = 0; i < data.Length; i++) {
            // default to silence so stale buffer content is never replayed.
            float sample = 0;
            if (currentPacket != null) {
                sample = currentPacket.DecodedData [currentPacketSampleIndex];
                currentPacketSampleIndex++;
                if (currentPacketSampleIndex >= currentPacket.DecodedData.Length) {
                    AdvanceToNextPacket ();
                }
            }
            data [i] = sample;
            position++;
        }
    }

    /// <summary>
    /// Position callback of the streaming clip; records the new playhead position.
    /// </summary>
    void OnAudioSetPosition(int newPosition)
    {
        position = newPosition;
    }

    // Makes the first queued packet current (removing it from the queue), or clears
    // the current packet when the queue is empty, and rewinds the sample index.
    private void AdvanceToNextPacket()
    {
        currentPacket = NextPacket ();
        currentPacketSampleIndex = 0;
        if (currentPacket != null) {
            packetsToPlay.Remove (currentPacket.PacketId);
        }
    }

    /// <summary>
    /// Returns the queued packet with the lowest id without removing it, or null
    /// when the queue is empty.
    /// </summary>
    private VoiceChat.VoiceChatPacket NextPacket()
    {
        if (packetsToPlay.Count > 0) {
            // SortedList exposes its values in key order, so index 0 is the first
            // packet; this avoids LINQ enumeration inside the audio callback.
            return packetsToPlay.Values [0];
        }
        return null;
    }

    /// <summary>
    /// Decodes an incoming packet and queues it for playback. Duplicate ids,
    /// silent packets and packets without any decoded samples are dropped.
    /// </summary>
    /// <param name="newPacket">Packet whose Data holds decompressed voice.</param>
    public void OnNewSample(VoiceChatPacket newPacket)
    {
        if (newPacket == null) {
            return;
        }
        if (packetsToPlay.ContainsKey (newPacket.PacketId)) {
            Debug.LogError ("already have packet " + newPacket.PacketId + ". abort");
            return;
        }
        // convert immediately.
        newPacket.Decode ();
        // throw out silence for now just to see how it acts
        if (newPacket.IsSilence) {
            return;
        }
        // a packet with no samples would make OnAudioRead index past the end of DecodedData.
        if (newPacket.DecodedData == null || newPacket.DecodedData.Length == 0) {
            return;
        }
        packetsToPlay.Add (newPacket.PacketId, newPacket);
    }
}
// This class was originally part of another voice chat library I found on GitHub. I've since added to and butchered parts of it. You probably only need
// Length, Data, DecodedData, and PacketId for this example to work, plus the Decode function for converting Steam's decompressed data into a Unity-friendly format.
/// <summary>
/// One voice packet: the raw bytes in <see cref="Data"/> plus the float samples
/// produced from them by <see cref="Decode"/>.
/// </summary>
public class VoiceChatPacket
{
    public VoiceChatCompression Compression;
    // Number of bytes of Data that Decode converts.
    public int Length;
    // Decompressed Steam voice bytes (two bytes per sample).
    public byte[] Data;
    // Float samples filled in by Decode; null until Decode has run.
    public float[] DecodedData = null;
    public int NetworkId;
    public ulong PacketId;
    public double Timestamp;
    // Number of entries in DecodedData, set by Decode.
    public int LengthInSamples;
    public bool IsSilence = false;

    /// <summary>
    /// Converts the first <see cref="Length"/> bytes of <see cref="Data"/> from
    /// 16-bit integer samples into floats scaled by short.MaxValue, stores them in
    /// <see cref="DecodedData"/> and updates <see cref="LengthInSamples"/>.
    /// </summary>
    public void Decode()
    {
        const float fullScale = short.MaxValue;

        // todo :: pool this array?
        float[] samples = DecodedData = new float[Length / 2];

        int byteOffset = 0;
        for (int s = 0; s < samples.Length; s++, byteOffset += 2)
        {
            short pcm = System.BitConverter.ToInt16(Data, byteOffset);
            samples[s] = pcm / fullScale;
        }

        LengthInSamples = samples.Length;
    }
}
So the main change was switching my AudioClip's constructor to be streaming, and using the PCM reader callback to write my audio data into the buffer object it gives you.
Note that I left out all the stuff about capturing and decompressing voices, as that happens in another class in my game. The only thing you need to know about that class is that it decompresses the voice into the VoiceChatPacket's Data array, then passes it to the corresponding SteamVoiceChatPeer's OnNewSample function.
Also note that this class uses a SortedList to attempt to keep the packets in order. I do this because I'm sending the data over an unreliable channel. I see a bug in the code now too, where it could insert old packets at the front of the list - I'm totally not accounting for that right now. It might cause problems if they come in really late, like after they should have been played.
Try to send over Unreliable Sequenced. It's probably the way to go with audio. But thanks a lot for this. I looked into streaming, but I could not really wrap my head around it. I'll have a look and see how it goes, and potentially update the original thread.
EDIT: Just a question, why are you sending all the metadata with every packet? Won't that just eat bandwidth for no reason?
Well, I'm using the Steamworks P2P stuff to send data. They recommend sending voice chat on the "k_EP2PSendUnreliableNoDelay" channel, so that's what I'm doing. I don't think it even has an Unreliable Sequenced channel.
Well, I will have to take a day when I'm off work to look at what you have provided with the stream. You have structured it a bit differently. I only send the actual bytes of data.
But are you actually doing a client buffer? And if so how does it build up? Delayed playback after first packet?
And if not, mind elaborating on how you actually went ahead and proceeded with it. I appreciate the code example. But I much prefer to understand what I am writing haha.
Sorry, I didn't really want to chop the code up and make it any more understandable than I had to. It doesn't really help that this is like my 4th iteration at getting this to work, so the code has grown some pretty ugly hairs over time.
I had a client buffer in my old experiments, but I took it out for this test. The lack of a buffer might also account for some of the random pops I'm hearing, so thanks for reminding me! It would be pretty easy to not actually start writing in OnAudioRead until I've collected a handful of packets.
Are you referring to the IsSilence stuff in my code? That was an old experiment that I should weed out, from back when I was copying/pasting stuff into the audio buffer. At the time, I had a hard time dealing with microphone silence, so I started sending a message even when there was no audio data. I added that filter that throws out silence just today - it seems sending silence messages is not necessary at all, though maybe it will be. I don't know.
So you are creating an AudioClip with a read and write stream method. But what do you actually do with those when you receive the data from the server? Say I receive an array of bytes from the server. I then decompress that. What would you do with that data then? Currently I'm just creating a new AudioClip every time I get new data and playing that.
if you change that example to make the audio source loop instead, it will play a tone continuously, and you will see the OnAudioRead function keeps getting called, allowing you to continually write new data into the stream.
My code is essentially identical to this example, except where I've added my own logic for deciding which sample to write in OnAudioRead - I just grab the next piece of data from the current packet that's playing.
so to more specifically address your questions -
you receive an array of bytes from the server
decompress it with Steamworks.SteamUser.DecompressVoice. this gives you a new byte array of uncompressed audio.
you convert that byte array into the float array that unity prefers (i do this in VoiceChatPacket.Decode)
hang onto that float array until your OnAudioRead function is ready for it (put it in a queue so that you can read them in the correct order!)
keep track of which float array you're currently playing, and the last position you wrote into the OnAudioRead array (my vars to keep track of this are called currentPacket and currentPacketSampleIndex). You want to keep track of this stuff so that you can pick up where you left off the next time OnAudioRead is called.
if you reach the end of the currentPacket's data, get the next packet from the queue and reset currentPacketSampleIndex to 0
Does that make more sense? I'm not sure if I'm doing a good job explaining it.
Here is how I understand it; please correct me if I'm wrong.
You have a OrderedList (or some other datatype that is fit for the job and performant for the operations). This ordered list has float arrays in it.
When you receive a float[] from the server (well, not really - you decompress and so on first, but you get what I mean), you put that float array into your list. Then every time OnAudioRead is called, you feed it the first (oldest) float array and remove that from the list. And if there is nothing in the list, I guess you generate a new float array that contains silence.
If I understand it all correctly (which I'm probably not), I have a few more questions. What frequencies have you found suitable?
What length should the streaming audioclip be? Or does it even matter when the stream option is true?
Yeah, that sounds right actually. I'm about to do a test without writing any "silent" arrays to see how that goes… I'm pretty sure it should work mostly fine without it - I just won't modify OnAudioRead's array if the packet queue is empty.
I don't have good answers for the rest of your questions yet.
I haven't messed around with frequencies much. I've been using 11025 because for some reason I saw that number in the Steam docs, and have been using it as my baseline. I'll probably test out other frequencies once I get these popping issues handled.
As for the length of the streaming clip… I'm not really sure that size matters here. In my example, I tried making it quite a bit longer than it needs to be, in case the looping is a cause of audio pops. Increasing the size doesn't seem to make popping any less frequent. I'd guess there's a minimum size requirement, but I haven't tested it.
If you pay attention to the size of the array that's passed into OnAudioRead, I'm seeing that it usually ranges from about 300 to 4000-ish (4096?) floats long. So I assume the clip probably needs to be at least that many samples long? Clearly I'm figuring out a lot of this stuff as I go.
As for silent arrays, I don't see that being an issue, to be honest. GetAvailableVoice won't return OK if it just has silence data. At least from my testing.
You have to write zeros into the OnAudioRead buffer if there's no data in the queue, otherwise you'll hear old voice chat data on a very short loop.
I added a simple buffer mechanism that disables writing in OnAudioRead until I have X number of snapshots accumulated. I also added a check to discard old data packets that would have already been played.
This sounds really good now. It probably could be higher fidelity, but it's passable in its current state. I'm using this for a VR game, so I'd prefer to save CPU cycles wherever I can.
Here's the complete file I'm using. I cleaned it up a bit and added some comments.
namespace Robochase6000
{
/// <summary>
/// One voice packet: the raw bytes in <see cref="Data"/> plus the float samples
/// produced from them by <see cref="Decode"/>.
/// </summary>
public class VoiceChatPacket
{
    public ulong PacketId;
    // Number of bytes of Data that Decode converts.
    public int Length;
    public byte[] Data;
    // Float samples filled in by Decode; null until Decode has run.
    public float[] DecodedData = null;
    public bool IsSilence = false;

    /// <summary>
    /// Converts the first <see cref="Length"/> bytes of <see cref="Data"/> from
    /// 16-bit integer samples (two bytes each) into floats scaled by short.MaxValue
    /// and stores them in <see cref="DecodedData"/>.
    /// </summary>
    public void Decode()
    {
        const float fullScale = short.MaxValue;

        // optimization todo :: pool this.
        float[] samples = DecodedData = new float[Length / 2];

        int byteOffset = 0;
        for (int s = 0; s < samples.Length; s++, byteOffset += 2)
        {
            short pcm = System.BitConverter.ToInt16(Data, byteOffset);
            samples[s] = pcm / fullScale;
        }
    }
}
/// <summary>
/// Plays one remote peer's voice through a looping, streaming AudioClip.
/// Incoming packets are decoded and kept in a SortedList keyed by packet id;
/// playback waits until <see cref="PacketBuffer"/> packets are queued, then the
/// PCM read callback copies them into the stream sample by sample, writing
/// silence whenever nothing is available.
/// </summary>
/// <remarks>
/// NOTE(review): the queue and the Buffering flag are touched from Update,
/// OnNewSample and the PCM read callback without synchronization - confirm which
/// thread Unity invokes the callback on before relying on this.
/// </remarks>
public class SteamVoiceChatPeer : MonoBehaviour
{
    // sample rate (Hz) of the streaming clip.
    private const int SampleRate = 11025;
    // length of the looping clip, in seconds.
    private const int ClipLengthSeconds = 10;

    public CSteamID SteamID;
    // packets waiting to be played, ordered by packet id.
    public SortedList<ulong, VoiceChatPacket> PacketQueue = new SortedList<ulong, VoiceChatPacket>();
    private AudioSource m_audioSource;
    // how many packets we should collect before starting playback
    public static int PacketBuffer = 10;
    // whether or not we're currently waiting for more packets to be collected.
    public bool Buffering = true;
    // the current position of the playhead in the AudioClip
    private int m_streamPosition = 0;
    // the packet that is currently being played
    private VoiceChatPacket m_currentlyPlayingPacket;
    // our position in the packet that's being played.
    private int m_currentlyPlayingPacketSampleIndex = 0;
    // the last/current packet that was played. if we get packets older than this, we can throw them out.
    private ulong m_lastPlayedPacketId = 0;

    /// <summary>
    /// Creates the looping streaming clip and starts playback immediately.
    /// </summary>
    void Start()
    {
        m_audioSource = GetComponent<AudioSource>();
        m_audioSource.loop = true;
        m_audioSource.clip = AudioClip.Create ("VoiceChat", SampleRate * ClipLengthSeconds, 1, SampleRate, true, OnAudioRead, OnAudioSetPosition);
        m_audioSource.Play ();
    }

    /// <summary>
    /// Leaves the buffering state once enough packets have been queued.
    /// </summary>
    void Update()
    {
        // if we're buffering, we're not anymore if we've gotten enough packets.
        if (Buffering) {
            Buffering = PacketQueue.Count < PacketBuffer;
        }
    }

    /// <summary>
    /// PCM read callback: fills <paramref name="data"/> with queued voice samples,
    /// or with silence while buffering or when the queue runs dry.
    /// </summary>
    /// <param name="data">Buffer supplied by the AudioClip; every element is overwritten.</param>
    void OnAudioRead(float[] data)
    {
        // wait til we have some packets saved up: write out silence and leave.
        if (Buffering) {
            System.Array.Clear (data, 0, data.Length);
            m_streamPosition += data.Length;
            return;
        }

        // if we don't have a packet to play, try grabbing the next one.
        if (m_currentlyPlayingPacket == null) {
            GrabNextPacket ();
        }

        for (int i = 0; i < data.Length; i++) {
            // start at silence, and fill it in with the correct value.
            float sample = 0;
            if (m_currentlyPlayingPacket != null) {
                sample = m_currentlyPlayingPacket.DecodedData [m_currentlyPlayingPacketSampleIndex];
                // increment our current packet's playhead now that we've just read a sample
                m_currentlyPlayingPacketSampleIndex++;
                // mark down the last packet that was played so that we have an idea of which incoming packets are obsolete.
                m_lastPlayedPacketId = m_currentlyPlayingPacket.PacketId;
                // if we've reached the end of this packet, grab the next one.
                if (m_currentlyPlayingPacketSampleIndex >= m_currentlyPlayingPacket.DecodedData.Length) {
                    GrabNextPacket ();
                }
            }
            // write the sample to the AudioClip & update its position
            data [i] = sample;
            m_streamPosition++;
        }
    }

    /// <summary>
    /// Position callback of the streaming clip; records the new playhead position.
    /// </summary>
    void OnAudioSetPosition(int newPosition)
    {
        m_streamPosition = newPosition;
    }

    /// <summary>
    /// Makes the first playable packet in the queue the current one. When no
    /// playable packet is left, clears the current packet and goes back to buffering.
    /// </summary>
    private void GrabNextPacket()
    {
        m_currentlyPlayingPacket = null;
        // reset the index.
        m_currentlyPlayingPacketSampleIndex = 0;

        while (PacketQueue.Count > 0) {
            // the list is sorted by key, so index 0 is the oldest packet; indexed
            // access avoids LINQ enumeration inside the audio callback.
            VoiceChatPacket packet = PacketQueue.Values [0];
            PacketQueue.RemoveAt (0);

            // PacketQueue is public, so skip null entries and packets without
            // samples: either would make OnAudioRead fail or replay stale audio.
            if (packet != null && packet.DecodedData != null && packet.DecodedData.Length > 0) {
                m_currentlyPlayingPacket = packet;
                return;
            }
        }

        Buffering = true;
    }

    /// <summary>
    /// Decodes an incoming packet and queues it for playback. Duplicates, packets
    /// older than the one last played, silent packets and packets without any
    /// decoded samples are dropped.
    /// </summary>
    /// <param name="newPacket">Packet whose Data holds decompressed voice.</param>
    public void OnNewSample(VoiceChatPacket newPacket)
    {
        if (newPacket == null) {
            return;
        }
        // throw out duplicates. this should never happen...
        if (PacketQueue.ContainsKey (newPacket.PacketId)) {
            Debug.LogError ("already have packet " + newPacket.PacketId + ". aborting");
            return;
        }
        // throw out old packets
        if (m_lastPlayedPacketId > newPacket.PacketId) {
            Debug.Log ("throwing out old packet " + newPacket.PacketId);
            return;
        }
        // ignore silence
        if (newPacket.IsSilence) {
            return;
        }
        // convert immediately.
        newPacket.Decode();
        // nothing to play: a zero-sample packet would be indexed out of range in OnAudioRead.
        if (newPacket.DecodedData == null || newPacket.DecodedData.Length == 0) {
            return;
        }
        // shove it into our queue.
        PacketQueue.Add (newPacket.PacketId, newPacket);
    }
}
}