I’m trying to use MediaPipe with Unity so I can track my hands in AR. Python acts as the server: when it receives image data, it sends back the landmarks of my hands. Unity is the client: it sends JPEG image data from the mobile camera and receives the landmark data. Everything works when I test against the loopback IP (127.0.0.1), but when I test against the server’s actual IP address, the client connects to the server and then immediately throws an error saying “Unable to read data from the transport connection”. Can anybody help? Thank you in advance.
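To summarize the framing between the two sides: the client sends the raw JPEG bytes followed by an "<END>" marker, and the server replies with a 4-byte big-endian length prefix followed by the landmark list encoded as a string. A minimal Python sketch of that framing (the helper names frame_image and frame_reply are just for illustration, not part of my actual code):

import struct

SEPARATOR = b"<END>"

def frame_image(jpg_bytes):
    # Client -> server: JPEG bytes terminated by the "<END>" marker.
    return jpg_bytes + SEPARATOR

def frame_reply(landmarks):
    # Server -> client: 4-byte big-endian length prefix, then the payload.
    payload = str(landmarks).encode()
    return struct.pack('>I', len(payload)) + payload

Here is my Unity client: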
using System;
using System.Collections.Generic;
using System.Net.Sockets;
using System.Text;
using UnityEngine;
using UnityEngine.UI;
using System.Threading.Tasks;
using System.Linq;
using System.IO;
using System.Threading;
public class CameraManager : MonoBehaviour
{
    public RawImage display;
    public InputField inputField;
    public int width = 1280;
    public int height = 720;
    public int frameRate = 30;
    public string receivedData;

    WebCamTexture camTexture;
    int currentIndex = 0;
    bool isConnected = false;
    float timeCount = 0.0f;
    string serverIP;
    int port = 8000;
    TcpClient client;
    NetworkStream stream;
    Thread receiveThread;

    private void Start()
    {
        if (camTexture != null)
        {
            display.texture = null;
            camTexture.Stop();
            camTexture = null;
        }
        WebCamDevice device = WebCamTexture.devices[currentIndex];
        Debug.Log(device.name);
        camTexture = new WebCamTexture(device.name, width, height);
        display.texture = camTexture;
        Debug.Log(camTexture.height);
        camTexture.Play();
    }
    private void Update()
    {
        if (isConnected)
        {
            // Send one frame roughly every 1/frameRate seconds.
            if (timeCount > 1.0f / frameRate)
            {
                SendDataAsync();
                timeCount = 0.0f;
            }
            timeCount += Time.deltaTime;
        }
    }
    public void ConnectToServer()
    {
        serverIP = inputField.text;
        Debug.Log(serverIP);
        try
        {
            client = new TcpClient(serverIP, port);
            stream = client.GetStream();
            Debug.Log("Connected to server");
            isConnected = true;
            receiveThread = new Thread(new ThreadStart(ReceiveData));
            receiveThread.IsBackground = true;
            receiveThread.Start();
        }
        catch (Exception e)
        {
            Debug.LogError("Socket error: " + e.Message);
        }
    }
    async void SendDataAsync()
    {
        try
        {
            // Outgoing frame: JPEG bytes followed by the "<END>" marker.
            byte[] separator = Encoding.UTF8.GetBytes("<END>");
            List<byte> sendData = new List<byte>();
            sendData.AddRange(CompressImage(ConvertWebCamTextureToTexture2D(camTexture)));
            sendData.AddRange(separator);
            if (stream.CanWrite)
            {
                await stream.WriteAsync(sendData.ToArray(), 0, sendData.Count);
            }
        }
        catch (Exception e)
        {
            Debug.LogError($"Error in SendDataAsync: {e.Message}");
        }
    }
    void ReceiveData()
    {
        while (true)
        {
            try
            {
                // Reply format: 4-byte big-endian length prefix, then the payload.
                byte[] lengthBuffer = new byte[4];
                int totalRead = 0;
                int bytesRead = stream.Read(lengthBuffer, 0, lengthBuffer.Length);
                if (bytesRead == lengthBuffer.Length)
                {
                    int messageLength = BitConverter.ToInt32(lengthBuffer.Reverse().ToArray(), 0);
                    byte[] dataBuffer = new byte[messageLength];
                    while (totalRead < messageLength)
                    {
                        // Read at the current offset so partial reads don't overwrite each other.
                        int read = stream.Read(dataBuffer, totalRead, messageLength - totalRead);
                        if (read == 0) break; // connection closed
                        totalRead += read;
                    }
                    receivedData = Encoding.UTF8.GetString(dataBuffer);
                    Debug.Log($"Received data of length: {messageLength}");
                    Debug.Log(receivedData);
                }
            }
            catch (Exception e)
            {
                Debug.LogError("Exception: " + e.Message);
            }
        }
    }
    Texture2D ConvertWebCamTextureToTexture2D(WebCamTexture webCamTexture)
    {
        Texture2D texture = new Texture2D(webCamTexture.width, webCamTexture.height);
        texture.SetPixels32(webCamTexture.GetPixels32());
        texture.Apply();
        return texture;
    }

    byte[] CompressImage(Texture2D texture)
    {
        byte[] imageBytes = texture.EncodeToJPG();
        return imageBytes;
    }
}

And this is the Python server:
import cv2
from cvzone.HandTrackingModule import HandDetector
import numpy as np
import socket
import struct
width, height = 1280, 720
detector = HandDetector(maxHands=1, detectionCon=0.5)
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_socket.bind(('0.0.0.0', 8000))
server_socket.listen(1)
conn, addr = server_socket.accept()
separator = b"<END>"
recv_data = b""
while True:
    packet = conn.recv(1280 * 720)
    # 4096
    recv_data += packet
    if separator in recv_data:
        img_data, _, recv_data = recv_data.partition(separator)
        nparr = np.frombuffer(img_data, np.uint8)
        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        hands, img = detector.findHands(img)
        cv2.imshow('Received Image', img)
        data = []
        if hands:
            hand = hands[0]
            lmList = hand['lmList']
            for lm in lmList:
                data.extend([lm[0], height - lm[1], lm[2]])
        send_data = str.encode(str(data))
        data_length = struct.pack('>I', len(send_data))
        conn.sendall(data_length + send_data)
        if cv2.waitKey(1) & 0xFF == 27:
            break