Using the code below, I am trying to estimate head pose from the face landmarks output by Google’s MediaPipe, using OpenCV’s solvePnP (via OpenCV for Unity). The code compiles and runs, but the output rotation quaternions are zero roughly 90% of the time, with the occasional non-zero value that is never repeatable and always inaccurate. There is jitter in the landmarks, but it is normally only about a pixel, so I do not believe the error comes from the MediaPipe results; I believe it lies in my solvePnP implementation. I am truly at a loss, because even with the jitter I would not expect zero quaternions or this much deviation in the results. Any ideas?
private IEnumerator EstimatePositionAndHeadPose(NormalizedLandmarkList landmarks)
{
    // Camera intrinsics (fx = fy = 600, principal point at (400, 300)) and distortion coefficients.
    var camera_matrix = new Mat(3, 3, CvType.CV_32F);
    var dist_coeffs = new MatOfDouble(1, 4, CvType.CV_32F);
    Mat rvec = new Mat(1, 3, CvType.CV_64FC1);
    Mat tvec = new Mat(1, 3, CvType.CV_64FC1);
    camera_matrix.put(0, 0, 600, 0, 400, 0, 600, 300, 0, 0, 1);
    dist_coeffs.put(0, 0, 600, 0, 400, 2);

    // 2D image points: selected MediaPipe face mesh landmarks, scaled from normalized
    // coordinates to an 800x600 image.
    OpenCVForUnity.CoreModule.Point[] imagePoints = new OpenCVForUnity.CoreModule.Point[6];
    imagePoints[0] = new OpenCVForUnity.CoreModule.Point(landmarks.Landmark[1].X * 800, landmarks.Landmark[1].Y * 600);
    imagePoints[1] = new OpenCVForUnity.CoreModule.Point(landmarks.Landmark[52].X * 800, landmarks.Landmark[52].Y * 600);
    imagePoints[2] = new OpenCVForUnity.CoreModule.Point(landmarks.Landmark[226].X * 800, landmarks.Landmark[226].Y * 600);
    imagePoints[3] = new OpenCVForUnity.CoreModule.Point(landmarks.Landmark[446].X * 800, landmarks.Landmark[446].Y * 600);
    imagePoints[4] = new OpenCVForUnity.CoreModule.Point(landmarks.Landmark[57].X * 800, landmarks.Landmark[57].Y * 600);
    imagePoints[5] = new OpenCVForUnity.CoreModule.Point(landmarks.Landmark[287].X * 800, landmarks.Landmark[287].Y * 600);
    var image_points = new MatOfPoint2f(imagePoints);

    // 3D model points of a generic face model (nose tip, chin, left/right eye outer corner,
    // left/right mouth corner).
    OpenCVForUnity.CoreModule.Point3[] objectPoints = new OpenCVForUnity.CoreModule.Point3[6];
    objectPoints[0] = new OpenCVForUnity.CoreModule.Point3(0, 0, 0);
    objectPoints[1] = new OpenCVForUnity.CoreModule.Point3(0, -330, -65);
    objectPoints[2] = new OpenCVForUnity.CoreModule.Point3(-225, 170, -135);
    objectPoints[3] = new OpenCVForUnity.CoreModule.Point3(225, 170, -135);
    objectPoints[4] = new OpenCVForUnity.CoreModule.Point3(-150, -150, -125);
    objectPoints[5] = new OpenCVForUnity.CoreModule.Point3(150, -150, -125);
    MatOfPoint3f object_points = new MatOfPoint3f(objectPoints);

    // Solve for the head pose (rvec/tvec in OpenCV camera coordinates).
    Calib3d.solvePnP(object_points, image_points, camera_matrix, dist_coeffs, rvec, tvec);

    // Convert to Unity pose data.
    double[] rvecArr = new double[3];
    rvec.get(0, 0, rvecArr);
    double[] tvecArr = new double[3];
    tvec.get(0, 0, tvecArr);
    PoseData poseData = ARUtils.ConvertRvecTvecToPoseData(rvecArr, tvecArr);
    //Debug.Log(poseData.rot);
    var outQuat = poseData.rot;
    rearFaceCube.transform.localRotation = outQuat;
    yield return null;
}
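For reference, this is roughly how I understand the rvec returned by solvePnP maps to a Unity quaternion. This is only a minimal sketch for debugging: RvecToUnityQuaternion is my own hypothetical helper, not part of ARUtils, and it ignores the right-handed (OpenCV) to left-handed (Unity) coordinate conversion that ARUtils.ConvertRvecTvecToPoseData performs, so I would only use it to check whether the rotation is non-zero.

// Assumes: using UnityEngine; using OpenCVForUnity.CoreModule; using OpenCVForUnity.Calib3dModule;
private static Quaternion RvecToUnityQuaternion(Mat rvec)
{
    // Rodrigues rotation vector -> 3x3 rotation matrix.
    Mat rotMat = new Mat(3, 3, CvType.CV_64FC1);
    Calib3d.Rodrigues(rvec, rotMat);

    // Read the matrix row-major and rebuild the rotation from its columns
    // (third column = forward axis, second column = up axis).
    double[] m = new double[9];
    rotMat.get(0, 0, m);
    Vector3 forward = new Vector3((float)m[2], (float)m[5], (float)m[8]);
    Vector3 up = new Vector3((float)m[1], (float)m[4], (float)m[7]);
    return Quaternion.LookRotation(forward, up);
}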
An example of the output quaternion (outQuat) is below: