I'm currently trying to calculate the essential matrices of 3 cameras relative to a fourth camera, which I use as the "base camera". I am using OpenCV to calculate the essential matrices.
Even though my points seem to be correct, when I reconstruct the poses of the cameras the results don't make any sense. For example, two of the output translation vectors are (0, 0, -1). I suspect I am somehow messing up the enforcement of the internal constraint, but I don't know how.
I am not sure what I am doing wrong and hope somebody can help me :)
import numpy as np
import cv2 as cv

cameras = 4  # camera 0 is the base camera
basePoints = np.loadtxt("data/calibration/fundamental_matrix/resulting_data/camera0/extracted_points/points.txt")
for c in range(1, cameras):
    print(f"calculating essential matrix for camera {c}")
    cameraPoints = np.loadtxt(f"data/calibration/fundamental_matrix/resulting_data/camera{c}/extracted_points/points.txt")
    cm = np.loadtxt(f"data/calibration/camera_matrix/resulting_data/camera{c}/cmatrix.txt")
    em, mask = cv.findEssentialMat(basePoints, cameraPoints, cm, cv.RANSAC, 0.9999, 1)
    print(f"essential matrix {c}:")
    print(em)
    print(f"enforcing internal constraint of essential matrix for camera {c}")
    # np.linalg.svd returns u, s, vh such that em == u @ np.diag(s) @ vh,
    # so the rank-2 projection is the matrix product u @ diag(1, 1, 0) @ vh
    u, s, vh = np.linalg.svd(em)
    em = u @ np.diag([1.0, 1.0, 0.0]) @ vh
    print(f"enforced essential matrix {c}:")
    print(em)
    np.savetxt(f"data/calibration/essential_matrix_and_pose/resulting_data/camera{c}/ematrix.txt", em)
Here are some normalized sample points of my data:
base camera:
6.899999999999999467e-01 4.066666666666666347e-02
6.899999999999999467e-01 4.066666666666666347e-02
6.913333333333333552e-01 4.066666666666666347e-02
6.919999999999999485e-01 4.066666666666666347e-02
6.946666666666666545e-01 4.066666666666666347e-02
6.986666666666666581e-01 4.133333333333333304e-02
7.026666666666666616e-01 4.133333333333333304e-02
7.073333333333332584e-01 4.199999999999999567e-02
7.119999999999999662e-01 4.266666666666666524e-02
7.173333333333332673e-01 4.466666666666666702e-02
first camera:
3.893333333333333091e-01 6.600000000000000311e-02
3.900000000000000133e-01 6.600000000000000311e-02
3.906666666666666621e-01 6.533333333333332660e-02
3.913333333333333108e-01 6.466666666666666397e-02
3.939999999999999614e-01 6.333333333333332482e-02
3.979999999999999649e-01 6.066666666666666735e-02
4.019999999999999685e-01 5.799999999999999600e-02
4.066666666666666763e-01 5.600000000000000117e-02
4.119999999999999774e-01 5.333333333333332982e-02
4.186666666666666314e-01 5.133333333333333498e-02
camera matrix:
8.830992819912107734e+02 0.000000000000000000e+00 8.279597017259351333e+02
0.000000000000000000e+00 8.678798953432041117e+02 5.529431146654350187e+02
0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00
I ran your data, but I noticed that one of the points was missing in the drawing, so I took a closer look at your data.
Base camera: point 1 and point 2 are identical, which can make the estimation rank deficient. Any collinear pattern can also cause rank deficiency, so pick points that are spread far enough apart to constrain the epipolar geometry well:
6.899999999999999467e-01 4.066666666666666347e-02
6.899999999999999467e-01 4.066666666666666347e-02
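If your extraction step can produce repeated correspondences, it is worth filtering them out before calling findEssentialMat. A minimal sketch of such a filter (my own suggestion, not part of your pipeline):

import numpy as np

def drop_duplicate_pairs(pts_a, pts_b):
    # Treat each correspondence as a row (xa, ya, xb, yb) and keep only the
    # first occurrence of every unique row, preserving the pairing.
    pairs = np.hstack([pts_a, pts_b])
    _, idx = np.unique(pairs, axis=0, return_index=True)
    keep = np.sort(idx)
    return pts_a[keep], pts_b[keep]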
I then used all 10 of your points anyway, and the calculation produced a valid output, i.e. no (0, 0, -1) translation:
E [0.3020626878915512, -0.5152322345602659, -0.2301187342264469;
   0.4556056566568232, 0.3185648205272573, 0.2998902206892574;
   0.4240482789694776, -0.09650735405531717, 0.04705772208405343]
rotation [0.8636412841012429, 0.5040818976266604, 0.005017258321223739;
   -0.4976326944200423, 0.854097413780324, -0.1512590797870002;
   -0.0805321913287191, 0.1281368341222253, 0.9884814201091126]
rodrigues [0.146966997598983, 0.04500046398453182, -0.5269188943544275]
translation [-0.4250629252517419, -0.4494569974529025, 0.7856907260602315]
Here is the code I used to get this:
#include <opencv2/opencv.hpp>

using namespace std;
using namespace cv;

int main(int argc, char *argv[])
{
    std::vector<cv::Point2d> Frame_A_points;
    std::vector<cv::Point2d> Frame_B_points;

    // Approximate intrinsics read off the camera matrix above.
    double focalx = 883.;
    double focaly = 867.;
    double focal = (focalx + focaly) / 2;
    cv::Point2d camera_principal_point(827, 552);

    // Scale the normalized sample points back up to pixel coordinates.
    Frame_A_points.push_back(Point2d(6.899999999999999467e-01 * focalx, 4.066666666666666347e-02 * focaly));
    Frame_A_points.push_back(Point2d(6.899999999999999467e-01 * focalx, 4.066666666666666347e-02 * focaly));
    Frame_A_points.push_back(Point2d(6.913333333333333552e-01 * focalx, 4.066666666666666347e-02 * focaly));
    Frame_A_points.push_back(Point2d(6.919999999999999485e-01 * focalx, 4.066666666666666347e-02 * focaly));
    Frame_A_points.push_back(Point2d(6.946666666666666545e-01 * focalx, 4.066666666666666347e-02 * focaly));
    Frame_A_points.push_back(Point2d(6.986666666666666581e-01 * focalx, 4.133333333333333304e-02 * focaly));
    Frame_A_points.push_back(Point2d(7.026666666666666616e-01 * focalx, 4.133333333333333304e-02 * focaly));
    Frame_A_points.push_back(Point2d(7.073333333333332584e-01 * focalx, 4.199999999999999567e-02 * focaly));
    Frame_A_points.push_back(Point2d(7.119999999999999662e-01 * focalx, 4.266666666666666524e-02 * focaly));
    Frame_A_points.push_back(Point2d(7.173333333333332673e-01 * focalx, 4.466666666666666702e-02 * focaly));
    Frame_B_points.push_back(Point2d(3.893333333333333091e-01 * focalx, 6.600000000000000311e-02 * focaly));
    Frame_B_points.push_back(Point2d(3.900000000000000133e-01 * focalx, 6.600000000000000311e-02 * focaly));
    Frame_B_points.push_back(Point2d(3.906666666666666621e-01 * focalx, 6.533333333333332660e-02 * focaly));
    Frame_B_points.push_back(Point2d(3.913333333333333108e-01 * focalx, 6.466666666666666397e-02 * focaly));
    Frame_B_points.push_back(Point2d(3.939999999999999614e-01 * focalx, 6.333333333333332482e-02 * focaly));
    Frame_B_points.push_back(Point2d(3.979999999999999649e-01 * focalx, 6.066666666666666735e-02 * focaly));
    Frame_B_points.push_back(Point2d(4.019999999999999685e-01 * focalx, 5.799999999999999600e-02 * focaly));
    Frame_B_points.push_back(Point2d(4.066666666666666763e-01 * focalx, 5.600000000000000117e-02 * focaly));
    Frame_B_points.push_back(Point2d(4.119999999999999774e-01 * focalx, 5.333333333333332982e-02 * focaly));
    Frame_B_points.push_back(Point2d(4.186666666666666314e-01 * focalx, 5.133333333333333498e-02 * focaly));

    std::cout << Frame_A_points << std::endl;

    // Draw both point sets so bad or missing correspondences stand out.
    Mat x(1000, 1000, CV_8UC3, Scalar(0, 0, 0));
    Mat y(1000, 1000, CV_8UC3, Scalar(0, 0, 0));
    double d = 0;
    for (size_t i = 0; i < Frame_A_points.size(); i++)
    {
        circle(x, Frame_A_points[i], 1, Scalar(0, 0, 255));
        circle(y, Frame_B_points[i], 1, Scalar(0, 255, 255));
        d += norm(Frame_A_points[i] - Frame_B_points[i]);
    }
    cout << "Mean marker distance " << d / Frame_A_points.size() << "\n";

    // Estimate the essential matrix and decompose it into R and t.
    cv::Mat essential_matrix = cv::findEssentialMat(Frame_A_points, Frame_B_points, focal, camera_principal_point, cv::LMEDS);
    cv::Mat rotation = Mat::zeros(3, 3, CV_64F), translation = Mat::zeros(3, 1, CV_64F);
    cv::recoverPose(essential_matrix, Frame_A_points, Frame_B_points, rotation, translation, focal, camera_principal_point);
    cv::Mat rot(3, 1, CV_64F);
    cv::Rodrigues(rotation, rot);

    std::cout << "E " << essential_matrix << std::endl;
    std::cout << "rotation " << rotation << std::endl;
    std::cout << "rodrigues " << rot.t() << std::endl;
    std::cout << "translation " << translation.t() << std::endl;

    imshow("ptx", x);
    imshow("pty", y);
    waitKey();
}
Note that the translation vector is recovered only up to scale, which means you cannot recover the full pose without depth information. You can look at how other people obtain a scaled translation by multiplying the estimated t by a ground-truth scale, for example:
https://github.com/alishobeiri/Monocular-Video-Odometery/blob/master/monovideoodometery.py
if self.id < 2:
    E, _ = cv2.findEssentialMat(self.good_new, self.good_old, self.focal, self.pp, cv2.RANSAC, 0.999, 1.0, None)
    _, self.R, self.t, _ = cv2.recoverPose(E, self.good_old, self.good_new, self.R, self.t, self.focal, self.pp, None)
else:
    E, _ = cv2.findEssentialMat(self.good_new, self.good_old, self.focal, self.pp, cv2.RANSAC, 0.999, 1.0, None)
    _, R, t, _ = cv2.recoverPose(E, self.good_old, self.good_new, self.R.copy(), self.t.copy(), self.focal, self.pp, None)

    absolute_scale = self.get_absolute_scale()
    if (absolute_scale > 0.1 and abs(t[2][0]) > abs(t[0][0]) and abs(t[2][0]) > abs(t[1][0])):
        self.t = self.t + absolute_scale * self.R.dot(t)
        self.R = R.dot(self.R)
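The idea behind get_absolute_scale there is to measure how far the camera actually moved between two consecutive frames using ground truth. A rough sketch of that idea (hypothetical names; the linked repository reads the positions from a pose file):

import numpy as np

def get_absolute_scale(true_positions, frame_id):
    # Scale = distance traveled between two consecutive ground-truth
    # camera positions; true_positions is an (N, 3) array (assumed input).
    prev = true_positions[frame_id - 1]
    curr = true_positions[frame_id]
    return float(np.linalg.norm(curr - prev))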