The Context
In a previous post, I showed how we could use an ArUco tag to find the position and orientation of a camera. This was done using the four exterior points on the tag. If we detect more points on the tag, we could position the camera more accurately with respect to the tag.
At a high level, what we can do is:

1. Detect the exterior corners of the tag
2. Compute the homography (relationship) between the tag and its image
3. Detect points in the image
4. Transform them using the homography
5. Detect points in the tag
6. Match the transformed points from the image with points in the tag
7. Use all of the points to position the camera

As always, let’s start by importing what we need.

import numpy as np
import cv2
import cv2.aruco as aruco
import matplotlib.pyplot as plt
from scipy.spatial.transform import Rotation as Rot
import glob
from sklearn.neighbors import KDTree

plt.rcParams['figure.figsize'] = [20,20]

I’ve already calibrated the camera, which means that we have both the camera matrix (K) and the distortion parameters for the camera.
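For reference, here is a minimal sketch of how such a calibration could be produced from checkerboard photos; the image folder and the 9x6 board size are placeholders for illustration, not values from this post.

# Sketch only: estimate K and dist from checkerboard photos (hypothetical folder and board size).
objp = np.zeros((9*6, 3), np.float32)
objp[:, :2] = np.mgrid[0:9, 0:6].T.reshape(-1, 2)

obj_points, img_points = [], []
for f in glob.glob('data/calibration/*.jpg'):
    g = cv2.cvtColor(cv2.imread(f), cv2.COLOR_BGR2GRAY)
    found, corners = cv2.findChessboardCorners(g, (9, 6))
    if found:
        obj_points.append(objp)
        img_points.append(corners)

ret, K_est, dist_est, rvecs, tvecs = cv2.calibrateCamera(obj_points, img_points, g.shape[::-1], None, None)

For this post, the calibrated values are: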
K = np.array([[3286.9, 0.0,    2032.8],
              [0.0,    3282.3, 1519.1],
              [0.0,    0.0,    1.0]])

dist = np.array([[2.44663616e-01],[-1.48023303e+00],[2.04607109e-03],[1.53484564e-03],[2.56349651e+00]])

points_3d = np.array([[0.0, 1.0, 0.0],
                      [1.0, 1.0, 0.0],
                      [1.0, 0.0, 0.0],
                      [0.0, 0.0, 0.0]])

tag_size_pixels = 1_000
Now we need to configure our ArUco code detector.
Firstly, we specify that it should look for a “4X4_50” type tag.
arucoDict = cv2.aruco.Dictionary_get(cv2.aruco.DICT_4X4_50)
Secondly, we configure the detector parameters; in particular, we ask it to refine the detected corners to sub-pixel accuracy.
arucoParams = cv2.aruco.DetectorParameters_create()
arucoParams.cornerRefinementMethod = aruco.CORNER_REFINE_SUBPIX
arucoParams.cornerRefinementWinSize = 25
Detect the exterior corners of the tag
Now let’s extract the exterior points of the tag.
fname = 'data/2021-02-24-Positioning-Using-ArUco-Tags/ArUco.jpg'
img = cv2.imread(fname)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

(aruco_tag_corners, ids, rejected) = cv2.aruco.detectMarkers(gray, arucoDict, parameters=arucoParams)
aruco_tag_corners = np.array(aruco_tag_corners).squeeze()
Compute the homography
Now let’s find the homography/relationship between the tag and the image.
def find_homography(corners, tag_size_pixels):
    src_pts = corners
    # corresponding corners of the tag in its own (tag-sized) coordinate frame
    dst_pts = np.array([[0, tag_size_pixels], [tag_size_pixels, tag_size_pixels],
                        [tag_size_pixels, 0], [0, 0]])
    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
    return(M)

tag_index = 0
tag_number = int(ids[tag_index])

M = find_homography(aruco_tag_corners[tag_index,:,:], tag_size_pixels)
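As a quick, optional sanity check, we could push the detected exterior corners through M and confirm they land close to the four corners used as dst_pts:

# Map the detected exterior corners into the tag frame.
corners = aruco_tag_corners[tag_index,:,:].reshape(-1, 1, 2).astype(np.float32)
print(cv2.perspectiveTransform(corners, M).squeeze())
# should be approximately [[0, 1000], [1000, 1000], [1000, 0], [0, 0]]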
Detect points in the image
Now let’s detect the points in the image and the tag.
def detect_features(image,max_num_features=200,threshold=0.01,min_distance=50):
= cv2.goodFeaturesToTrack(image,max_num_features,threshold,min_distance)
detected_corners = (5, 5)
winSize = (-1, -1)
zeroZone = (cv2.TERM_CRITERIA_EPS + cv2.TermCriteria_COUNT, 40, 0.001)
criteria # Calculate the refined corner locations
= cv2.cornerSubPix(image, detected_corners, winSize, zeroZone, criteria)
detected_corners return(detected_corners)
= detect_features(gray)
detected_features_image for i in detected_features_image:
= i.ravel()
x,y
plt.scatter(x,y)
-1])
plt.imshow(img[:,:,:: plt.show()
def load_tag(tag_number, tag_size_pixels):
    # load the ArUco dictionary and draw the tag into a blank image
    arucoDict = cv2.aruco.Dictionary_get(cv2.aruco.DICT_4X4_50)
    tag = np.zeros((tag_size_pixels, tag_size_pixels, 1), dtype="uint8")
    cv2.aruco.drawMarker(arucoDict, tag_number, tag_size_pixels, tag, 1);
    tag = tag.squeeze();
    return(tag)
Transform the points using the homography
Now let’s transform the points detected in the image using the homography. This means that a point seen in the image should end up at the same x,y position as the corresponding point in the tag after the transform.
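Under the hood, applying the homography just means multiplying homogeneous coordinates by M and dividing by the last component. For a single detected point, a manual version would look like this (using the first detected feature as an example):

# This reproduces what cv2.perspectiveTransform does for one point.
px, py = detected_features_image[0].ravel()
p = np.array([px, py, 1.0])
q = M @ p
print(q[0] / q[2], q[1] / q[2])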
detected_features_image_transformed = cv2.perspectiveTransform(detected_features_image, M)
# flip the y-axis so the transformed points can be overlaid on the tag image below
detected_features_image_transformed[:,:,1] = tag_size_pixels - detected_features_image_transformed[:,:,1]
Detect points in the tag
tag = load_tag(tag_number, tag_size_pixels)
detected_features_tag = detect_features(tag, max_num_features=200, threshold=0.1, min_distance=50)
Now let’s visualise the extracted key points. I’m plotting the transformed points from the image in red and the features from the tag in blue.
for feature in detected_features_image_transformed:
    x, y = feature.ravel()
    plt.scatter(x, y, color='r', alpha=0.5, s=1_000)

for feature in detected_features_tag:
    x, y = feature.ravel()
    plt.scatter(x, y, color='b', alpha=0.5, s=1_000)

plt.imshow(tag, cmap='gray')
plt.show()
# flip both sets of points back into the tag's y-up frame before matching
detected_features_image_transformed[:,:,1] = tag_size_pixels - detected_features_image_transformed[:,:,1]
detected_features_tag[:,:,1] = tag_size_pixels - detected_features_tag[:,:,1]
Match the transformed points from the image with points in the tag
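The matching itself is just a nearest-neighbour lookup: we build a KD-tree over the transformed image features and, for each tag feature, ask for its single closest neighbour. A minimal illustration of that query pattern, with made-up points:

# Build a KD-tree over some 2D points and query the closest one.
pts = np.array([[0.0, 0.0], [10.0, 0.0], [0.0, 10.0]])
tree = KDTree(pts, leaf_size=2)
dist_ex, ind_ex = tree.query(np.array([[1.0, 1.0]]), k=1)  # nearest point is pts[0]
print(dist_ex, ind_ex)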
def match_features(detected_features_image_transformed, detected_features_tag):
    # build a KD-tree over the transformed image features
    X = detected_features_image_transformed.squeeze()
    tree = KDTree(X, leaf_size=2)

    detected_features_tag = detected_features_tag.squeeze()

    points_3d = []
    points_2d = []

    # for each tag feature, find the nearest transformed image feature
    for i in range(detected_features_tag.shape[0]):
        dist, ind = tree.query(detected_features_tag[i].reshape((1,-1)), k=1)

        points_3d.append(detected_features_tag[i])
        points_2d.append(detected_features_image[int(ind)])

    points_3d = np.array(points_3d)
    points_2d = np.array(points_2d)
    points_2d = points_2d.squeeze()

    # scale the tag coordinates so that the tag has a side length of 1
    points_3d /= tag_size_pixels

    return(points_2d, points_3d)
points_2d_interior, points_3d_interior = match_features(detected_features_image_transformed, detected_features_tag)
points_3d_exterior = np.array([[0.0, 1.0],
                               [1.0, 1.0],
                               [1.0, 0.0],
                               [0.0, 0.0]])

points_2d_exterior = aruco_tag_corners[tag_index,:,:].squeeze()
points_3d = np.vstack([points_3d_exterior, points_3d_interior])
points_2d = np.vstack([points_2d_exterior, points_2d_interior])

# the tag is planar, so every 3D point gets z = 0
points_3d = np.hstack([points_3d, np.zeros((points_3d.shape[0],1))])

plt.scatter(points_3d[:,0], points_3d[:,1])
plt.show()
Use all of the points to position the camera
Instead of trusting every correspondence equally, let’s use solvePnPRansac: RANSAC repeatedly fits a pose to random subsets of the correspondences and keeps the pose supported by the most inliers, so bad matches from the nearest-neighbour step get discarded.
ret, rvec, tvec, inliers = cv2.solvePnPRansac(objectPoints = points_3d,
                                              imagePoints = points_2d,
                                              cameraMatrix = K,
                                              distCoeffs = dist,
                                              reprojectionError = 5**2,
                                              iterationsCount = 1_000)
Now let’s find the camera centre with respect to the marker. A world point X is mapped into camera coordinates as X_cam = R X + t, so the camera centre C (the point that maps to the camera origin) satisfies R C + t = 0, giving C = -Rᵀ t.
r = Rot.from_rotvec(rvec[:,0])
R = r.as_matrix()
t = np.array(tvec)
C = -np.dot(R.T, tvec)
print(C)

[[ 0.26481151]
 [-1.9600099 ]
 [ 4.03681725]]
Because the 3D points were scaled so that the tag has a side length of 1, these coordinates are in units of the tag’s side length: the camera sits roughly four tag-lengths away from the marker plane. Now, let’s visualise the coordinate axes for a quick sanity check.
axis = np.float32([[1,0,0], [0,1,0], [0,0,1]]).reshape(-1,3)
imgpts, jac = cv2.projectPoints(axis, rvec, tvec, K, dist)
imgpts = np.squeeze(imgpts)

# draw each axis from the tag's origin corner to the projected tip of the unit vector
[cx, cy] = points_2d_exterior[3]

plt.imshow(img[:,:,::-1])
plt.plot([cx, imgpts[0,0]], [cy, imgpts[0,1]], color='r', alpha=0.75, linewidth=5, label='X Axis')
plt.plot([cx, imgpts[1,0]], [cy, imgpts[1,1]], color='g', alpha=0.75, linewidth=5, label='Y Axis')
plt.plot([cx, imgpts[2,0]], [cy, imgpts[2,1]], color='b', alpha=0.75, linewidth=5, label='Z Axis')
plt.legend()
plt.show()