Object Tracking Robot
Here is the blog post of the AI Robotics Case - Open Source AI Object Tracking with SMD
The Object Tracking Robot is a project that uses AI-powered image processing to follow the specified object. The robot has a webcam that captures video of the environment. The captured images are processed using the MobileNetSSD model.
The NVIDIA Jetson Nano is used as the brain of the robot; the Jetson Nano's GPU is a powerful tool for image processing and AI applications.
The robot searches for the input object (the list of objects is shown in the code), if it sees the object with high accuracy, it goes directly to the object. When it gets close to it, it stops and looks at the object. However, if the object is out of frame, the robot first looks in the direction in which the object was last seen, and then on the other side. If the object is found in one of these directions, the robot will turn to that side and continue to follow the object.
The project is coded in Python, using the SMD Python Library.
Here is the code of the project:
#Import the neccesary libraries
import argparse
import cv2
from smd.red import *
import os
# Construct the argument parse
parser = argparse.ArgumentParser(
description='Script to run object detection network')
parser.add_argument("--prototxt", default = "MobileNetSSD.prototxt",
help = 'Path to text network file')
parser.add_argument("--weights", default="MobileNetSSD.caffemodel",
help='Path to weights')
parser.add_argument("--thr", default=0.2, type=float, help="Confidence threshold to filter out weak detections")
parser.add_argument("--close", default=0.35, type=float, help="How much robot is getting close. Lower values robot will stop further away")
parser.add_argument("--object", default='person', help="What object robot should follow. Objects are:"
"background, aeroplane, bicycle, bird, boat, bottle, bus, "
"car, cat, chair, cow, diningtable, dog, horse, motorbike, person, "
"pottedplant, sheep, sofa, train, tvmonitor")
args = parser.parse_args()
# Labels of Network.
classNames = { 0: 'background',
1: 'aeroplane', 2: 'bicycle', 3: 'bird', 4: 'boat',
5: 'bottle', 6: 'bus', 7: 'car', 8: 'cat', 9: 'chair',
10: 'cow', 11: 'diningtable', 12: 'dog', 13: 'horse',
14: 'motorbike', 15: 'person', 16: 'pottedplant',
17: 'sheep', 18: 'sofa', 19: 'train', 20: 'tvmonitor' }
# Open camera device.
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
size = (frame_width, frame_height)
result = cv2.VideoWriter('recording.avi', cv2.VideoWriter_fourcc(*'MJPG'), 5, size)
#Load the Caffe model
net = cv2.dnn.readNetFromCaffe(args.prototxt, args.weights)
usb_port = "/dev/ttyUSB0"
# We need to read and write data to usb port jetson nano normally doesn't allow it
os.system('sudo chmod a+rw ' + usb_port)
class Robot:
def __init__(self):
# SMD setup
self.port = usb_port
self.m = Master(self.port)
self.m.attach(Red(0))
self.m.attach(Red(1))
self.m.set_connected_modules(0,["Servo_1"])
self.servo_pos = 90 #Current servo position
self.last_error = 0 #Last error of motors used for PID
self.las_dir = 0 #Last direction object is detected
self.last_error_servo = 0 #Last error of servo used for PID
# Motor setup
# SMD 0 is left motor
# SMD 1 is right motor
self.m.set_shaft_cpr(0, 6533)
self.m.set_shaft_rpm(0, 100)
self.m.set_shaft_cpr(1, 6533)
self.m.set_shaft_rpm(1, 100)
self.m.set_operation_mode(0, OperationMode.Velocity)
self.m.set_operation_mode(1, OperationMode.Velocity)
self.m.enable_torque(0, True)
self.m.enable_torque(1, True)
self.m.set_servo(0, 1, 90)
# Robot parameters
self.rpm_left = 0 # Initial RPM for the left motor
self.rpm_right = 0 # Intial RPM for the right motor
# Drives the motors given error
def motor_drive(self, error):
kp = 0.2
kd = 0.1
# PID calculation for motors max 100 min -100
self.rpm_left = min(100,max(-100,int(kp*error + 80 + kd*(self.last_error - error))))
self.rpm_right = min(100,max(-100,int(-kp * error + 80 - kd*(self.last_error - error))))
self.last_error = error
# Motor speeds
print("left speed:{},right speed:{}".format(self.rpm_left, self.rpm_right))
self.m.set_velocity(1, self.rpm_left)
self.m.set_velocity(0, -self.rpm_right)
# Stops the motors
def stop(self):
self.m.set_velocity(1,0)
self.m.set_velocity(0,0)
#Calculates ratio between frame and the total are of object used for estimateing distance to object
def calc_object_ratio_to_frame(self, xl, xr, yl, yr):
return ((xr - xl) * (yr - yl))/(640*480)
# AI object detection
def detect(self):
# Capture frame-by-frame
ret, frame = cap.read()
frame_resized = cv2.resize(frame,(300,300)) # Resize frame for prediction
# We perform a mean subtraction (127.5, 127.5, 127.5) to normalize the input;
# after executing this command our "blob" now has the shape: (1, 3, 300, 300)
blob = cv2.dnn.blobFromImage(frame_resized, 0.007843, (300, 300), (127.5, 127.5, 127.5), False)
# Set to network the input blob
net.setInput(blob)
# Prediction of network
detections = net.forward()
# Size of frame resize (300x300)
cols = frame_resized.shape[1]
rows = frame_resized.shape[0]
# For get the class and location of object detected,
# There is a fix index for class, location and confidence
# value in @detections array .
for i in range(detections.shape[2]):
confidence = detections[0, 0, i, 2] #Confidence of prediction
if confidence > args.thr: # Filter prediction
class_id = int(detections[0, 0, i, 1]) # Class label
# Object location
xLeftBottom = int(detections[0, 0, i, 3] * cols)
yLeftBottom = int(detections[0, 0, i, 4] * rows)
xRightTop = int(detections[0, 0, i, 5] * cols)
yRightTop = int(detections[0, 0, i, 6] * rows)
# Factor for scale to original size of frame
heightFactor = frame.shape[0]/300.0
widthFactor = frame.shape[1]/300.0
# Scale object detection to frame
xLeftBottom = int(widthFactor * xLeftBottom)
yLeftBottom = int(heightFactor * yLeftBottom)
xRightTop = int(widthFactor * xRightTop)
# Draw rectangle and name of the object to the frame
yRightTop = int(heightFactor * yRightTop)
if class_id in classNames:
label = classNames[class_id] + ": " + str(confidence)
labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
yLeftBottom = max(yLeftBottom, labelSize[1])
cv2.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),(0, 255, 0))
cv2.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]),
(xLeftBottom + labelSize[0], yLeftBottom + baseLine),
(255, 255, 255), cv2.FILLED)
cv2.putText(frame, label, (xLeftBottom, yLeftBottom),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
# Save the frame for the video
result.write(frame)
# Check if object is what we are looking for
if classNames[class_id] == args.object:
return class_id, confidence, yRightTop, yLeftBottom, xRightTop, xLeftBottom
# If we don't find our object return 0's
return 0,0,0,0,0,0
# Look for object using the servo in the latest direction object is detected
def look_for_object(self, dir):
self.m.set_servo(1, 5, 120)
if dir == 0:
for i in reversed(range(0,10)):
class_id, confidence, yRightTop, yLeftBottom, xRightTop, xLeftBottom = self.detect()
if confidence != 0 and classNames[class_id] == args.object:
return i*10
self.m.set_servo(0,1,i*10)
while True:
for i in range(0,19):
class_id, confidence, yRightTop, yLeftBottom, xRightTop, xLeftBottom = self.detect()
if confidence != 0 and classNames[class_id] == args.object:
return i*10
self.m.set_servo(0,1,i*10)
for i in reversed(range(0,19)):
class_id, confidence, yRightTop, yLeftBottom, xRightTop, xLeftBottom = self.detect()
if confidence != 0 and classNames[class_id] == args.object:
return i*10
self.m.set_servo(0,1,i*10)
else:
for i in range(9,19):
class_id, confidence, yRightTop, yLeftBottom, xRightTop, xLeftBottom = self.detect()
if confidence != 0 and classNames[class_id] == args.object:
return i*10
self.m.set_servo(0,1,i*10)
while True:
for i in reversed(range(0,19)):
class_id, confidence, yRightTop, yLeftBottom, xRightTop, xLeftBottom = self.detect()
if confidence != 0 and classNames[class_id] == args.object:
return i*10
self.m.set_servo(0,1,i*10)
for i in range(0,19):
class_id, confidence, yRightTop, yLeftBottom, xRightTop, xLeftBottom = self.detect()
if confidence != 0 and classNames[class_id] == args.object:
return i*10
self.m.set_servo(0,1,i*10)
# Turn the body of the robot to the specifed degree
def turn(self, deg):
d = 0.5
deg -= 90
self.m.set_velocity(0, deg*d)
self.m.set_velocity(1, deg*d)
self.m.set_servo(0, 1, 95)
self.servo_pos = 95
time.sleep(1)
self.m.set_velocity(0, 0)
self.m.set_velocity(1, 0)
# If a object is close track it with only the servo
def track_object_close(self, error):
kp = 0.02
kd = 0.001
# PID calculation for servo amx 180 min 0
change = kp*error + kd*(self.last_error_servo - error)
self.servo_pos = min(180, max(0, self.servo_pos + change))
self.last_error_servo = error
# Servo position
print(f"Servo:{self.servo_pos}")
self.m.set_servo(0, 1, int(self.servo_pos))
# If object is getting out of frame turn the body
if self.servo_pos > 95:
self.las_dir = 1
else:
self.las_dir = 0
if self.servo_pos > 160:
self.turn(self.servo_pos)
elif self.servo_pos < 20:
self.turn(self.servo_pos)
# Main loop of our object
def run(self):
while True:
# Object detection
class_id, confidence, yRightTop, yLeftBottom, xRightTop, xLeftBottom = self.detect()
# If no object is detected look for it
if confidence == 0:
self.stop()
deg = self.look_for_object(self.las_dir)
self.turn(deg)
continue
# Calculates the error, error is difference between left and right contours to the frame lenght
rightDif = 640 - xRightTop
leftDif = xLeftBottom
error = rightDif - leftDif
ratio = self.calc_object_ratio_to_frame(xLeftBottom,xRightTop,yLeftBottom,yRightTop)
# Error is multiplied with ratio to make robot take faster turns at close and slower at distance
error *= ratio
# Determine if object is left or right from the error
if error > 0:
self.las_dir = 1
else:
self.las_dir = 0
# If object is close enough stop and start tracking
if ratio < args.close:
self.m.set_servo(0, 1, 95)
self.motor_drive(error)
else:
self.stop()
self.track_object_close(error/ratio)
robo = Robot()
try:
robo.run()
except KeyboardInterrupt:
robo.stop()
cap.release()
result.release()
Last updated