TensorFlow Lite Raspberry Pi object detection using USB webcam code example

Example: TensorFlow Lite Raspberry Pi object detection using a USB webcam

import time
import re
import numpy as np

from tflite_runtime.interpreter import Interpreter
import cv2

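# Resolution requested from the USB webcam; detected boxes are scaled back to
# these dimensions when drawn on the frame.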
CAMERA_WIDTH = 640
CAMERA_HEIGHT = 480


def load_labels(path):
  """Loads the labels file. Supports files with or without index numbers."""
  with open(path, 'r', encoding='utf-8') as f:
    lines = f.readlines()
    labels = {}
    for row_number, content in enumerate(lines):
      pair = re.split(r'[:\s]+', content.strip(), maxsplit=1)
      if len(pair) == 2 and pair[0].strip().isdigit():
        labels[int(pair[0])] = pair[1].strip()
      else:
        labels[row_number] = pair[0].strip()
  return labels


def set_input_tensor(interpreter, image):
  """Copies a resized frame into the model's input tensor."""
  input_details = interpreter.get_input_details()[0]
  _, input_height, input_width, _ = input_details['shape']
  input_tensor = interpreter.tensor(input_details['index'])()[0]
  # The COCO SSD starter model expects RGB input, while OpenCV frames are BGR.
  rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  input_tensor[:, :] = cv2.resize(rgb, (int(input_width), int(input_height)))


def get_output_tensor(interpreter, index):
  """Returns the output tensor at the given index."""
  output_details = interpreter.get_output_details()[index]
  tensor = np.squeeze(interpreter.get_tensor(output_details['index']))
  return tensor


def detect_objects(interpreter, image, threshold):
  """Returns a list of detection results, each a dictionary of object info."""
  set_input_tensor(interpreter, image)
  interpreter.invoke()

  # SSD-style detection models export four tensors: boxes, class ids, scores, count
  boxes = get_output_tensor(interpreter, 0)
  classes = get_output_tensor(interpreter, 1)
  scores = get_output_tensor(interpreter, 2)
  count = int(get_output_tensor(interpreter, 3))

  results = []
  for i in range(count):
    if scores[i] >= threshold:
      result = {
          'bounding_box': boxes[i],
          'class_id': classes[i],
          'score': scores[i]
      }
      results.append(result)
  return results


def main():
  labels = load_labels('coco_labels.txt')
  interpreter = Interpreter('detect.tflite')
  interpreter.allocate_tensors()

  # Index 1 selects the second video device; use 0 if the USB webcam is the
  # only camera attached to the Pi.
  cam = cv2.VideoCapture(1)
  cam.set(cv2.CAP_PROP_FRAME_WIDTH, CAMERA_WIDTH)
  cam.set(cv2.CAP_PROP_FRAME_HEIGHT, CAMERA_HEIGHT)

  try:
    ret = True
    while ret:
      # Read several frames so the most recent one is processed instead of a
      # stale frame left in the capture buffer.
      for _ in range(10):
        ret, frame = cam.read()
      if not ret:
        break
      image = frame

      start_time = time.monotonic()
      results = detect_objects(interpreter, image, 0.5)
      elapsed_ms = (time.monotonic() - start_time) * 1000

      for obj in results:
        # Box coordinates are normalized to [0, 1]; scale them to pixels.
        ymin, xmin, ymax, xmax = obj['bounding_box']
        xmin = int(xmin * CAMERA_WIDTH)
        xmax = int(xmax * CAMERA_WIDTH)
        ymin = int(ymin * CAMERA_HEIGHT)
        ymax = int(ymax * CAMERA_HEIGHT)
        label = labels[int(obj['class_id'])]
        score = obj['score']

        # Put the label just above the box, or inside it near the top edge.
        y = ymin - 10 if ymin - 10 > 10 else ymin + 10
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (255, 255, 255), 1)
        cv2.putText(image, '{} {:.2f} ({} ms)'.format(label, score, round(elapsed_ms)),
                    (xmin, y), cv2.FONT_HERSHEY_PLAIN, 1.5, (0, 255, 0), 1,
                    cv2.LINE_AA)

      cv2.imshow('detections', image)
      if cv2.waitKey(1) & 0xFF == ord('q'):
        break
  finally:
    cam.release()
    cv2.destroyAllWindows()


if __name__ == '__main__':
  main()
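
Note: detect_objects assumes the four output tensors are ordered as boxes, classes, scores and detection count, which matches the COCO SSD MobileNet starter model; models exported from other pipelines may order or shape them differently. A quick way to confirm the layout of the detect.tflite file you are using is to print the interpreter's tensor details, as in the minimal sketch below (same tflite_runtime package as above):

from tflite_runtime.interpreter import Interpreter

interpreter = Interpreter('detect.tflite')
interpreter.allocate_tensors()

# Input layout, e.g. [1 300 300 3] for the 300x300 SSD MobileNet starter model.
print('input:', interpreter.get_input_details()[0]['shape'])

# One line per output tensor; the names and shapes show which tensor holds the
# boxes, class ids, scores and detection count.
for detail in interpreter.get_output_details():
  print(detail['name'], detail['shape'])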