"""ONNX-based object detector built on the project's ``baseDet`` class."""

import logging

import cv2
import numpy as np
import onnxruntime as ort
import torch

from utils.BaseDetector import baseDet
from utils.datasets import letterbox
from utils.general import non_max_suppression, scale_coords

logging.basicConfig(filename='detection_log.txt', level=logging.INFO,
                    format='%(asctime)s - %(message)s')

# Class labels, indexed by the class id found in each detection row.
_CLASS_NAMES = (
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
    'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
    'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
    'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
    'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
    'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
    'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
    'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
    'scissors', 'teddy bear', 'hair drier', 'toothbrush',
)


class Detector(baseDet):
    """Run an ONNX detection model and return person/car/truck boxes.

    The constructor loads the model via ``init_model()`` and then calls
    ``build_config()``, which is not defined in this class (presumably
    inherited from ``baseDet`` -- confirm).
    """

    # Only detections carrying one of these labels are returned by detect().
    _KEPT_LABELS = frozenset({'person', 'car', 'truck'})

    # Third positional argument handed to non_max_suppression().
    # NOTE(review): presumably the IoU threshold -- confirm against
    # utils.general.non_max_suppression.
    _NMS_IOU_THRESHOLD = 0.4

    def __init__(self):
        super().__init__()
        self.device = None
        self.weights = None
        self.session = None
        self.names = None
        self.img_size = 640
        self.init_model()
        self.build_config()

    def init_model(self):
        """Create the onnxruntime session and set the class-name table."""
        self.weights = 'weights/yolov5s.onnx'
        self.device = '0' if torch.cuda.is_available() else 'cpu'
        # CUDA is preferred; the CPU provider is listed as a fallback so the
        # session can still be created on machines without a usable CUDA
        # provider (self.device already allows for 'cpu').
        self.session = ort.InferenceSession(
            self.weights,
            providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
        )
        self.names = list(_CLASS_NAMES)

    def preprocess(self, img):
        """Turn a frame into the float32 batch fed to the model.

        Args:
            img: Image array indexed as (row, column, channel); the channel
                axis is reversed here (presumably BGR -> RGB -- confirm
                against the caller).

        Returns:
            ``(img0, batch)`` where ``img0`` is an untouched copy of the
            input and ``batch`` is a contiguous float32 array scaled by
            1/255 with a leading batch axis.
        """
        img0 = img.copy()
        # Letterbox only -- no prior cv2.resize. Stretching the frame to a
        # square first distorts the aspect ratio, and scale_coords() in
        # detect() would then map boxes back with the wrong gain/padding.
        # auto=False is intended to pad to the full img_size square so the
        # tensor shape stays fixed (confirm against utils.datasets.letterbox).
        img = letterbox(img, new_shape=self.img_size, auto=False)[0]
        # Reverse the channel axis and move channels first: HWC -> CHW.
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img, dtype=np.float32)
        img /= 255.0
        if img.ndim == 3:
            img = np.expand_dims(img, axis=0)
        return img0, img

    def detect(self, im):
        """Detect objects in ``im`` and keep only the labels of interest.

        Args:
            im: Input image array, passed straight to ``preprocess``.

        Returns:
            ``(im0, pred_boxes)`` where ``im0`` is a copy of the input and
            ``pred_boxes`` is a list of ``(x1, y1, x2, y2, label, conf)``
            tuples in ``im0`` pixel coordinates. ``conf`` is left as the
            element yielded by iterating the detection row.
        """
        im0, img = self.preprocess(im)

        input_name = self.session.get_inputs()[0].name
        raw = self.session.run(None, {input_name: img})[0]
        pred = torch.from_numpy(raw.astype(np.float32))
        # NOTE(review): self.threshold is not set in this class; presumably
        # it is provided by baseDet / build_config() -- confirm.
        pred = non_max_suppression(pred, self.threshold,
                                   self._NMS_IOU_THRESHOLD)

        pred_boxes = []
        for det in pred:
            if det is None or not len(det):
                continue
            # Map boxes from the network input size back onto the original
            # frame.
            det[:, :4] = scale_coords(
                img.shape[2:], det[:, :4], im0.shape).round()
            for *xyxy, conf, cls_id in det:
                lbl = self.names[int(cls_id)]
                if lbl not in self._KEPT_LABELS:
                    continue
                x1, y1, x2, y2 = (int(v) for v in xyxy[:4])
                pred_boxes.append((x1, y1, x2, y2, lbl, conf))
        return im0, pred_boxes