12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576 |
- import cv2
- import torch
- import numpy as np
- import onnxruntime as ort
- from utils.general import non_max_suppression, scale_coords
- from utils.BaseDetector import baseDet
- from utils.datasets import letterbox
- import logging
# Configure the root logger at import time: INFO and above is written to
# 'detection_log.txt', each record prefixed with its timestamp.
logging.basicConfig(
    filename='detection_log.txt',
    level=logging.INFO,
    format='%(asctime)s - %(message)s',
)
class Detector(baseDet):
    """Object detector that runs a YOLOv5 ONNX model through ONNX Runtime.

    The model is loaded once at construction time.  ``detect`` runs one frame
    through the network and returns only the boxes labelled ``person``,
    ``car`` or ``truck``.
    """

    # Labels that detect() keeps; every other class is dropped.
    _KEPT_LABELS = frozenset({'person', 'car', 'truck'})
    # Second positional argument after the confidence threshold passed to
    # non_max_suppression (the IoU threshold in stock YOLOv5 -- TODO confirm
    # against utils.general).
    _IOU_THRESHOLD = 0.4
    # Execution providers in order of preference.
    _PREFERRED_PROVIDERS = ('CUDAExecutionProvider', 'CPUExecutionProvider')

    def __init__(self):
        """Create the detector, load the ONNX session and build the base config."""
        super(Detector, self).__init__()
        self.device = None
        self.weights = None
        self.session = None
        self.names = None
        self.img_size = 640
        self.init_model()
        self.build_config()

    def init_model(self):
        """Load the ONNX weights and set ``weights``, ``device``, ``session`` and ``names``."""
        self.weights = 'weights/yolov5s.onnx'
        self.device = '0' if torch.cuda.is_available() else 'cpu'
        # Only request providers this onnxruntime build actually offers, so
        # the session still loads on a CPU-only install instead of depending
        # on a provider that is not there.
        available = ort.get_available_providers()
        providers = [p for p in self._PREFERRED_PROVIDERS if p in available]
        self.session = ort.InferenceSession(self.weights, providers=providers)
        self.names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
                      'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
                      'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
                      'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
                      'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
                      'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
                      'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
                      'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
                      'hair drier', 'toothbrush']

    def preprocess(self, img):
        """Turn a frame into the float32 batch fed to the ONNX session.

        Args:
            img: Input image as a 3-D array with the channel axis last
                (presumably a BGR frame from OpenCV -- confirm with caller).

        Returns:
            A tuple ``(img0, batch)`` where ``img0`` is an untouched copy of
            the input and ``batch`` is a contiguous float32 array with a
            leading batch axis, channels first, values divided by 255.
        """
        img0 = img.copy()
        # Letterbox straight from the original frame.  Stretching the frame to
        # a square first (as was done previously) discards the aspect ratio,
        # while detect() maps boxes back with scale_coords, which assumes
        # aspect-preserving padding -- boxes on non-square frames came out
        # misplaced.  auto=False requests padding to the full img_size square
        # so the network input keeps a fixed size (assumes the stock YOLOv5
        # letterbox signature -- TODO confirm against utils.datasets).
        img = letterbox(img, new_shape=self.img_size, auto=False)[0]
        # Reverse the channel order and move channels first (HWC -> CHW).
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img).astype(np.float32)
        img /= 255.0
        # Add the batch axis expected by the session input.
        img = np.expand_dims(img, axis=0)
        return img0, img

    def detect(self, im):
        """Run detection on one frame.

        Args:
            im: Input image, passed unchanged to ``preprocess``.

        Returns:
            A tuple ``(im0, pred_boxes)``.  ``im0`` is the copy of the input
            returned by ``preprocess``; ``pred_boxes`` is a list of
            ``(x1, y1, x2, y2, label, conf)`` tuples with integer corner
            coordinates rescaled to ``im0`` and ``label`` one of ``person``,
            ``car`` or ``truck``.  ``conf`` is passed through exactly as
            yielded by the NMS output rows.
        """
        im0, img = self.preprocess(im)

        # Run the ONNX model on its first declared input.
        input_name = self.session.get_inputs()[0].name
        raw = self.session.run(None, {input_name: img})[0]
        raw = torch.from_numpy(raw.astype(np.float32))
        # self.threshold is not set in this class; it is expected to be
        # provided by the base class configuration.
        pred = non_max_suppression(raw, self.threshold, self._IOU_THRESHOLD)

        pred_boxes = []
        for det in pred:
            if det is None or not len(det):
                continue
            # Map box corners from the network input size back to the frame.
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
            for *x, conf, cls_id in det:
                lbl = self.names[int(cls_id)]
                if lbl not in self._KEPT_LABELS:  # Filter unwanted labels
                    continue
                x1, y1, x2, y2 = (int(v) for v in x[:4])
                pred_boxes.append((x1, y1, x2, y2, lbl, conf))
        return im0, pred_boxes
|