SKU-110K.yaml 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. # Ultralytics YOLO 🚀, AGPL-3.0 license
  2. # SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax Retail
  3. # Example usage: yolo train data=SKU-110K.yaml
  4. # parent
  5. # ├── ultralytics
  6. # └── datasets
  7. #     └── SKU-110K ← downloads here (13.6 GB)
  8. # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
  9. path: ../datasets/SKU-110K # dataset root dir
  10. train: train.txt # train images (relative to 'path') 8219 images
  11. val: val.txt # val images (relative to 'path') 588 images
  12. test: test.txt # test images (optional) 2936 images
  13. # Classes
  14. names:
  15.   0: object
  16. # Download script/URL (optional) ---------------------------------------------------------------------------------------
  download: |
    # Python script that downloads SKU-110K and converts its CSV annotations to per-image label files.
    # It reads `yaml['path']` without defining `yaml`, so the caller must supply that mapping.
    import shutil
    from pathlib import Path

    import numpy as np
    import pandas as pd
    from tqdm import tqdm

    from ultralytics.utils.downloads import download
    from ultralytics.utils.ops import xyxy2xywh

    # Download
    root = Path(yaml['path'])  # dataset root dir (not named `dir`, which would shadow the builtin)
    parent = root.parent  # download dir
    urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
    download(urls, dir=parent)

    # Rename directories
    if root.exists():
        shutil.rmtree(root)  # remove any existing dataset dir before the new one is renamed into place
    (parent / 'SKU110K_fixed').rename(root)  # rename dir
    (root / 'labels').mkdir(parents=True, exist_ok=True)  # create labels dir

    # Convert labels
    names = 'image', 'x1', 'y1', 'x2', 'y2', 'class', 'image_width', 'image_height'  # column names
    cls = 0  # single-class dataset
    for d in 'annotations_train.csv', 'annotations_val.csv', 'annotations_test.csv':
        df = pd.read_csv(root / 'annotations' / d, names=names)  # annotations
        unique_images = np.unique(df['image'].to_numpy())  # sorted, de-duplicated image file names

        # Image list for this split (train.txt / val.txt / test.txt). The 'annotations_' prefix is stripped from
        # the file name only, so a dataset root whose own path contains 'annotations_' is left intact.
        split_file = (root / d.replace('annotations_', '')).with_suffix('.txt')
        with open(split_file, 'w') as f:
            f.writelines(f'./images/{s}\n' for s in unique_images)

        # Normalise every box by its own image size and convert the whole CSV in one call.
        # NOTE(review): assumes xyxy2xywh accepts an (n, 4) array just as it does a (1, 4) one - confirm.
        boxes = df[['x1', 'y1', 'x2', 'y2']].to_numpy(dtype=float)
        size = df[['image_width', 'image_height']].to_numpy(dtype=float)
        xywh = xyxy2xywh(boxes / np.tile(size, 2))  # np.tile repeats (w, h) into (w, h, w, h) per row

        # Row positions of each image, computed once instead of re-scanning all rows for every image.
        rows_of = df.groupby('image', sort=False).indices
        for im in tqdm(unique_images, desc=f'Converting {root / d}'):
            # Append mode keeps any lines already present in the label file.
            with open((root / 'labels' / im).with_suffix('.txt'), 'a') as f:
                # One line per box: class index followed by the four xywh values.
                f.writelines(f"{cls} {b[0]:.5f} {b[1]:.5f} {b[2]:.5f} {b[3]:.5f}\n" for b in xywh[rows_of[im]])