datasets.py 440 B

123456789101112131415
  1. from typing import Tuple
  2. import torchvision
  3. from torch import Tensor
  4. class KineticsWithVideoId(torchvision.datasets.Kinetics):
  5. def __getitem__(self, idx: int) -> Tuple[Tensor, Tensor, int]:
  6. video, audio, info, video_idx = self.video_clips.get_clip(idx)
  7. label = self.samples[video_idx][1]
  8. if self.transform is not None:
  9. video = self.transform(video)
  10. return video, audio, label, video_idx