123456789101112131415 |
- from typing import Tuple
- import torchvision
- from torch import Tensor
class KineticsWithVideoId(torchvision.datasets.Kinetics):
    """Kinetics dataset whose items also carry the index of the source video.

    Each item is the clip fetched from ``self.video_clips`` plus the label
    looked up in ``self.samples`` and the index of the video the clip was
    taken from, so callers can group clips by their originating video.
    """

    def __getitem__(self, idx: int) -> Tuple[Tensor, Tensor, int, int]:
        """Return the clip at ``idx`` with its label and video index.

        Args:
            idx: Clip index, forwarded to ``self.video_clips.get_clip``.

        Returns:
            A ``(video, audio, label, video_idx)`` tuple. ``video`` has been
            passed through ``self.transform`` when a transform is set;
            ``audio`` is returned exactly as produced by ``get_clip``;
            ``label`` is the second field of ``self.samples[video_idx]``;
            ``video_idx`` is the video index reported by ``get_clip``.
        """
        # get_clip also yields a per-clip info object, which is not used here.
        video, audio, _, video_idx = self.video_clips.get_clip(idx)
        label = self.samples[video_idx][1]

        # Only the video stream is transformed; audio is left untouched.
        if self.transform is not None:
            video = self.transform(video)

        return video, audio, label, video_idx
|