Source code for tonic.datasets.hsd

import os
from typing import Callable, Optional

import h5py
import numpy as np

from tonic.dataset import Dataset
from tonic.io import make_structured_array


class HSD(Dataset):
    """Heidelberg Spiking Dataset <https://arxiv.org/abs/1910.07407> contains the Spiking
    Heidelberg Digits (SHD) and the Spiking Speech Commands dataset (SSC).

    This base class implements sample access on top of a single HDF5 file.
    The methods below read ``self.location_on_system`` and
    ``self.data_filename`` to locate that file, and ``self.transform`` /
    ``self.target_transform`` to post-process samples; none of these are set
    in this class, so they must be provided by the parent class or a subclass
    before samples are accessed.
    """

    base_url = "https://zenkelab.org/datasets/"
    sensor_size = (700, 1, 1)
    dtype = np.dtype([("t", int), ("x", int), ("p", int)])
    ordering = dtype.names

    def __getitem__(self, index):
        """Load one sample from the HDF5 file.

        Parameters:
            index: Index into the ``spikes/times``, ``spikes/units`` and
                ``labels`` datasets of the HDF5 file.

        Returns:
            A tuple of (events, target). ``events`` is built by
            ``make_structured_array`` with ``dtype=self.dtype`` and
            ``target`` is the label cast to ``int``; each is passed through
            its transform when one is set.
        """
        path = os.path.join(self.location_on_system, self.data_filename)
        # The context manager closes the file handle after every access
        # instead of leaking one open handle per call. Indexing the datasets
        # reads the values into memory, so nothing depends on the file once
        # the block exits.
        with h5py.File(path, "r") as file:
            # adding artificial polarity of 1 and convert to microseconds
            events = make_structured_array(
                file["spikes/times"][index] * 1e6,
                file["spikes/units"][index],
                1,
                dtype=self.dtype,
            )
            target = file["labels"][index].astype(int)

        if self.transform is not None:
            events = self.transform(events)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return events, target

    def __len__(self):
        """Return the number of entries in the ``labels`` dataset of the HDF5 file."""
        path = os.path.join(self.location_on_system, self.data_filename)
        with h5py.File(path, "r") as file:
            return len(file["labels"])

    def _check_exists(self):
        """Return True when the dataset file is present and at least one ``.h5`` file exists.

        Delegates to ``_is_file_present`` and
        ``_folder_contains_at_least_n_files_of_type``, which are not defined
        in this class (presumably inherited from ``Dataset`` -- confirm).
        """
        return (
            self._is_file_present()
            and self._folder_contains_at_least_n_files_of_type(1, ".h5")
        )
class SHD(HSD):
    """`Spiking Heidelberg Digits <https://zenkelab.org/resources/spiking-heidelberg-datasets-shd/>`_
    ::

        @article{cramer2020heidelberg,
          title={The heidelberg spiking data sets for the systematic evaluation of spiking neural networks},
          author={Cramer, Benjamin and Stradmann, Yannik and Schemmel, Johannes and Zenke, Friedemann},
          journal={IEEE Transactions on Neural Networks and Learning Systems},
          year={2020},
          publisher={IEEE}
        }

    Parameters:
        save_to (string): Location to save files to on disk. Will put files in an 'hsd' subfolder.
        train (bool): If True, uses training subset, otherwise testing subset.
        transform (callable, optional): A callable of transforms to apply to the data.
        target_transform (callable, optional): A callable of transforms to apply to the targets/labels.

    Returns:
        A dataset object that can be indexed or iterated over. One sample returns a tuple of (events, targets).
    """

    test_zip = "shd_test.h5.zip"
    train_zip = "shd_train.h5.zip"
    test_md5 = "1503a5064faa34311c398fb0a1ed0a6f"
    train_md5 = "f3252aeb598ac776c1b526422d90eecb"
    folder_name = ""

    def __init__(
        self,
        save_to: str,
        train: bool = True,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
    ):
        super().__init__(
            save_to,
            transform=transform,
            target_transform=target_transform,
        )

        if train:
            self.url = self.base_url + self.train_zip
            self.filename = self.train_zip
            self.file_md5 = self.train_md5
        else:
            self.url = self.base_url + self.test_zip
            self.filename = self.test_zip
            self.file_md5 = self.test_md5
        # Drop the trailing ".zip" to get the name of the extracted HDF5 file.
        self.data_filename = self.filename[:-4]

        if not self._check_exists():
            self.download()

        path = os.path.join(self.location_on_system, self.data_filename)
        # Read the metadata fully into memory (``[()]``) and close the file
        # right away rather than leaving the handle open for the lifetime of
        # the dataset object.
        with h5py.File(path, "r") as file:
            self.classes = file["extra/keys"][()]
            self._speaker = file["extra/speaker"][()]

    @property
    def speaker(self):
        """Contents of the ``extra/speaker`` dataset read from the HDF5 file."""
        return self._speaker[()]
class SSC(HSD):
    """`Spiking Speech Commands <https://zenkelab.org/resources/spiking-heidelberg-datasets-shd/>`_
    ::

        @article{cramer2020heidelberg,
          title={The heidelberg spiking data sets for the systematic evaluation of spiking neural networks},
          author={Cramer, Benjamin and Stradmann, Yannik and Schemmel, Johannes and Zenke, Friedemann},
          journal={IEEE Transactions on Neural Networks and Learning Systems},
          year={2020},
          publisher={IEEE}
        }

    Parameters:
        save_to (string): Location to save files to on disk. Will put files in an 'hsd' subfolder.
        split (string): One of 'train', 'test' or 'valid'.
        transform (callable, optional): A callable of transforms to apply to the data.
        target_transform (callable, optional): A callable of transforms to apply to the targets/labels.

    Returns:
        A dataset object that can be indexed or iterated over. One sample returns a tuple of (events, targets).

    Raises:
        ValueError: If ``split`` is not one of 'train', 'test' or 'valid'.
    """

    test_zip = "ssc_test.h5.zip"
    train_zip = "ssc_train.h5.zip"
    valid_zip = "ssc_valid.h5.zip"
    test_md5 = "a35ff1e9cffdd02a20eb850c17c37748"
    train_md5 = "d102be95e7144fcc0553d1f45ba94170"
    valid_md5 = "b4eee3516a4a90dd0c71a6ac23a8ae43"
    folder_name = ""

    def __init__(
        self,
        save_to: str,
        split: str = "train",
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
    ):
        super().__init__(
            save_to, transform=transform, target_transform=target_transform
        )

        # Archive name and checksum for each supported split.
        archives = {
            "train": (self.train_zip, self.train_md5),
            "test": (self.test_zip, self.test_md5),
            "valid": (self.valid_zip, self.valid_md5),
        }
        # Fail with a clear message on an unknown split; otherwise
        # ``self.filename`` would never be set and the slice below would fail
        # with an unrelated AttributeError.
        if split not in archives:
            raise ValueError(
                "Unknown split {!r}; expected one of 'train', 'test' or 'valid'.".format(
                    split
                )
            )
        self.filename, self.file_md5 = archives[split]
        self.url = self.base_url + self.filename
        # Drop the trailing ".zip" to get the name of the extracted HDF5 file.
        self.data_filename = self.filename[:-4]

        if not self._check_exists():
            self.download()

        path = os.path.join(self.location_on_system, self.data_filename)
        # Read the metadata fully into memory (``[()]``) and close the file
        # right away rather than leaving the handle open for the lifetime of
        # the dataset object.
        with h5py.File(path, "r") as file:
            self.classes = file["extra/keys"][()]
            self._speaker = file["extra/speaker"][()]

    @property
    def speaker(self):
        """Contents of the ``extra/speaker`` dataset read from the HDF5 file."""
        return self._speaker[()]