import os
from typing import Callable, Optional
import h5py
import numpy as np
from tonic.dataset import Dataset
from tonic.io import make_structured_array
class HSD(Dataset):
    """Base class for the Heidelberg Spiking Datasets.

    The `Heidelberg Spiking Datasets <https://arxiv.org/abs/1910.07407>`_ contain the
    Spiking Heidelberg Digits (SHD) and the Spiking Speech Commands (SSC) datasets.

    This class only implements sample access. It reads ``self.data_filename``,
    ``self.location_on_system``, ``self.transform`` and ``self.target_transform``,
    none of which are defined here; subclasses in this module set ``data_filename``
    in their ``__init__`` and the others are presumably provided by ``Dataset``.
    """

    base_url = "https://zenkelab.org/datasets/"
    sensor_size = (700, 1, 1)
    # Every event has an integer timestamp ``t``, unit index ``x`` and polarity ``p``.
    dtype = np.dtype([("t", int), ("x", int), ("p", int)])
    ordering = dtype.names

    def __getitem__(self, index):
        """Return the sample stored at ``index``.

        Parameters:
            index: Index into the ``spikes/times``, ``spikes/units`` and ``labels``
                datasets of the HDF5 file.

        Returns:
            A tuple ``(events, target)``. ``events`` is the structured array built from
            the spike times and units; ``target`` is the label cast to ``int``. Both are
            passed through ``transform`` / ``target_transform`` when those are set.
        """
        path = os.path.join(self.location_on_system, self.data_filename)
        # The context manager closes the HDF5 handle again; without it every access
        # would leave one more open file behind.
        with h5py.File(path, "r") as file:
            # adding artificial polarity of 1 and convert to microseconds
            # NOTE(review): if the stored times use a narrow float dtype (e.g. float16),
            # this scaling may overflow under NumPy >= 2 promotion rules - confirm the
            # dtype of ``spikes/times`` and cast explicitly if needed.
            events = make_structured_array(
                file["spikes/times"][index] * 1e6,
                file["spikes/units"][index],
                1,
                dtype=self.dtype,
            )
            target = file["labels"][index].astype(int)

        # Everything needed has been read into memory, so the transforms run after
        # the file has been closed.
        if self.transform is not None:
            events = self.transform(events)
        if self.target_transform is not None:
            target = self.target_transform(target)
        return events, target

    def __len__(self) -> int:
        """Return the number of entries in the ``labels`` dataset of the HDF5 file."""
        path = os.path.join(self.location_on_system, self.data_filename)
        with h5py.File(path, "r") as file:
            return len(file["labels"])

    def _check_exists(self):
        """Return a truthy value when the dataset files appear to be on disk.

        Combines two helpers that are not defined in this class (presumably inherited
        from ``Dataset``): the downloaded file must be present and the folder must
        contain at least one ``.h5`` file.
        """
        return (
            self._is_file_present()
            and self._folder_contains_at_least_n_files_of_type(1, ".h5")
        )
class SHD(HSD):
    """`Spiking Heidelberg Digits <https://zenkelab.org/resources/spiking-heidelberg-datasets-shd/>`_

    ::

        @article{cramer2020heidelberg,
          title={The heidelberg spiking data sets for the systematic evaluation of spiking neural networks},
          author={Cramer, Benjamin and Stradmann, Yannik and Schemmel, Johannes and Zenke, Friedemann},
          journal={IEEE Transactions on Neural Networks and Learning Systems},
          year={2020},
          publisher={IEEE}
        }

    Parameters:
        save_to (string): Location to save files to on disk.
        train (bool): If True, uses training subset, otherwise testing subset.
        transform (callable, optional): A callable of transforms to apply to the data.
        target_transform (callable, optional): A callable of transforms to apply to the targets/labels.

    Returns:
        A dataset object that can be indexed or iterated over. One sample returns a tuple of (events, targets).
    """

    test_zip = "shd_test.h5.zip"
    train_zip = "shd_train.h5.zip"
    test_md5 = "1503a5064faa34311c398fb0a1ed0a6f"
    train_md5 = "f3252aeb598ac776c1b526422d90eecb"
    folder_name = ""

    def __init__(
        self,
        save_to: str,
        train: bool = True,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
    ):
        super().__init__(
            save_to,
            transform=transform,
            target_transform=target_transform,
        )

        # Select the archive name and checksum of the requested subset.
        if train:
            archive, checksum = self.train_zip, self.train_md5
        else:
            archive, checksum = self.test_zip, self.test_md5
        self.url = self.base_url + archive
        self.filename = archive
        self.file_md5 = checksum
        # Drop the trailing ".zip" to get the name of the extracted HDF5 file.
        self.data_filename = self.filename[:-4]

        if not self._check_exists():
            self.download()

        # ``[()]`` reads the whole dataset into memory, so the values stay usable
        # after the file has been closed by the context manager.
        path = os.path.join(self.location_on_system, self.data_filename)
        with h5py.File(path, "r") as file:
            self.classes = file["extra/keys"][()]
            self._speaker = file["extra/speaker"][()]

    @property
    def speaker(self):
        """Contents of the ``extra/speaker`` dataset that was read at construction time."""
        return self._speaker[()]
class SSC(HSD):
    """`Spiking Speech Commands <https://zenkelab.org/resources/spiking-heidelberg-datasets-shd/>`_

    ::

        @article{cramer2020heidelberg,
          title={The heidelberg spiking data sets for the systematic evaluation of spiking neural networks},
          author={Cramer, Benjamin and Stradmann, Yannik and Schemmel, Johannes and Zenke, Friedemann},
          journal={IEEE Transactions on Neural Networks and Learning Systems},
          year={2020},
          publisher={IEEE}
        }

    Parameters:
        save_to (string): Location to save files to on disk.
        split (string): One of 'train', 'test' or 'valid'.
        transform (callable, optional): A callable of transforms to apply to the data.
        target_transform (callable, optional): A callable of transforms to apply to the targets/labels.

    Returns:
        A dataset object that can be indexed or iterated over. One sample returns a tuple of (events, targets).

    Raises:
        ValueError: If ``split`` is not one of 'train', 'test' or 'valid'.
    """

    test_zip = "ssc_test.h5.zip"
    train_zip = "ssc_train.h5.zip"
    valid_zip = "ssc_valid.h5.zip"
    test_md5 = "a35ff1e9cffdd02a20eb850c17c37748"
    train_md5 = "d102be95e7144fcc0553d1f45ba94170"
    valid_md5 = "b4eee3516a4a90dd0c71a6ac23a8ae43"
    folder_name = ""

    def __init__(
        self,
        save_to: str,
        split: str = "train",
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
    ):
        # Archive name and checksum for every supported split. Looked up through
        # ``self`` so that overridden class attributes are honoured.
        archives = {
            "train": (self.train_zip, self.train_md5),
            "test": (self.test_zip, self.test_md5),
            "valid": (self.valid_zip, self.valid_md5),
        }
        # Fail early with a clear message; an unknown split previously fell through
        # the if/elif chain without setting ``url``/``filename``/``file_md5``.
        if split not in archives:
            raise ValueError(
                f"Unknown split {split!r}; expected one of 'train', 'test' or 'valid'."
            )

        super().__init__(
            save_to, transform=transform, target_transform=target_transform
        )

        archive, checksum = archives[split]
        self.url = self.base_url + archive
        self.filename = archive
        self.file_md5 = checksum
        # Drop the trailing ".zip" to get the name of the extracted HDF5 file.
        self.data_filename = self.filename[:-4]

        if not self._check_exists():
            self.download()

        # ``[()]`` reads the whole dataset into memory, so the values stay usable
        # after the file has been closed by the context manager.
        path = os.path.join(self.location_on_system, self.data_filename)
        with h5py.File(path, "r") as file:
            self.classes = file["extra/keys"][()]
            self._speaker = file["extra/speaker"][()]

    @property
    def speaker(self):
        """Contents of the ``extra/speaker`` dataset that was read at construction time."""
        return self._speaker[()]