williamcfrancis's picture
Upload 74 files
e22b55b
import h5py
import os
class Hdf5Writer:
def __init__(self, dims, outputPath, dbName='Images', buffSize= 1000):
# throw an error if the file already exists
if os.path.exists(outputPath):
raise ValueError("PATH ALREADY PRESENT. PLEASE DELETE FILES"
"BEFORE PROCEEDING.")
# database to store data
self.db= h5py.File(outputPath, 'w')
# define dataset containers to store data and labels
self.data= self.db.create_dataset(dbName, dims, dtype='float')
self.labels= self.db.create_dataset('Labels', shape=(dims[0],), dtype='int')
# defining a buffer and index variable for the buffer
self.buffSize= buffSize
self.buffer= {"data": [], "labels": []}
self.idx= 0
def add(self, values, labels):
self.buffer['data'].extend(values)
self.buffer['labels'].extend(labels)
if len(self.buffer['data'])>=self.buffSize:
self.flush()
def flush(self):
# When buffer size is reached flush data to dataset container
temp_idx= self.idx + len(self.buffer['data'])
# index from prev_idx to new_idx
self.data[self.idx:temp_idx]= self.buffer['data']
self.labels[self.idx:temp_idx]= self.buffer['labels']
# update new_idx
self.idx=temp_idx
# reinitialize the buffer
self.buffer={'data': [], 'labels': []}
def flushClassNames(self, classNames):
# Creating a special
labelNames= self.db.create_dataset('Label_Names', (len(classNames),),
dtype=h5py.special_dtype(vlen=unicode))
labelNames[:]= classNames
def close(self):
if len(self.buffer['data'])>0:
self.flush()
self.db.close()