File size: 1,811 Bytes
e22b55b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import h5py
import os

class Hdf5Writer:
    def __init__(self, dims, outputPath, dbName='Images', buffSize= 1000):

        # throw an error if the file already exists
        if os.path.exists(outputPath):
            raise ValueError("PATH ALREADY PRESENT. PLEASE DELETE FILES"
                             "BEFORE PROCEEDING.")

        # database to store data
        self.db= h5py.File(outputPath, 'w')
        # define dataset containers to store data and labels
        self.data= self.db.create_dataset(dbName, dims, dtype='float')
        self.labels= self.db.create_dataset('Labels', shape=(dims[0],), dtype='int')

        # defining a buffer and index variable for the buffer
        self.buffSize= buffSize
        self.buffer= {"data": [], "labels": []}
        self.idx= 0

    def add(self, values, labels):
        self.buffer['data'].extend(values)
        self.buffer['labels'].extend(labels)

        if len(self.buffer['data'])>=self.buffSize:
            self.flush()

    def flush(self):
        # When buffer size is reached flush data to dataset container
        temp_idx= self.idx + len(self.buffer['data'])
        # index from prev_idx to new_idx
        self.data[self.idx:temp_idx]= self.buffer['data']
        self.labels[self.idx:temp_idx]= self.buffer['labels']
        # update new_idx
        self.idx=temp_idx
        # reinitialize the buffer
        self.buffer={'data': [], 'labels': []}

    def flushClassNames(self, classNames):

        # Creating a special
        labelNames= self.db.create_dataset('Label_Names', (len(classNames),),
                                           dtype=h5py.special_dtype(vlen=unicode))
        labelNames[:]= classNames

    def close(self):

        if len(self.buffer['data'])>0:
            self.flush()

        self.db.close()