Python: Generating Batches of Data

  • 2021-09-24 22:59:25
  • OfStack

Generate batch data

Each sample in the input data can have multiple features and one label, preferably in numpy.array format.

datas = [data1, data2,..., dataN], labels = [label1, label2,..., labelN],

Where data[i] = [feature1, feature2, ..., featureM], meaning each sample has M features.

Enter the data for our method, all_data = [datas, labels].

Code implementation

Batches of size batch_size are selected by index. An option controls whether to shuffle the data; shuffling is done by randomly permuting the indices over the full range of the dataset.


import numpy as np
def batch_generator(all_data, batch_size, shuffle=True):
    """Yield batches of `batch_size` samples, cycling over the data forever.

    :param all_data: sequence of array-likes (e.g. [datas, labels]); every
        item must have the same length along axis 0.
    :param batch_size: number of samples per yielded batch.
    :param shuffle: if True, randomly permute the samples once up front and
        again at the start of every epoch.
    :return: a generator yielding, on each `next()`, a list with one slice
        (a numpy array of `batch_size` samples) per item of `all_data`.
    :raises ValueError: if the items of `all_data` have mismatched lengths.
    """
    # Normalize every item to a numpy array so we can fancy-index with a
    # permutation and slice all items uniformly.
    all_data = [np.array(d) for d in all_data]

    # Sample count, taken from the first item; all items must agree,
    # otherwise the shared permutation / slicing below would be wrong.
    data_size = all_data[0].shape[0]
    if any(d.shape[0] != data_size for d in all_data):
        raise ValueError("all items of all_data must have the same length")
    print("data_size: ", data_size)

    if shuffle:
        # Shuffle samples by applying one shared random permutation,
        # so features and labels stay aligned.
        p = np.random.permutation(data_size)
        all_data = [d[p] for d in all_data]

    batch_count = 0
    while True:
        # Epoch exhausted: restart (and reshuffle) before yielding.
        # NOTE: the trailing samples that do not fill a whole batch are
        # skipped, i.e. each epoch yields data_size // batch_size batches.
        if batch_count * batch_size + batch_size > data_size:
            batch_count = 0
            if shuffle:
                p = np.random.permutation(data_size)
                all_data = [d[p] for d in all_data]
        start = batch_count * batch_size
        end = start + batch_size
        batch_count += 1
        yield [d[start:end] for d in all_data]

Test data

The sample data x and the labels y can be passed in separately or together.


# Input x: 23 samples, two features each.
# Output y: 23 labels, each 0 or 1.
x = np.random.random(size=[23, 2])
y = np.random.randint(2, size=[23, 1])
batch_size = 5
epochs = 20
# Full batches per epoch; the 3 leftover samples are skipped by the generator.
batches_per_epoch = x.shape[0] // batch_size
batch_gen = batch_generator([x, y], batch_size)
for epoch in range(epochs):
    print("##### epoch %s ##### " % epoch)
    for batch_idx in range(batches_per_epoch):
        batch_x, batch_y = next(batch_gen)
        print("-----epoch=%s, batch=%s-----" % (epoch, batch_idx))
        print(batch_x, batch_y)

Supplement: Constructing batch datasets using tf.data.Dataset


import tensorflow as tf
import numpy as np

def _parse_function(x):
    """py_func body: ignore the incoming index and return the numbers 0..9.

    The cast to int32 is required: np.arange defaults to int64 on most
    platforms, which would not match the tf.int32 output type declared
    in tf.py_func and would fail at runtime.
    """
    return np.arange(10, dtype=np.int32)

def _from_tensor_slice(x):
    """Flatten each mapped array into individual scalar dataset elements."""
    return tf.data.Dataset.from_tensor_slices(x)

softmax_data = tf.data.Dataset.range(1000)  # a pipeline of 1000 indices
# Run the numpy function on every element (TF1-style tf.py_func).
softmax_data = softmax_data.map(lambda x: tf.py_func(_parse_function, [x], [tf.int32]))
softmax_data = softmax_data.flat_map(_from_tensor_slice)  # flatten to 1-D scalar stream
softmax_data = softmax_data.batch(1)  # group elements into batches of 1
softmax_iter = softmax_data.make_initializable_iterator()  # TF1 iterator
softmax_element = softmax_iter.get_next()  # tensor that yields the next batch

# Use a context manager so the session is always closed.
with tf.Session() as sess:
    sess.run(softmax_iter.initializer)  # initialize the iterator state
    print(sess.run(softmax_element))  # fetch the first batch
    # NOTE: a tf.data.Dataset object itself is NOT a fetchable tensor;
    # `sess.run(softmax_data)` would raise TypeError. Fetch the iterator's
    # get_next tensor again instead to obtain the following batch.
    print(sess.run(softmax_element))

Related articles: