Version for NumPy array data

import numpy as np

class Dataset:
    """Serve consecutive mini-batches from a NumPy array.

    The array is traversed along its first axis. When a batch runs past the
    end of the array, the leftover rows of the current pass are combined with
    rows from the start of the next pass, so such a batch still contains
    ``batch_size`` rows (as long as ``batch_size`` does not exceed the
    leftover rows plus one full pass).
    """

    def __init__(self, data):
        """Store ``data`` and reset the epoch bookkeeping.

        Args:
            data: Array-like object exposing ``shape`` and supporting both
                slice and integer-array indexing (e.g. ``numpy.ndarray``).
        """
        self._index_in_epoch = 0
        self._epochs_completed = 0
        self._data = data
        self._num_examples = data.shape[0]

    @property
    def data(self):
        """Return the data in its current (possibly shuffled) order."""
        return self._data

    def next_batch(self, batch_size, shuffle=True):
        """Return the next ``batch_size`` rows.

        Args:
            batch_size: Number of rows to return.
            shuffle: When true, the rows are permuted before the first batch
                and again each time a batch crosses the end of the data.
                When false, the current order is kept.

        Returns:
            An array slice of the stored data, or the concatenation of the
            tail of the finished pass and the head of the next one.
        """
        start = self._index_in_epoch

        # Permute once before the very first batch is handed out.
        if shuffle and start == 0 and self._epochs_completed == 0:
            order = np.random.permutation(self._num_examples)
            self._data = self._data[order]

        if start + batch_size > self._num_examples:
            # The batch crosses the end of the data: finish this pass first.
            # Note: when start == self._num_examples the tail is empty.
            self._epochs_completed += 1
            rest_num_examples = self._num_examples - start
            data_rest_part = self._data[start:self._num_examples]

            if shuffle:
                order = np.random.permutation(self._num_examples)
                self._data = self._data[order]

            # The remainder of the batch comes from the head of the new pass.
            start = 0
            self._index_in_epoch = batch_size - rest_num_examples
            end = self._index_in_epoch
            data_new_part = self._data[start:end]
            return np.concatenate((data_rest_part, data_new_part), axis=0)

        self._index_in_epoch += batch_size
        end = self._index_in_epoch
        return self._data[start:end]


# Demo: draw ten batches of six from ten values, showing the current ordering
# of the underlying data after each batch.
dataset = Dataset(np.arange(0, 10))
for i in range(10):
    print(dataset.next_batch(6))
    print(dataset.data)

Version for pandas DataFrame data

import numpy as np
import pandas as pd
class Dataset:
    """Serve consecutive mini-batches of rows from a pandas object.

    Rows are addressed by position (``.iloc``), so the index labels of the
    input do not matter. When a batch runs past the last row, the leftover
    rows of the current pass are combined with rows from the start of the
    next pass, so such a batch still contains ``batch_size`` rows (as long as
    ``batch_size`` does not exceed the leftover rows plus one full pass).
    """

    def __init__(self, data):
        """Store ``data`` and reset the epoch bookkeeping.

        Args:
            data: A ``pandas.DataFrame`` (a ``pandas.Series`` also works,
                since only ``shape`` and row-wise ``.iloc`` are used).
        """
        self._index_in_epoch = 0
        self._epochs_completed = 0
        self._data = data
        self._num_examples = data.shape[0]

    @property
    def data(self):
        """Return the data in its current (possibly shuffled) row order."""
        return self._data

    def next_batch(self, batch_size, shuffle=True):
        """Return the next ``batch_size`` rows.

        Args:
            batch_size: Number of rows to return.
            shuffle: When true, the rows are permuted before the first batch
                and again each time a batch crosses the end of the data.
                When false, the current order is kept.

        Returns:
            A positional slice of the stored data, or the ``pd.concat`` of
            the tail of the finished pass and the head of the next one.
        """
        start = self._index_in_epoch

        # Permute once before the very first batch is handed out.
        if shuffle and start == 0 and self._epochs_completed == 0:
            order = np.random.permutation(self._num_examples)
            self._data = self._data.iloc[order]

        if start + batch_size > self._num_examples:
            # The batch crosses the last row: finish this pass first.
            # Note: when start == self._num_examples the tail is an empty slice.
            self._epochs_completed += 1
            rest_num_examples = self._num_examples - start
            data_rest_part = self._data.iloc[start:self._num_examples]

            if shuffle:
                order = np.random.permutation(self._num_examples)
                self._data = self._data.iloc[order]

            # The remainder of the batch comes from the head of the new pass.
            start = 0
            self._index_in_epoch = batch_size - rest_num_examples
            end = self._index_in_epoch
            data_new_part = self._data.iloc[start:end]
            return pd.concat((data_rest_part, data_new_part), axis=0)

        self._index_in_epoch += batch_size
        end = self._index_in_epoch
        # Positional slicing, consistent with every other access above.
        return self._data.iloc[start:end]


# Demo: draw ten batches of five from a ten-row frame, showing the current
# ordering of the underlying data after each batch.
df = pd.DataFrame()
df['a'] = np.arange(10)
df['b'] = np.arange(10) * 10
dataset = Dataset(df)
for i in range(10):
    print(dataset.next_batch(5))
    print(dataset.data)

最新文章

  1. Eclipse使用Maven创建web3.0项目
  2. 树链剖分+线段树 HDOJ 4897 Little Devil I(小恶魔)
  3. SQL JOIN\SQL INNER JOIN 关键字\SQL LEFT JOIN 关键字\SQL RIGHT JOIN 关键字\SQL FULL JOIN 关键字
  4. ilspy导致c# dll代码被窃取
  5. json_encode中文unicode的问题
  6. JavaScript学习笔记- 正则表达式常用验证
  7. IOS UINavigationController 导航控制器
  8. atitit.spring3 mvc url配置最佳实践
  9. SQL Server 批量插入数据的方法
  10. Easy Problem-map和vector的使用
  11. Log4Net总结
  12. J2EE基础总结(1)——J2EE入门
  13. PC-CSS-分隔线
  14. POJ-1861-NETWORK 解题报告
  15. iOS scrollView中嵌套多个tabeleView处理方案
  16. ASP.NET Core 快速入门(实战篇)
  17. iOS----------YYModel
  18. AHOI2019N省联考凉凉记
  19. PTA8
  20. shell 4注释

热门文章

  1. shell脚本之nginx启动脚本、统计日志字段、for循环实战、跳板机
  2. Neo4j/Cypher: All paths between two nodes with a relationship property filter
  3. windows下使用zookeeper
  4. luogu题解 P1462 【通往奥格瑞玛的道路】二分+spfa
  5. 110、通过案例学习Secret (Swarm17)
  6. 【转载】Linux GCC常用命令
  7. 修改MySQL表中自增编号
  8. SSD源码解读——损失函数的构建
  9. MSSQL日期分组排序
  10. 第二章 Vue快速入门-- 28 自定义按键修饰符