data_loader读取器
2024-09-08 16:19:24
import random
import numpy as np
import pandas as pd
import cv2
def _load_image(path):
    """Read one image with OpenCV and return it as a normalised CHW array.

    The image is resized to 224x224 with bicubic interpolation, scaled by
    1/255 and transposed from HWC to CHW.

    Raises:
        FileNotFoundError: if OpenCV cannot read ``path``.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than inside cv2.resize.
        raise FileNotFoundError('cannot read image: {}'.format(path))
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC) / 255
    return np.transpose(img, (2, 0, 1))


def _paths_and_labels(frame, image_dir):
    """Return parallel lists of image paths and zero-based labels for ``frame``.

    Column 0 holds the image name (without the ``.jpg`` extension) and
    column 1 holds the label, which is shifted down by one.
    """
    paths = ['{}/{}.jpg'.format(image_dir, name) for name in frame.iloc[:, 0]]
    labels = [int(label - 1) for label in frame.iloc[:, 1]]
    return paths, labels


def date_loader(image_dir, file_name, batch_size=1, mode='train', *,
                seed=None, load_image=None):
    """Create a batch reader over images listed in a CSV file.

    The CSV is shuffled, the first 80% of the rows become the training split
    and the rest the test split. The validation split is a separate random
    20% sample of the whole shuffled table, so it may overlap both of the
    other splits.

    Args:
        image_dir: Directory containing ``<name>.jpg`` files.
        file_name: Path of the CSV file; column 0 is the image name and
            column 1 the label (stored as ``label - 1``).
        batch_size: Number of samples per yielded batch (must be >= 1).
        mode: ``'train'``, ``'test'`` or ``'val'``. Any other value gives a
            reader that yields nothing.
        seed: Optional seed for the shuffle and the validation sample. Pass
            the same seed to separate calls to get the same split. ``None``
            keeps the unseeded behaviour.
        load_image: Optional callable ``path -> array`` used instead of the
            built-in OpenCV loader (224x224, scaled to [0, 1], CHW).

    Returns:
        A zero-argument generator function. Each item it yields is a tuple
        ``(images, labels)`` where ``images`` is a float32 array stacked
        along a new first axis and ``labels`` is an int64 array of shape
        ``(batch_size, 1)``. Samples left over after the last full batch
        are dropped.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be >= 1, got {}'.format(batch_size))
    if load_image is None:
        load_image = _load_image

    # One RandomState shared by both draws keeps a seeded run reproducible;
    # None falls back to pandas' default (global NumPy) randomness.
    rng = None if seed is None else np.random.RandomState(seed)

    df = pd.read_csv(file_name)
    df = df.sample(frac=1, random_state=rng)

    # 80% / 20% train/test split over the shuffled rows.
    n_train = int(len(df) * 0.8)
    # Random validation sample, 20% of all rows. Paths and labels are both
    # taken from this sample so they stay aligned.
    val_df = df.sample(frac=0.2, random_state=rng)

    splits = {
        'train': _paths_and_labels(df.iloc[:n_train], image_dir),
        'test': _paths_and_labels(df.iloc[n_train:], image_dir),
        'val': _paths_and_labels(val_df, image_dir),
    }
    paths, labels = splits.get(mode, ([], []))

    def reader():
        batch_img = []
        batch_label = []
        for path, label in zip(paths, labels):
            batch_img.append(load_image(path))
            batch_label.append(label)
            if len(batch_img) == batch_size:
                yield (
                    np.array(batch_img).astype('float32'),
                    np.asarray(batch_label).astype('int64').reshape(batch_size, 1),
                )
                batch_img = []
                batch_label = []

    return reader
# Smoke test: build a reader over the test split and pull a single batch.
# NOTE(review): this runs at import time and reads 'a_100_drop_p.csv' and the
# 'image2_100' directory relative to the current working directory.
a = date_loader('image2_100', 'a_100_drop_p.csv', mode='test')
for n, data in enumerate(a()):
    images, label = data
    break

# NOTE(review): `paddle` is not imported in the visible part of this file;
# confirm it is imported elsewhere before these lines run.
# NOTE(review): every date_loader call reshuffles the CSV on its own, so the
# train and test readers below come from different random splits and may
# share samples.
# NOTE(review): date_loader already yields batches (batch_size defaults to 1);
# presumably paddle.batch then groups 10 of those items -- verify this is the
# intended nesting.
train_reader = paddle.batch(date_loader('image2_100', 'a_100_drop_p.csv', mode='train'), batch_size=10)
test_reader = paddle.batch(date_loader('image2_100', 'a_100_drop_p.csv', mode='test'), batch_size=10)
最新文章
- wcf测试证书的创建
- css-margin与百分数的关系
- 【转】贾扬清:希望Caffe成为深度学习领域的Hadoop
- IOS开发之——CocoaPods安装和使用 OC和swift通吃
- Android如何在ListView中嵌套ListView
- windows下,用绝对路径向html文件中插入图片
- HDU1849 Rabbit and Grass()
- Jenkins TFS配置
- 美链BEC合约漏洞技术分析
- Python开发【内置模块篇】configparser
- Web应用程序的安全问题
- struts2框架(1)---struts2入门
- VUE环境搭建、创建项目、vue调试工具
- 如何设置IntelliJ IDEA智能感知支持Jsp内置对象
- tanera笔记
- [ 原创 ] Centos7.6安装Mysql5.7
- 黄聪:超实用的PHPExcel[导入][导出]实现方法总结
- php 去除所有空格 包括中文空格圆角空格
- zookeeper和duboo 没用
- bootstrap bootstrapvalidator插件+adjax验证使用