# coding: utf-8

# In[18]:

import pandas as pd
import numpy as np
from sklearn import tree
from sklearn.svm import SVC
from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import binarize
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import Normalizer
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score,recall_score,average_precision_score,auc

# In[32]:

# Read the raw CSV (hard-coded local Windows path) into a DataFrame.
data = pd.read_csv(r"D:\Users\sgg91044\Desktop\bad_wafer_data_pivot.csv")

# In[33]:

data.head()

# In[34]:

# `index` is the frame with the defect count, the listed measurement columns
# and "Target" removed in one pass; every other column is kept as-is.
index = data.drop(
    columns=[
        "defect_count",
        "ETCM_PHA4",
        "ETCM_PHB4",
        "ETCM_PHC4",
        "HELK_MAX.",
        "HELK_MEAN",
        "HELK_SD",
        "LOWERCHM_PRESS",
        "PBK4",
        "RR13_MAX.",
        "RR13_MEAN",
        "RR23_MAX.",
        "RR23_MEAN",
        "THR3_MAX.",
        "THR3_MAX._DIFF",
        "THR3_MEAN",
        "THR3_MEAN_DIFF",
        "THR3_MEAN_SLOPE",
        "THR3_SD",
        "Target",
    ]
)
index

# In[35]:

# Remove these four columns from the working frame before encoding.
data = data.drop(
    ["lotid", "Step", "Recipie_Name", "defect_count"],
    axis=1,
)
data.head()

# In[36]:

le = LabelEncoder()
ohe = OneHotEncoder()

# In[37]:

data.head()

# In[40]:

# Label-encode the first three columns by position, refitting the same
# LabelEncoder for each one and appending the result as a new column.
# NOTE(review): positions 0-2 are presumably eqpid / slotid / Chamber, the
# columns dropped below -- confirm against the CSV column order.
for _encoded_name, _source_pos in (
    ("eqp_encoded", 0),
    ("slot_encoded", 1),
    ("chamber_encoded", 2),
):
    data[_encoded_name] = le.fit_transform(data.iloc[:, _source_pos])
data.head()

# In[41]:

# Remove the three named source columns now that encoded columns exist.
data = data.drop(["eqpid", "slotid", "Chamber"], axis=1)
data.head()

# In[42]:

nz = Normalizer()
# Run the Normalizer over two positional column groups in turn (columns
# 10-11 first, then 0-2), refitting it each time and writing the output back
# over the same columns.
# NOTE(review): sklearn's Normalizer rescales each row, not each column --
# confirm that row-wise scaling within these groups is what is intended.
for _cols in (slice(10, 12), slice(0, 3)):
    _normalized = pd.DataFrame(
        nz.fit_transform(data.iloc[:, _cols]),
        columns=data.columns[_cols],
    )
    data.iloc[:, _cols] = _normalized
data.head()

# In[43]:

def cleaning(path: str = r"D:\Users\sgg91044\Desktop\bad_wafer_data_pivot.csv") -> "pd.DataFrame":
    """Load a CSV and return it with categorical columns encoded and normalized.

    The steps mirror the notebook cells earlier in this file:

    1. Read the CSV at ``path``.
    2. Drop the ``lotid``, ``Step``, ``Recipie_Name`` and ``defect_count``
       columns.
    3. Label-encode the first three remaining columns (by position) into
       ``eqp_encoded``, ``slot_encoded`` and ``chamber_encoded``, then drop
       ``eqpid``, ``slotid`` and ``Chamber``.
    4. Apply ``Normalizer`` to columns 10-11 and then to columns 0-2.

    Args:
        path: Location of the CSV file. Defaults to the path that was
            previously hard-coded, so existing ``cleaning()`` calls behave
            the same.

    Returns:
        The transformed DataFrame. Previously the result was built and then
        discarded because the function had no ``return``.
    """
    data = pd.read_csv(path)
    data = data.drop(columns=["lotid", "Step", "Recipie_Name", "defect_count"])

    # A single encoder is refitted for each column; only the integer codes are
    # kept, so the per-column class mappings are not retained.
    # NOTE(review): positions 0-2 are presumably eqpid / slotid / Chamber --
    # confirm against the CSV column order.
    le = LabelEncoder()
    data["eqp_encoded"] = le.fit_transform(data.iloc[:, 0])
    data["slot_encoded"] = le.fit_transform(data.iloc[:, 1])
    data["chamber_encoded"] = le.fit_transform(data.iloc[:, 2])
    data = data.drop(columns=["eqpid", "slotid", "Chamber"])

    # Same order as before: columns 10-11 first, then columns 0-2.
    # NOTE(review): sklearn's Normalizer rescales each row, not each column --
    # confirm that row-wise scaling within these groups is what is intended.
    nz = Normalizer()
    for cols in (slice(10, 12), slice(0, 3)):
        data.iloc[:, cols] = pd.DataFrame(
            nz.fit_transform(data.iloc[:, cols]),
            columns=data.columns[cols],
        )

    return data

最新文章

  1. 前端 初识angularJS的基本概念
  2. dropdownlist 动态添加
  3. Browser默认书签加载过程
  4. 【转载】OGRE 内存管理
  5. linux文件操作命令--转
  6. Java_Hbase Timeout issue
  7. JavaScript和php常用语法——切割字符串
  8. string的数值转换
  9. 再起航,我的学习笔记之JavaScript设计模式16(享元模式)
  10. iOS中NSTimer的invalidate调用之后
  11. OpenGL入门之入门
  12. redis命令Map类型(五)
  13. Nginx简单手册
  14. 《Linux.Shell编程从入门到精通》读书笔记
  15. CAP理论介绍
  16. 在ASP.NET MVC实现购物车,尝试一种不同于平常的购物车显示方式
  17. Unity3D调用android方法(非插件方式)
  18. (sklearn)机器学习模型的保存与加载
  19. jQuery插件制作方法详解
  20. 【SDOI2015】序列统计 解题报告

热门文章

  1. Learning-Python【11】:函数嵌套及作用域
  2. 一个数组中两个数的和为N,找出这两个数字的下标
  3. 浅谈JS中的typeof和instanceof的区别
  4. navicat 链接 mysql 报错1251
  5. openssl 交叉编译
  6. ECharts注释
  7. [python](windows)分布式进程问题:pickle模块不能序列化lambda函数
  8. 20175317 《Java程序设计》第三周学习总结
  9. 雷林鹏分享:jQuery EasyUI 数据网格 - 条件设置行背景颜色
  10. scala面试题总结