Python SQL相关操作

环境
Anaconda3 Python 3.6, Windows 64-bit
目的
从MySQL数据库读取数据，进行数据查询、关联
代码
# -*- coding: utf-8 -*-

"""

Author: kimbo zhang

Mail: kimbo_zhang@163.com

"""

import pymysql

import pandas as pd

import numpy as np

# Open the MySQL connection (host and password are redacted in this example).
db_conn = pymysql.connect(
    host="***",
    user="kimbo",
    passwd="***",
    port=3306,
    database="kimbo_test",
    charset="utf8",
)

# SQL statements to execute.  The second query aliases the re_* columns so
# that both result sets expose the same column names.
sql_cmd = "select id,coll_type,coll_amt,coll_cost from kimbo_test;"
sql_cmd2 = "select id,re_type as coll_type,re_amt as coll_amt,re_cost as coll_cost from kimbo_test2;"

# Load both result sets into DataFrames.  Everything below works on these
# in-memory frames, so the connection is released as soon as the reads are
# done -- even if one of the queries raises.
try:
    data1 = pd.read_sql(sql_cmd, db_conn)  # rows of kimbo_test
    data2 = pd.read_sql(sql_cmd2, db_conn)  # rows of kimbo_test2
finally:
    db_conn.close()

# 1. Plain select: the first 10 rows.
# eg: select * from kimbo_test limit 10;
sl = data1.head(10)
print("查询前10条记录:\n")
print(sl)
print("\n-------------->\n")

# 2. Rows whose coll_type is '偏低'.
# eg: select * from kimbo_test where coll_type='偏低';
wh = data1[data1['coll_type'] == '偏低']
print(wh)

# 3. Rows whose coll_type is '偏低' or '昂贵' (isin is the pandas form of SQL IN).
# eg: select * from kimbo_test where coll_type in ('偏低','昂贵');
wh2 = data1[data1['coll_type'].isin(['偏低', '昂贵'])]
print(wh2)

# 4. Rows whose coll_type is '偏低' or '昂贵' and whose id is greater than 5000.
# eg: select * from kimbo_test where coll_type in ('偏低','昂贵') and id >5000;
wh3 = data1[data1['coll_type'].isin(['偏低', '昂贵']) & (data1['id'] > 5000)]
print(wh3)

# 5. Group and aggregate: total coll_amt and coll_cost per coll_type.
# eg: select coll_type,sum(coll_amt)as coll_amt,sum(coll_cost)as coll_cost from kimbo_test group by coll_type;
# The aggregation is named by the string "sum" rather than np.sum: the result
# is the same, and recent pandas versions warn when np.sum is passed to agg().
gb = data1.groupby('coll_type').agg({'coll_amt': 'sum', 'coll_cost': 'sum'})
print(gb)

# 5. Inner join on id (pd.merge defaults to how='inner').
# eg: select * from kimbo_test a inner join kimbo_test2 b on a.id=b.id ;
jn = pd.merge(data1, data2, on='id')
print(jn)

# 6. Left join on id: keep every row of kimbo_test, matched against kimbo_test2.
# eg: select * from kimbo_test a left join kimbo_test2 b on a.id=b.id ;
jn2 = pd.merge(data1, data2, on='id', how='left')
print(jn2)

# 7. Full outer join on id: keep the rows of both tables.
# (Illustrative SQL -- MySQL itself has no FULL JOIN keyword.)
# eg: select * from kimbo_test a full join kimbo_test2 b on a.id=b.id ;
jn3 = pd.merge(data1, data2, on='id', how='outer')
print(jn3)

# 8. Union all: stack the two frames; the column names must match.
# eg select * from kimbo_test a union all select * from kimbo_test2 b ;
# Each frame keeps its own row labels, so the index of the result may repeat.
un2 = pd.concat([data1, data2])
print(un2)

# 9. Analytic (window) function: ROW_NUMBER within each coll_type.
# eg:
# SELECT * FROM (
#   SELECT
#     t.*,
#     ROW_NUMBER() OVER(PARTITION BY coll_type ORDER BY coll_amt DESC) AS rn
#   FROM kimbo_test t
# )
# WHERE rn < 3
# ORDER BY rn;

# Order the rows by descending coll_amt, then number them inside each
# coll_type group starting from 1.
_by_amt_desc = data1.sort_values(['coll_amt'], ascending=False)
_row_number = _by_amt_desc.groupby('coll_type').cumcount() + 1

# The numbering Series still carries the original row labels, so assign()
# aligns it back onto data1 by index.
_numbered = data1.assign(rn=_row_number)

# Keep the top two rows of every group and order the output by group and rank.
rn = _numbered[_numbered['rn'] < 3].sort_values(['coll_type', 'rn'])

print("row_number 分析函数结果：")
print(rn)
结果展示
第一个和最后一个结果：
巴特西

Python SQL相关操作

最新文章

热门文章