oracle plsql 实现apriori算法
对apriori关联关系算法研究了一段时间,网上能搜到的例子,大部分是python写的,数据集长得像下面这样:
[[I1,I2,I5],[I2,I4],[I2,I3],[I1,I2,I4],[I1,I3],[I2,I3],[I1,I3],[I1,I2,I3,I5],[I1,I2,I3]]
而实际的交易数据,一般存储到关系型数据库中,数据是按下面的样子保存:
TRAN_SEQ_NO,ITEM
1,I1
1,I2
1,I5
2,I2
2,I4
.
.8,I5
9,I1
9,I2
9,I3
而且python的程序,写了好多循环,效率不高。
根据小票数据在数据库中存储的特点,并且apriori算法也不是特别复杂,因此想用plsql实现一下。
plsql实现的aprioir算法,对原算法做了裁剪,只计算2项集和两个商品之间的关联关系,3项集以上的忽略不计。
表结构创建
1.小票表(交易事物表)
create table CMX_APRIORI_TRANSACTION
(
tran_seq_no NUMBER(20), --交易号
item VARCHAR2(25) --商品编码
)
2.频繁项集C1
create table CMX_APRIORI_L1
(
item VARCHAR2(25), --商品编码
support NUMBER(9,6), --支持度
cnt NUMBER(8) --交易次数
)
2.关联关系L2(结果表)
create table CMX_APRIORI_L2
(
item_a VARCHAR2(25), --前件商品编码
item_b VARCHAR2(25), --后件商品编码
cnt NUMBER(8), --交易次数
support NUMBER(9,6), --支持度
conf_a_b NUMBER(9,6), --置信度
lift_a_b NUMBER(9,6) --提升度
)
说明:
小票表插入的数据,必须提前进行处理。同一张小票商品要去重。
完整plsql代码:
CREATE OR REPLACE PACKAGE CMX_APRIORI_SQL IS /*-----------------------------------------------------------------------
* PROCEDURE NAME : CMX_APRIORI_SQL
* COMMENTS : 商品关联关系计算
* CODED BY : ONELANG 2019-1-27
* CHANGED HISTORY :
-----------------------------------------------------------------------*/
FUNCTION TEST(O_ERROR_MESSAGE IN OUT VARCHAR2) RETURN BOOLEAN;
END CMX_APRIORI_SQL;
/
CREATE OR REPLACE PACKAGE BODY CMX_APRIORI_SQL IS /*-----------------------------------------------------------------------
* PROCEDURE NAME : INIT_TEST_DATA
* COMMENTS : 初始化测试数据
* CODED BY : ONELANG 2019-1-27
* CHANGED HISTORY :
-----------------------------------------------------------------------*/ FUNCTION INIT_TEST_DATA(O_ERROR_MESSAGE IN OUT VARCHAR2) RETURN BOOLEAN IS
L_PROGRAM VARCHAR2(100) := 'CMX_APRIORI_SQL.INIT_TEST_DATA';
BEGIN EXECUTE IMMEDIATE 'TRUNCATE TABLE CMX_APRIORI_TRANSACTION'; INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (1,'I1');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (1,'I2');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (1,'I5');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (2,'I2');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (2,'I4');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (3,'I2');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (3,'I3');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (4,'I1');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (4,'I2');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (4,'I4');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (5,'I1');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (5,'I3');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (6,'I2');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (6,'I3');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (7,'I1');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (7,'I3');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (8,'I1');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (8,'I2');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (8,'I3');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (8,'I5');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (9,'I1');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (9,'I2');
INSERT INTO CMX_APRIORI_TRANSACTION (TRAN_SEQ_NO,ITEM)
VALUES (9,'I3');
COMMIT;
RETURN TRUE; EXCEPTION
WHEN OTHERS THEN
O_ERROR_MESSAGE := SQLERRM||TO_CHAR(SQLCODE);
RETURN FALSE;
END; /*-----------------------------------------------------------------------
* PROCEDURE NAME : GET_TEST_L1
* COMMENTS : 计算频繁项集L1
* CODED BY : ONELANG 2019-1-27
* CHANGED HISTORY :
-----------------------------------------------------------------------*/ FUNCTION GET_TEST_L1(O_ERROR_MESSAGE IN OUT VARCHAR2,I_SUPPORT IN NUMBER) RETURN BOOLEAN IS
L_PROGRAM VARCHAR2(100) := 'CMX_APRIORI_SQL.GET_TEST_L1';
L_TOTAL NUMBER(8);
BEGIN EXECUTE IMMEDIATE 'TRUNCATE TABLE CMX_APRIORI_L1'; SELECT COUNT(DISTINCT TRAN_SEQ_NO) INTO L_TOTAL
FROM CMX_APRIORI_TRANSACTION ; INSERT INTO CMX_APRIORI_L1
SELECT ITEM,ROUND(COUNT(1) / L_TOTAL,6) SUPPORT,COUNT(1) CNT
FROM CMX_APRIORI_TRANSACTION A
GROUP BY ITEM
HAVING COUNT(1) / L_TOTAL >= I_SUPPORT ; COMMIT; RETURN TRUE; EXCEPTION
WHEN OTHERS THEN
O_ERROR_MESSAGE := SQLERRM||TO_CHAR(SQLCODE); RETURN FALSE;
END; /*-----------------------------------------------------------------------
* PROCEDURE NAME : GET_L2
* COMMENTS : 获取关联关系L2
* CODED BY :
* CHANGED HISTORY :
-----------------------------------------------------------------------*/
FUNCTION GET_TEST_L2(O_ERROR_MESSAGE IN OUT VARCHAR2,I_SUPPORT IN NUMBER) RETURN BOOLEAN IS
L_PROGRAM VARCHAR2(100) := 'CMX_APRIORI_SQL.GET_L2';
L_TOTAL NUMBER(8);
BEGIN EXECUTE IMMEDIATE 'TRUNCATE TABLE CMX_APRIORI_L2'; SELECT COUNT(DISTINCT TRAN_SEQ_NO) INTO L_TOTAL
FROM CMX_APRIORI_TRANSACTION ; --2项集支持度
INSERT INTO CMX_APRIORI_L2(ITEM_A,ITEM_B,CNT,SUPPORT)
SELECT ITEM_A,ITEM_B,COUNT(DISTINCT TRAN_SEQ_NO) CNT, ROUND(COUNT(DISTINCT TRAN_SEQ_NO) / L_TOTAL,6) SUPPORT
FROM (SELECT A.TRAN_SEQ_NO,
A.ITEM ITEM_A,
B.ITEM ITEM_B
FROM CMX_APRIORI_TRANSACTION A,
CMX_APRIORI_TRANSACTION B
WHERE A.ITEM IN (SELECT ITEM FROM CMX_APRIORI_L1)
AND B.ITEM IN (SELECT ITEM FROM CMX_APRIORI_L1)
AND A.TRAN_SEQ_NO = B.TRAN_SEQ_NO
AND A.ITEM > B.ITEM
)
GROUP BY ITEM_A,ITEM_B
HAVING COUNT(DISTINCT TRAN_SEQ_NO) / L_TOTAL >= I_SUPPORT; --置信度
UPDATE CMX_APRIORI_L2 L2
SET CONF_A_B = (SELECT L2.SUPPORT / L1.SUPPORT FROM CMX_APRIORI_L1 L1 WHERE L2.ITEM_A = L1.ITEM); --提升度
UPDATE CMX_APRIORI_L2 L2
SET LIFT_A_B = (SELECT L2.CONF_A_B / L1.SUPPORT FROM CMX_APRIORI_L1 L1 WHERE L2.ITEM_B = L1.ITEM); COMMIT; RETURN TRUE; EXCEPTION
WHEN OTHERS THEN
O_ERROR_MESSAGE := SQLERRM||TO_CHAR(SQLCODE); RETURN FALSE;
END; FUNCTION TEST(O_ERROR_MESSAGE IN OUT VARCHAR2) RETURN BOOLEAN IS
L_PROGRAM VARCHAR2(100) := 'CMX_APRIORI_SQL.TEST';
L_MIN_SUPP NUMBER(9,5);
BEGIN L_MIN_SUPP := 0.1; IF INIT_TEST_DATA(O_ERROR_MESSAGE) = FALSE THEN
RETURN FALSE;
END IF; IF GET_TEST_L1(O_ERROR_MESSAGE,L_MIN_SUPP) = FALSE THEN
RETURN FALSE;
END IF; IF GET_TEST_L2(O_ERROR_MESSAGE,L_MIN_SUPP) = FALSE THEN
RETURN FALSE;
END IF; RETURN TRUE; EXCEPTION
WHEN OTHERS THEN
O_ERROR_MESSAGE := SQLERRM||TO_CHAR(SQLCODE);
RETURN FALSE;
END; END CMX_APRIORI_SQL;
/
运行:
declare
result boolean;
begin
-- Call the function
result := cmx_apriori_sql.test(o_error_message => :o_error_message);
-- Convert false/true/null to 0/1/null
:result := sys.diutil.bool_to_int(result);
end;
运行结果:
select * from cmx_apriori_l2 ITEM_A ITEM_B CNT SUPPORT CONF_A_B LIFT_A_B
1 I2 I1 4 0.444444 0.571428 0.857142
2 I4 I2 2 0.222222 1.000000 1.285714
3 I5 I3 1 0.111111 0.500000 0.750000
4 I3 I2 4 0.444444 0.666666 0.857142
5 I5 I1 2 0.222222 1.000000 1.499999
6 I3 I1 4 0.444444 0.666666 0.999999
7 I4 I1 1 0.111111 0.500000 0.750000
8 I5 I2 2 0.222222 1.000000 1.285714
用一家门店,一年的销售数据计算一下就会发现,尿布->啤酒根本没有关联关系。意外+惊喜。
最新文章
- mysql 表被锁时,需要执行的命令
- 遭遇input与button按钮背景图失效不显示的解决办法
- 在AngularJs中怎么设置请求头信息(headers)及不同方法的比较
- 冲刺阶段 day 8
- [转]oracle for update和for update nowait的区别
- TL-WR703 USB不稳定/当前的总结
- float保留7位double保留15位之后的数字四舍五进
- python图片小爬虫
- [转载]PDO防注入原理分析以及使用PDO的注意事项
- jquery图片放大镜和遮罩层效果
- schema 对象的简单介绍
- Ubuntu下Maven配置与Maven项目创建教程
- Xcode调试非异常导致崩溃的程序
- OSGI嵌入jetty应用服务器
- Qt5应用改变窗口大小时出现黑影
- java调用sap的webservice(需要登录验证)
- exce中42093和日期之间的关系
- Python之路,第九篇:Python入门与基础9
- Mac OS 10.12 - 在VMwear Workstation12.5.2中大写键和中英文输入法的切换!
- chrome 因js死循环卡住