Maven依赖

源头

<!-- Original dependency set: Lombok plus the Flink 1.8.0 Table API modules and the Scala streaming API. -->
<dependencies>
    <!-- Generates the getters/setters of the CSV row classes (@Data). -->
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <version>1.18.8</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-planner_2.11</artifactId>
        <version>1.8.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-api-java-bridge_2.11</artifactId>
        <version>1.8.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-scala_2.11</artifactId>
        <version>1.8.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-common</artifactId>
        <version>1.8.0</version>
    </dependency>
</dependencies>

改版

    <!-- Revised dependency set: the single flink-table artifact replaces the split Table API modules. -->
    <!-- NOTE(review): flink-table is pinned to 1.7.2 while flink-streaming-scala is 1.8.0; confirm the mix is intended. -->
    <dependencies>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.8</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table_2.11</artifactId>
            <version>1.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_2.11</artifactId>
            <version>1.8.0</version>
        </dependency>
    </dependencies>

SQL语句

-- Row counts of the five reference tables. The number after each statement is the
-- result recorded by the author; the comment underneath lists the table's columns.
SELECT COUNT(*) FROM T13_REF_AIRPORT_SAT; -- 11008
--HUB_ID IATA_CD NAME_CN NAME_EN
SELECT COUNT(*) FROM T13_REF_AIRPORT_CITY_LINK; -- 9676
--*******LINK_ID AIRPORT_HUB_ID CITY_HUB_ID
SELECT COUNT(*) FROM T13_REF_CITY_SAT; -- 9624
--HUB_ID CITY_CD NAME_CN NAME_EN
SELECT COUNT(*) FROM T13_REF_CITY_COUNTRY_LINK; -- 9062
--*******LINK_ID COUNTRY_HUB_ID CITY_HUB_ID
SELECT COUNT(*) FROM T13_REF_COUNTRY_SAT; -- 356
--HUB_ID COUNTRY_CD NAME_CN NAME_EN

-- Airport -> city -> country join through the two link tables.
SELECT *
FROM T13_REF_AIRPORT_SAT X1,T13_REF_AIRPORT_CITY_LINK X2,T13_REF_CITY_SAT X3,T13_REF_CITY_COUNTRY_LINK X4,T13_REF_COUNTRY_SAT X5
WHERE X1.HUB_ID=X2.AIRPORT_HUB_ID
AND X2.CITY_HUB_ID=X3.HUB_ID
AND X3.HUB_ID=X4.CITY_HUB_ID
AND X4.COUNTRY_HUB_ID=X5.HUB_ID;

-- Number of rows produced by the join.
SELECT COUNT(*)
FROM T13_REF_AIRPORT_SAT X1,T13_REF_AIRPORT_CITY_LINK X2,T13_REF_CITY_SAT X3,T13_REF_CITY_COUNTRY_LINK X4,T13_REF_COUNTRY_SAT X5
WHERE X1.HUB_ID=X2.AIRPORT_HUB_ID
AND X2.CITY_HUB_ID=X3.HUB_ID
AND X3.HUB_ID=X4.CITY_HUB_ID
AND X4.COUNTRY_HUB_ID=X5.HUB_ID; -- 16759

-- Airports per country (grouped by the country's Chinese name), largest first.
SELECT X5.NAME_CN COUNTRY_CN_NAME,COUNT(X1.HUB_ID) COUNT_AIRPORT
FROM T13_REF_AIRPORT_SAT X1,T13_REF_AIRPORT_CITY_LINK X2,T13_REF_CITY_SAT X3,T13_REF_CITY_COUNTRY_LINK X4,T13_REF_COUNTRY_SAT X5
WHERE X1.HUB_ID=X2.AIRPORT_HUB_ID
AND X2.CITY_HUB_ID=X3.HUB_ID
AND X3.HUB_ID=X4.CITY_HUB_ID
AND X4.COUNTRY_HUB_ID=X5.HUB_ID
GROUP BY X5.NAME_CN
ORDER BY COUNT_AIRPORT DESC; -- 254

-- Airports per (country, city), largest first.
SELECT
X5.COUNTRY_CD,
X5.NAME_CN COUNTRY_NAME_CN,
X5.NAME_EN COUNTRY_NAME_EN,
X3.CITY_CD,
X3.NAME_CN CITY_CN_NAME,
X3.NAME_EN CITY_EN_NAME,
COUNT(X1.HUB_ID) COUNT_AIRPORT
FROM T13_REF_AIRPORT_SAT X1,T13_REF_AIRPORT_CITY_LINK X2,T13_REF_CITY_SAT X3,T13_REF_CITY_COUNTRY_LINK X4,T13_REF_COUNTRY_SAT X5
WHERE X1.HUB_ID=X2.AIRPORT_HUB_ID
AND X2.CITY_HUB_ID=X3.HUB_ID
AND X3.HUB_ID=X4.CITY_HUB_ID
AND X4.COUNTRY_HUB_ID=X5.HUB_ID
GROUP BY X5.COUNTRY_CD,X5.NAME_CN,X5.NAME_EN,X3.CITY_CD,X3.NAME_CN,X3.NAME_EN
ORDER BY COUNT_AIRPORT DESC; -- 13030

-- Same grouping, restricted to cities without an English name.
SELECT
X5.COUNTRY_CD,
X5.NAME_CN COUNTRY_NAME_CN,
X5.NAME_EN COUNTRY_NAME_EN,
X3.CITY_CD,
X3.NAME_CN CITY_CN_NAME,
X3.NAME_EN CITY_EN_NAME,
COUNT(X1.HUB_ID) COUNT_AIRPORT
FROM T13_REF_AIRPORT_SAT X1,T13_REF_AIRPORT_CITY_LINK X2,T13_REF_CITY_SAT X3,T13_REF_CITY_COUNTRY_LINK X4,T13_REF_COUNTRY_SAT X5
WHERE X1.HUB_ID=X2.AIRPORT_HUB_ID
AND X2.CITY_HUB_ID=X3.HUB_ID
AND X3.HUB_ID=X4.CITY_HUB_ID
AND X4.COUNTRY_HUB_ID=X5.HUB_ID
AND X3.NAME_EN IS NULL
GROUP BY X5.COUNTRY_CD,X5.NAME_CN,X5.NAME_EN,X3.CITY_CD,X3.NAME_CN,X3.NAME_EN
ORDER BY COUNT_AIRPORT DESC;
-- Recorded NULL counts (presumably obtained by swapping the column in the IS NULL predicate above):
--COUNTRY_NAME_EN=NULL 19
--CITY_CN_NAME=NULL 1
--CITY_EN_NAME=NULL 1501

Airport_Sat

import lombok.Data;

/**
 * Airport row loaded from {@code T13_REF_AIRPORT_SAT.csv}.
 *
 * <p>The field keeps its snake_case name because {@code FlinkCsv} refers to it by that exact name
 * in {@code pojoType(...)}, in the join key expression {@code where("hub_id")} and in its SQL.
 */
@Data
public class AirportSat {
    /** Airport hub identifier; the only column read from the CSV file. */
    private String hub_id;
}

Airport_City_Link

import lombok.Data;

/**
 * Airport-to-city link row loaded from {@code T13_REF_AIRPORT_CITY_LINK.csv}.
 *
 * <p>Field names stay in snake_case: {@code FlinkCsv} binds CSV columns to them by name through
 * {@code pojoType(...)} and calls the Lombok-generated {@code getCity_hub_id()}.
 */
@Data
public class AirportCityLink {
    /** Hub identifier of the airport; matched against {@code AirportSat.hub_id}. */
    private String airport_hub_id;

    /** Hub identifier of the city; matched against {@code CitySat.hub_id}. */
    private String city_hub_id;
}

City_Sat

import lombok.Data;

/**
 * City row loaded from {@code T13_REF_CITY_SAT.csv}.
 *
 * <p>Field names stay in snake_case: {@code FlinkCsv} binds CSV columns to them by name through
 * {@code pojoType(...)} and calls the Lombok-generated getters such as {@code getCity_cd()}.
 */
@Data
public class CitySat {
    /** City hub identifier; join key towards both link tables. */
    private String hub_id;

    /** City code. */
    private String city_cd;

    /** City name, Chinese column ({@code NAME_CN}). */
    private String name_cn;

    /** City name, English column ({@code NAME_EN}). */
    private String name_en;
}

City_Country_Link

import lombok.Data;

/**
 * City-to-country link row loaded from {@code T13_REF_CITY_COUNTRY_LINK.csv}.
 *
 * <p>Field names stay in snake_case: {@code FlinkCsv} binds CSV columns to them by name through
 * {@code pojoType(...)} and calls the Lombok-generated {@code getCountry_hub_id()}.
 */
@Data
public class CityCountryLink {
    /** Hub identifier of the country; matched against {@code CountrySat.hub_id}. */
    private String country_hub_id;

    /** Hub identifier of the city; matched against {@code CitySat.hub_id}. */
    private String city_hub_id;
}

Country_Sat

import lombok.Data;

/**
 * Country row loaded from {@code T13_REF_COUNTRY_SAT.csv}.
 *
 * <p>Field names stay in snake_case: {@code FlinkCsv} binds CSV columns to them by name through
 * {@code pojoType(...)} and calls the Lombok-generated getters such as {@code getCountry_cd()}.
 */
@Data
public class CountrySat {
    /** Country hub identifier; matched against {@code CityCountryLink.country_hub_id}. */
    private String hub_id;

    /** Country code. */
    private String country_cd;

    /** Country name, Chinese column ({@code NAME_CN}). */
    private String name_cn;

    /** Country name, English column ({@code NAME_EN}). */
    private String name_en;
}

Flink_Csv

点击查看Flink_Csv代码
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.operators.Order;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.operators.MapOperator;
import org.apache.flink.api.java.operators.SortPartitionOperator;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple7;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.BatchTableEnvironment; import java.text.SimpleDateFormat;
import java.util.Date; public class FlinkCsv
{
public static void main(String[] args) throws Exception
{
long s4 = System.currentTimeMillis();
t4();
System.out.println((System.currentTimeMillis() - s4) + "u");
long s5 = System.currentTimeMillis();
t5();
System.out.println((System.currentTimeMillis() - s5) + "d");
} private static void t5() throws Exception
{
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
BatchTableEnvironment table_env = BatchTableEnvironment.getTableEnvironment(env);
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH-mm-ss SSS"); DataSet<AirportSat> data_airportsat = env.readCsvFile("D:\\T13_REF_AIRPORT_SAT.csv")
.fieldDelimiter(",").ignoreFirstLine().includeFields(true/*, true, false, true, true*/)
.pojoType(AirportSat.class, "hub_id"/*, "iata_cd", "name_cn", "name_en"*/); DataSet<AirportCityLink> data_airportcitylink = env.readCsvFile("D:\\T13_REF_AIRPORT_CITY_LINK.csv")
.fieldDelimiter(",").ignoreFirstLine().includeFields(false, true, true)
.pojoType(AirportCityLink.class, "airport_hub_id", "city_hub_id"); DataSet<CitySat> data_citysat = env.readCsvFile("D:\\T13_REF_CITY_SAT.csv")
.fieldDelimiter(",").ignoreFirstLine().includeFields(true, true, true, true)
.pojoType(CitySat.class, "hub_id", "city_cd", "name_cn", "name_en"); DataSet<CityCountryLink> data_citycountrylink = env.readCsvFile("D:\\T13_REF_CITY_COUNTRY_LINK.csv")
.fieldDelimiter(",").ignoreFirstLine().includeFields(false, true, true)
.pojoType(CityCountryLink.class, "country_hub_id", "city_hub_id"); DataSet<CountrySat> data_countrysat = env.readCsvFile("D:\\T13_REF_COUNTRY_SAT.csv")
.fieldDelimiter(",").ignoreFirstLine().includeFields(true, true, false, false, true, true)
.pojoType(CountrySat.class, "hub_id", "country_cd", "name_cn", "name_en"); table_env.registerTable("t13_ref_airport_sat", table_env.fromDataSet(data_airportsat));
table_env.registerTable("t13_ref_airport_city_link", table_env.fromDataSet(data_airportcitylink));
table_env.registerTable("t13_ref_city_sat", table_env.fromDataSet(data_citysat));
table_env.registerTable("t13_ref_city_country_link", table_env.fromDataSet(data_citycountrylink));
table_env.registerTable("t13_ref_country_sat", table_env.fromDataSet(data_countrysat)); String sql = "select count(*) \n" +
"\tfrom t13_ref_airport_sat x1,t13_ref_airport_city_link x2,\n" +
"\tt13_ref_city_sat x3,t13_ref_city_country_link x4,t13_ref_country_sat x5\n" +
"\twhere x1.hub_id=x2.airport_hub_id\n" +
"\t\tand x2.city_hub_id=x3.hub_id\n" +
"\t\tand x3.hub_id=x4.city_hub_id\n" +
"\t\tand x4.country_hub_id=x5.hub_id"; String sql_country = "select x5.name_cn country_cn_name,count(x1.hub_id) count_airport\n" +
"\tfrom t13_ref_airport_sat x1,t13_ref_airport_city_link x2,\n" +
"\tt13_ref_city_sat x3,t13_ref_city_country_link x4,t13_ref_country_sat x5\n" +
"\twhere x1.hub_id=x2.airport_hub_id\n" +
"\t\tand x2.city_hub_id=x3.hub_id\n" +
"\t\tand x3.hub_id=x4.city_hub_id\n" +
"\t\tand x4.country_hub_id=x5.hub_id\n" +
"\tgroup by x5.name_cn\n" +
"\torder by count_airport desc"; String sql_all = "select \n" +
"\tx5.country_cd,\n" +
"\tx5.name_cn country_name_cn,\n" +
"\tx5.name_en country_name_en,\n" +
"\tx3.city_cd,\n" +
"\tx3.name_cn city_cn_name,\n" +
"\tx3.name_en city_en_name,\n" +
"count(x1.hub_id) count_airport\n" +
"\tfrom t13_ref_airport_sat x1,t13_ref_airport_city_link x2,t13_ref_city_sat x3,t13_ref_city_country_link x4,t13_ref_country_sat x5\n" +
"\twhere x1.hub_id=x2.airport_hub_id\n" +
"\t\tand x2.city_hub_id=x3.hub_id\n" +
"\t\tand x3.hub_id=x4.city_hub_id\n" +
"\t\tand x4.country_hub_id=x5.hub_id\n" +
"\tgroup by x5.country_cd,x5.name_cn,x5.name_en,x3.city_cd,x3.name_cn,x3.name_en\n" +
"\torder by count_airport desc"; DataSet<Tuple1<Long>> map = table_env.toDataSet(table_env.sqlQuery(sql),
TypeInformation.of(new TypeHint<Tuple1<Long>>()
{
}));
map.print(); DataSet<Tuple2<String, Long>> map_country = table_env.toDataSet(table_env.sqlQuery(sql_country),
TypeInformation.of(new TypeHint<Tuple2<String, Long>>()
{
}));
System.out.println(map_country.count());
map_country.print(); Table result_country = table_env.sqlQuery(sql_country);
DataSet<Tuple7<String, String, String, String, String, String, Long>> map_all = table_env.toDataSet(table_env.sqlQuery(sql_all),
TypeInformation.of(new TypeHint<Tuple7<String, String, String, String, String, String, Long>>()
{
}));
System.out.println(map_all.count());
map_all.print(); map.writeAsCsv("D:\\Flink_CSV\\" + sdf.format(new Date()) + "______map.csv",
"\n", ",", FileSystem.WriteMode.OVERWRITE).setParallelism(1);
System.out.println("T打印完成______map...");
map_country.writeAsCsv("D:\\Flink_CSV\\" + sdf.format(new Date()) + "______map_country.csv",
"\n", ",", FileSystem.WriteMode.OVERWRITE).setParallelism(1);
System.out.println("T打印完成______map_country...");
map_all.writeAsCsv("D:\\Flink_CSV\\" + sdf.format(new Date()) + "______map_all.csv",
"\n", ",", FileSystem.WriteMode.OVERWRITE).setParallelism(1);
System.out.println("T打印完成______map_all..."); env.execute("Hello!@ Fuck...");
} private static void t4() throws Exception
{
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH-mm-ss SSS"); DataSet<AirportSat> data_airportsat = env.readCsvFile("D:\\T13_REF_AIRPORT_SAT.csv")
.fieldDelimiter(",").ignoreFirstLine().includeFields(true/*, true, false, true, true*/)
.pojoType(AirportSat.class, "hub_id"/*, "iata_cd", "name_cn", "name_en"*/); DataSet<AirportCityLink> data_airportcitylink = env.readCsvFile("D:\\T13_REF_AIRPORT_CITY_LINK.csv")
.fieldDelimiter(",").ignoreFirstLine().includeFields(false, true, true)
.pojoType(AirportCityLink.class, "airport_hub_id", "city_hub_id"); DataSet<CitySat> data_citysat = env.readCsvFile("D:\\T13_REF_CITY_SAT.csv")
.fieldDelimiter(",").ignoreFirstLine().includeFields(true, true, true, true)
.pojoType(CitySat.class, "hub_id", "city_cd", "name_cn", "name_en"); DataSet<CityCountryLink> data_citycountrylink = env.readCsvFile("D:\\T13_REF_CITY_COUNTRY_LINK.csv")
.fieldDelimiter(",").ignoreFirstLine().includeFields(false, true, true)
.pojoType(CityCountryLink.class, "country_hub_id", "city_hub_id"); DataSet<CountrySat> data_countrysat = env.readCsvFile("D:\\T13_REF_COUNTRY_SAT.csv")
.fieldDelimiter(",").ignoreFirstLine().includeFields(true, true, false, false, true, true)
.pojoType(CountrySat.class, "hub_id", "country_cd", "name_cn", "name_en"); MapOperator<Tuple2<Tuple2<Tuple2<Tuple2<AirportSat, AirportCityLink>, CitySat>, CityCountryLink>, CountrySat>,
Tuple7<String, String, String, String, String, String, Long>> map = data_airportsat
.join(data_airportcitylink).where("hub_id").equalTo("airport_hub_id")
.join(data_citysat).where(new KeySelector<Tuple2<AirportSat, AirportCityLink>, String>()
{
@Override
public String getKey(Tuple2<AirportSat, AirportCityLink> t) throws Exception
{
return t.f1.getCity_hub_id();
}
}).equalTo("hub_id")
.join(data_citycountrylink).where(new KeySelector<Tuple2<Tuple2<AirportSat, AirportCityLink>, CitySat>, String>()
{
@Override
public String getKey(Tuple2<Tuple2<AirportSat, AirportCityLink>, CitySat> t) throws Exception
{
return t.f1.getHub_id();
}
}).equalTo("city_hub_id")
.join(data_countrysat).where(new KeySelector<Tuple2<Tuple2<Tuple2<AirportSat, AirportCityLink>, CitySat>, CityCountryLink>, String>()
{
@Override
public String getKey(Tuple2<Tuple2<Tuple2<AirportSat, AirportCityLink>, CitySat>, CityCountryLink> t) throws Exception
{
return t.f1.getCountry_hub_id();
}
}).equalTo("hub_id")
.map(new MapFunction<Tuple2<Tuple2<Tuple2<Tuple2<AirportSat, AirportCityLink>, CitySat>, CityCountryLink>, CountrySat>,
Tuple7<String, String, String, String, String, String, Long>>()
{ @Override
public Tuple7<String, String, String, String, String, String, Long> map(
Tuple2<Tuple2<Tuple2<Tuple2<AirportSat, AirportCityLink>, CitySat>, CityCountryLink>, CountrySat> t) throws Exception
{
String country_cd = t.f1.getCountry_cd();
String country_cn_name = t.f1.getName_cn();
String country_en_name = t.f1.getName_en();
String city_cd = t.f0.f0.f1.getCity_cd();
String city_cn_name = t.f0.f0.f1.getName_cn();
String city_en_name = t.f0.f0.f1.getName_en();
long airport = 1L;
return new Tuple7<>(country_cd, country_cn_name, country_en_name, city_cd, city_cn_name, city_en_name, airport);
}
});
//--------------------------------------------------------------------------------------------------------------
System.out.println("总数量: " + map.count());
SortPartitionOperator<Tuple2<String, Long>> map_country = map
.map(new MapFunction<Tuple7<String, String, String, String, String, String, Long>, Tuple2<String, Long>>()
{
@Override
public Tuple2<String, Long> map(Tuple7<String, String, String, String, String, String, Long> t) throws Exception
{
return new Tuple2<>(t.f1, t.f6);
}
}).groupBy(0).sum(1).sortPartition(1, Order.DESCENDING);
System.out.println("国家分总数量: " + map_country.count());
//map_country.print();
SortPartitionOperator<Tuple7<String, String, String, String, String, String, Long>> map_all = map
.groupBy(0, 1, 2, 3, 4, 5).sum(6).sortPartition(6, Order.DESCENDING);
System.out.println("全分总数量: " + map_all.count());
//map_all.print(); map.writeAsCsv("D:\\Flink_CSV\\" + sdf.format(new Date()) + "______map.csv",
"\n", ",", FileSystem.WriteMode.OVERWRITE).setParallelism(1);
System.out.println("打印完成______map...");
map_country.writeAsCsv("D:\\Flink_CSV\\" + sdf.format(new Date()) + "______map_country.csv",
"\n", ",", FileSystem.WriteMode.OVERWRITE).setParallelism(1);
System.out.println("打印完成______map_country...");
map_all.writeAsCsv("D:\\Flink_CSV\\" + sdf.format(new Date()) + "______map_all.csv",
"\n", ",", FileSystem.WriteMode.OVERWRITE).setParallelism(1);
System.out.println("打印完成______map_all..."); env.execute("Hello!@ Fuck...");
}
}

最新文章

  1. Ajax异步刷新地址栏
  2. Logistic 回归(sigmoid函数,手机的评价,梯度上升,批处理梯度,随机梯度,从疝气病症预测病马的死亡率)
  3. FATFS文件系统
  4. iOS开发——UI篇Swift篇&UITextView
  5. Java String的== 与 equals小结
  6. net use \\192.168.54.145 /user:administrator "12345qwert"无法连接,错误码1326
  7. Codeforces Round #216 (Div. 2) D. Valera and Fools
  8. 阅读zepto.js的core中的Core methods
  9. C/C++常考面试题(一)
  10. (转载)SQL Server2008附加数据库之后显示为只读时解决方法
  11. React-Native学习手册----搭建基于ios平台的开发环境
  12. Matlab中hold on与hold off的用法
  13. 关于iOS与html交互,隐藏或修改html标签内容
  14. [ 高危 ] mt网主站SQL注入
  15. gitlab重置root的密码
  16. Android 真机调试
  17. 《Gradle权威指南》--Gradle构建脚本基础
  18. Hibernate DetachedCriteria实现
  19. 理解 Linux 的平均负载和性能监控
  20. protel99 se中出现许多Backup of 文件,修改过保存时,总会出现备份文件,怎么才能取消这一设置?

热门文章

  1. RabbitMQ学习之:(八)Topic Exchange (转贴+我的评论)
  2. Visual Studio Code 帮助查看器,指定的用于安装帮助内容的位置无效,或者您无权访问该位置
  3. const成员变量
  4. webbench接口并发测试
  5. 刀塔OMG塔防1.23单机版使用方法
  6. UUID相同导致的网络连接问题
  7. docker笔记(2)——docker镜像操作
  8. Web比赛4
  9. kernel32.dll 这个系统模块
  10. [转帖]【架构系列】龙芯loongson简介