需求介绍:

爬取从指定站点出发、开往全国各站点的所有列车班次详情,并将结果写入 MySQL。

步骤及所遇到的问题:

1.获取全国站点静态信息   https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9002 (这是一个静态的js文件)

2.借助浏览器开发者工具(本人使用谷歌浏览器,按 F12),在 12306 相关页面中找出所需要的接口

3.寻找规律,注意去重写进mysql

直接上代码,看注释:

/**
  * Crawls 12306 for the trains that leave a given station and stores the full
  * stop list of every train whose queried destination is also its terminus
  * into the `train_schedules` MySQL table.
  */
object TrainSchedulesMain {

  /**
    * Entry point.
    *
    * @param args `args(0)` is a comma-separated list of departure dates that are
    *             interpolated verbatim into the request URLs (e.g. `2019-01-07`);
    *             `args(1)` is the Chinese name of the departure station.
    */
  def main(args: Array[String]): Unit = {
    if (args == null || args.length < 2) {
      System.err.println("args is null or missing")
      System.exit(1)
    }
    val dateStrList = args(0).trim
    val station = args(1).trim
    assert(StringUtils.isNotBlank(dateStrList), "dateStrList is null or empty")
    assert(StringUtils.isNotBlank(station), "station is null or empty")
    // Echo the arguments.
    println(args.mkString(" "))

    // Station name -> station code for every station in the country.
    val allStationsMap = analysisAllStations()

    // Departure stations. A city can have several of them, e.g.
    // List("深圳", "深圳西", "深圳东", "深圳坪山", "深圳北", "福田").
    val fromStations = List(station)

    // Fail fast with a readable message instead of a bare NoSuchElementException.
    val unknownStations = fromStations.filterNot(allStationsMap.contains)
    if (unknownStations.nonEmpty) {
      System.err.println("station not found in station list: " + unknownStations.mkString(","))
      System.exit(1)
    }

    // Tolerate blanks around the commas and empty entries ("a, b," -> a, b).
    val dates = dateStrList.split(",").map(_.trim).filter(_.nonEmpty)

    for (dateStr <- dates; fromName <- fromStations) {
      val fromStationRequest = allStationsMap(fromName)
      allStationsMap.valuesIterator.zipWithIndex.foreach { case (toStationRequest, idx) =>
        // Query 12306 for this (departure, arrival) pair.
        excuteAnaly(dateStr, fromStationRequest, toStationRequest)
        println(dateStr)
        println("进度:" + fromName)
        println(idx + 1)
      }
    }
  }

  /**
    * Queries all trains between two stations on one date and hands every
    * result row to [[saveTerminalTrain]].
    *
    * @param dateStr            departure date as it is sent to 12306
    * @param fromStationRequest code of the departure station
    * @param toStationRequest   code of the arrival station
    */
  private def excuteAnaly(dateStr: String, fromStationRequest: String, toStationRequest: String): Unit = {
    val url1 =
      s"""https://kyfw.12306.cn/otn/leftTicket/queryZ?leftTicketDTO.train_date=${dateStr}&leftTicketDTO.from_station=${fromStationRequest}&leftTicketDTO.to_station=${toStationRequest}&purpose_codes=ADULT"""
    println("url1: " + url1)
    val responseStr = HttpRequest.sendGet(url1)
    println("url1Res: " + responseStr)
    if (StringUtils.isNotBlank(responseStr) && JSONUtil.isJson(responseStr)) {
      val allStationModel = JSONUtil.toJavaBean(responseStr, classOf[AllStationTimes])
      // Only a 200 response that actually carries a result list is processed.
      if (allStationModel != null && allStationModel.getHttpstatus == 200 && allStationModel.getData != null) {
        val resList = allStationModel.getData.getResult
        if (resList != null) {
          for (i <- 0 until resList.size()) {
            saveTerminalTrain(dateStr, resList.get(i))
          }
        }
      }
    }
  }

  /**
    * Parses one `|`-separated result row. When the queried destination is the
    * train's terminus, the stop list of that train is fetched and stored.
    * Rows for intermediate stops are skipped: they would produce duplicate
    * and incomplete records for the same train.
    *
    * @param dateStr departure date as it is sent to 12306
    * @param row     one raw result row of the left-ticket query
    */
  private def saveTerminalTrain(dateStr: String, row: String): Unit = {
    // Fields are counted from the "预订" marker onwards.
    val indexNumStart = if (row == null) -1 else row.indexOf("预订")
    if (indexNumStart > -1) {
      val arrs = row.substring(indexNumStart).split("\\|")
      // Skip malformed rows instead of failing on a missing field.
      if (arrs.length > 6) {
        val trainNo = arrs(1)
        val endStation = arrs(4)
        val fromStation = arrs(5)
        val toStation = arrs(6)
        if (toStation.trim.equals(endStation.trim)) {
          saveSchedule(dateStr, trainNo, fromStation, toStation)
        }
      }
    }
  }

  /**
    * Fetches the ordered stop list of one train and inserts it into
    * `train_schedules` unless an identical record already exists.
    */
  private def saveSchedule(dateStr: String, trainNo: String, fromStation: String, toStation: String): Unit = {
    val url2 =
      s"""https://kyfw.12306.cn/otn/czxx/queryByTrainNo?train_no=${trainNo}&from_station_telecode=${fromStation}&to_station_telecode=${toStation}&depart_date=${dateStr}"""
    println("url2: " + url2)
    val res = HttpRequest.sendGet(url2)
    println("url2Res: " + res)
    if (StringUtils.isNotBlank(res) && JSONUtil.isJson(res)) {
      val trainSchedulesModel = JSONUtil.toJavaBean(res, classOf[TrainSchedules])
      if (trainSchedulesModel != null && trainSchedulesModel.getData != null) {
        val stops = trainSchedulesModel.getData.getData
        if (stops != null && stops.size() > 0) {
          val data0 = stops.get(0)
          val lastStop = stops.get(stops.size() - 1)

          // The SQL is assembled without stripMargin on purpose: stripMargin
          // runs after interpolation and could strip characters from the data.
          val existsSql = Seq(
            "select *",
            "from train_schedules",
            "where train_code=" + sqlQuote(data0.getStation_train_code),
            "and start_station_name=" + sqlQuote(data0.getStart_station_name),
            "and end_station_name=" + sqlQuote(data0.getEnd_station_name),
            "and start_time=" + sqlQuote(data0.getStart_time),
            "and dates=" + sqlQuote(dateStr)
          ).mkString("\n")

          if (!MysqlHandleUtil(MysqlConnect.trainDB).isHasValue(existsSql)) {
            val values = Seq(
              sqlQuote(data0.getStation_train_code),
              sqlQuote(data0.getStart_station_name),
              sqlQuote(data0.getEnd_station_name),
              sqlQuote(data0.getStart_time),
              sqlQuote(lastStop.getArrive_time),
              sqlQuote(dateStr),
              sqlQuote(JSONUtil.toJsonString(stops))
            ).mkString(",")
            val sql = Seq(
              "insert into",
              "train_schedules(`train_code`,`start_station_name`,`end_station_name`,`start_time`,`arrive_time`,`dates`,`data`)",
              "values(" + values + ")"
            ).mkString("\n")
            MysqlHandleUtil(MysqlConnect.trainDB).insertData(sql)
          }
        }
      }
    }
  }

  /**
    * Renders a value as a single-quoted SQL string literal, escaping
    * backslashes and single quotes so response data cannot break the statement.
    *
    * NOTE(review): assumes the MySQL default sql_mode (backslash escapes
    * enabled). Parameterized statements would be preferable if
    * MysqlHandleUtil supports them - its API is not visible here.
    */
  private def sqlQuote(value: Any): String = {
    val text = s"$value"
    "'" + text.replace("\\", "\\\\").replace("'", "''") + "'"
  }

  /**
    * Downloads the static station list and parses it into a map from
    * station name (Chinese) to station code.
    *
    * The payload is split on `@`; the chunk before the first `@` is skipped,
    * and in every following `|`-separated entry field 1 becomes the key and
    * field 2 the value. Entries with fewer than three fields are ignored.
    *
    * @return mutable map of station name to station code
    */
  def analysisAllStations(): collection.mutable.HashMap[String, String] = {
    val hashMap = new collection.mutable.HashMap[String, String]()
    val url = "https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9002"
    val response = HttpRequest.sendGet(url)
    assert(StringUtils.isNotBlank(response), "response is null or empty")
    response.split("@").drop(1).foreach { entry =>
      val ars = entry.split("\\|")
      if (ars.length > 2) {
        hashMap.put(ars(1), ars(2))
      }
    }
    hashMap
  }
}

  

效果:

最新文章

  1. 用java String类的getBytes(String charsetName)和String(byte[] bytes, String charsetName)解决乱码问题
  2. spring mvc学习笔记二:@RequestMapping
  3. html5-表单
  4. [mysql]brew 安装 配置 操作 mysql(中文问题)
  5. Windows下python的配置
  6. 安卓虚拟机启动失败intel haxm未安装
  7. Cassandra 技术选型的问题
  8. 预告:准备开个坑,集中学习一下esp32模块
  9. HTTP2.0那些事
  10. BZOJ NOI十连测 第二测 T1
  11. Java面试题—初级(1)
  12. [SCOI2008]着色方案
  13. CSS学习笔记3:选择器及优先级
  14. Node.js 串口通讯 node-serialport 使用说明
  15. Vue Baidu Map局部注册实现和地图绘点
  16. linux 学习笔记 groupadd创建组
  17. PCL点云特征描述与提取(4)
  18. Shell脚本编程入门到放弃
  19. 20155211 2016-2017-2 《Java程序设计》第一周学习总结
  20. CSS 伪类和伪元素--pseudo

热门文章

  1. 数据迁移:MSSQL脚本文件过大,客户端没有足够的内存继续执行程序
  2. python传递参数给shell
  3. Python学习---django下的cookie操作 180201
  4. Django路由系统---url无命名分组
  5. linux setup 相关text mode图形配置工具的安装
  6. jq实现 元素显示后 点击页面的任何位置除元素本身外 隐藏元素
  7. 高性能 Socket 组件 HP-Socket v3.2.1-RC3 公布
  8. tar 打包带软连接的文件
  9. ubuntu ibus 输入法总在左下角不跟随光标的处理
  10. mongodb3.2副本集配置