遍历CSDN博客
2024-10-08 05:59:36
-- Example of a listing page URL:
-- http://blog.csdn.net/leixiaohua1020/article/list/14?viewmode=contents

--- Download the body of `url` with LuaSocket.
-- Raises an error carrying the reason when the request returns no body.
-- @param url string, address to fetch
-- @return string, response body
local function fetch(url)
    local http = require("socket.http")
    local body, err = http.request(url)
    if body == nil then
        error("request to " .. tostring(url) .. " failed: " .. tostring(err))
    end
    return body
end

--- Write `data` to "temp.html" in the current directory, overwriting it.
-- Raises an error if the file cannot be opened for writing.
-- @param data string, content to store
function saveData(data)
    local file = assert(io.open("temp.html", "w"))
    file:write(data)
    file:close()
end

--- Read the number of listing pages from a blog's article-list page.
-- Looks for the pagination <div> and the "共N页" text inside it.
-- Raises an error if the page cannot be fetched or the marker is missing.
-- NOTE(review): the match is byte-wise, so this file and the page must use
-- the same encoding for "共…页" (presumably UTF-8) — confirm.
-- @param url string, address of the article-list page
-- @return string, the page count as a string of decimal digits
function getPageCount(url)
    local resp = fetch(url)

    -- Plain (non-pattern) searches: both markers are literal text.
    local s = string.find(resp, "<div id=\"papelist\" class=\"pagelist\">", 1, true)
    if s == nil then
        error("pagination block not found in " .. tostring(url))
    end
    local _, e = string.find(resp, "</div>", s, true)
    if e == nil then
        error("pagination block is not terminated in " .. tostring(url))
    end

    local divData = string.sub(resp, s, e)
    -- A capture avoids hand-counting the bytes of the surrounding characters.
    local pageCount = string.match(divData, "共(%d+)页")
    if pageCount == nil then
        error("page count not found in pagination block of " .. tostring(url))
    end
    return pageCount
end

--- Print the title of every article of a CSDN blog, one per line.
-- Prints a message and returns early when `username` is nil or empty.
-- @param username string, CSDN account name
function getTitles(username)
    if username == nil or username == "" then
        print("username is nil")
        return
    end

    local preUrl = "http://blog.csdn.net/"
    local endUrl = "?viewmode=contents"

    -- Characters skipped between the end of the link's opening tag and the
    -- title, and between the title and "</a></span>".
    -- NOTE(review): these fixed offsets presumably match the whitespace layout
    -- of the listing HTML at the time of writing — verify against a live page.
    local TITLE_LEAD = 11
    local TITLE_TRAIL = 23

    -- getPageCount returns digits as a string; convert explicitly for the loop.
    local pageCount = tonumber(getPageCount(preUrl .. username .. endUrl))

    for page = 1, pageCount do
        local resp = fetch(preUrl .. username .. "/article/list/" .. page .. endUrl)

        local _, pos = string.find(resp, "link_title", 1, true)
        while pos ~= nil do
            local _, linkEnd = string.find(resp, "details/%d+\">", pos)
            if linkEnd == nil then
                break -- marker without an article link: nothing more to read
            end
            local closeStart = string.find(resp, "</a></span>", linkEnd, true)
            if closeStart == nil then
                break
            end
            print(string.sub(resp, linkEnd + TITLE_LEAD, closeStart - TITLE_TRAIL))
            _, pos = string.find(resp, "link_title", pos, true)
        end
    end
end

getTitles("leixiaohua1020")
如果想保存一篇指定的博文(只要正文),该怎么做呢?
其实只要我们预先保存下博文的js、css和一些控制正文的html标记,然后把正文内容填充进去就可以了。
以下lua脚本可以获得
<div id="article_content" class="article_content">
正文
</div>
start.html
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<script src="http://static.blog.csdn.net/scripts/jquery.js" type="text/javascript"></script>
<link rel="Stylesheet" type="text/css" href="http://static.blog.csdn.net/skin/skin-blue/css/style.css?v=1.1" />
<link rel="shortcut icon" href="http://c.csdnimg.cn/public/favicon.ico" />
<link type="text/css" rel="stylesheet" href="http://static.blog.csdn.net/scripts/SyntaxHighlighter/styles/default.css" />
</head>
<body>
<div id="container">
<script type="text/javascript">
var username = "x_iya";
var _blogger = username;
var blog_address = "http://blog.csdn.net/x_iya";
var static_host = "http://static.blog.csdn.net";
var currentUserName = "";
</script>
<div id="body">
<div id="main">
<div class="main">
<div id="article_details" class="details">
end.html
</div>
</div>
</div>
<script type="text/javascript" src="http://static.blog.csdn.net/scripts/newblog.min.js"></script>
</div>
</body>
</html>
lua代码:
--- Cut the article body out of a blog page.
-- The result starts at the opening tag of the "article_content" <div> and
-- stops just before the first "<!--" that follows it.
-- @param html string, full page source
-- @return string with the extracted fragment, or nil plus an error message
function ExtractArticle(html)
    local opening = "<div id=\"article_content\" class=\"article_content\">"
    local s = string.find(html, opening, 1, true)
    if s == nil then
        return nil, "article_content block not found"
    end
    -- Plain search is required here: in pattern mode "-" is a quantifier, so
    -- "<!--" would also match "<-" and could cut the article short.
    local e = string.find(html, "<!--", s, true)
    if e == nil then
        return nil, "end of article_content block not found"
    end
    return string.sub(html, s, e - 1)
end

--- Download a blog post and return only its article body.
-- Raises an error if the request fails or the article markers are missing.
-- @param url string, address of the post
-- @return string, HTML fragment of the article body
function GetHtml(url)
    local http = require("socket.http")
    local resp, err = http.request(url)
    if resp == nil then
        error("request to " .. tostring(url) .. " failed: " .. tostring(err))
    end
    local data = assert(ExtractArticle(resp))
    return data
end

--- Write `data` to "csdn.html" in the current directory, overwriting it.
-- Raises an error if the file cannot be opened for writing.
-- @param data string, content to store
function SaveData(data)
    local file = assert(io.open("csdn.html", "w"))
    file:write(data)
    file:close()
end

--- Read a whole file into a string.
-- Raises an error if the file cannot be opened.
-- @param filepath string, path of the file to read
-- @return string, file content
function ReadData(filepath)
    local file = assert(io.open(filepath, "r"))
    local data = file:read("*a")
    file:close()
    return data
end

-- local url = "http://blog.csdn.net/x_iya/article/details/52327827"
-- Command-line entry point: wrap the article body of the given post between
-- the contents of start.html and end.html and store the result.
if arg and #arg == 1 then
    local url = arg[1]
    local startData = ReadData("start.html")
    local endData = ReadData("end.html")
    local html = startData .. GetHtml(url) .. endData
    SaveData(html)
else
    print("Usage: lua csdn.lua url")
end
最新文章
- html&css中的文字对齐问题
- [转]iOS开发中的火星坐标系及各种坐标系转换算法
- HTML总结
- 002-添加网站ico图标
- UIButtonTypeSystem backBarButtonItem
- EXCEL导入(反射)
- 【MVC】自定义Scaffold Template
- java读取配置文件中数据
- Stream Collector
- Mac上安装brew
- Servlet & JSP - 转发与重定向的区别
- ListView之SimpleAdapter
- GIT分支管理是一门艺术(转)
- 阅读MDN文档之CSS选择器介绍(一)
- selenium的使用技巧及集成到scrapy
- HBase之Table.put客户端流程(续)
- Python:SQLMap源码精读—start函数
- 采用spring的schedule注解配置定时任务
- MySQL-8.0.x 新特性之索引页合并
- PHP 数字序数&字母序数 相互转化
热门文章
- Java集合使用之next方法与remove方法 | Java集合使用之remove方法使用易错
- C# async await 死锁问题总结
- wow.js 使用及效果列表
- 使用阿里云 ECS 快速部署 WordPress 博客系统
- SpringBoot介绍与使用
- Spring Boot2 系列教程(三十一)Spring Boot 构建 RESTful 风格应用
- OpenLayers4 隐藏(hide)Feature
- js中如何将伪数组转换成数组
- 简单介绍HTTP的请求(get请求和post请求)以及对应的响应的内容
- Django后台管理系统的使用