--http://blog.csdn.net/leixiaohua1020/article/list/14?viewmode=contents

--- Write `data` to "temp.html" in the current directory, replacing any
-- existing content.
-- @param data string to write
function saveData(data)
  -- assert turns an io.open failure into a readable error instead of a
  -- nil-index crash on the next line.
  local file = assert(io.open("temp.html", "w"))
  file:write(data)
  file:close()
end

--- Fetch `url` and extract the page count shown in the pager <div>.
-- @param url address of a blog article-list page
-- @return the digits found between "共" and "页" (as a string), or nil plus
--         an error message when the request fails or the markup is not found
function getPageCount(url)
  local http = require("socket.http")
  local resp, err = http.request(url)
  if not resp then
    return nil, "request failed: " .. tostring(err)
  end
  -- Literal searches: pass plain=true so no character is read as pattern magic.
  local s = string.find(resp, "<div id=\"papelist\" class=\"pagelist\">", 1, true)
  if not s then
    return nil, "pager div not found"
  end
  local e = string.find(resp, "</div>", s, true)
  if not e then
    return nil, "pager div is not closed"
  end
  local divData = string.sub(resp, s, e + 5)
  -- A capture avoids hard-coding the byte width of the surrounding
  -- multi-byte characters.
  local pageCount = string.match(divData, "共(%d+)页")
  if not pageCount then
    return nil, "page count not found in pager div"
  end
  return pageCount
end

--- Print the titles found on every article-list page of a blog user.
-- Prints a message and returns early when `username` is nil or empty, or
-- when the page count cannot be determined.
-- @param username blog user name used to build the list URLs
function getTitles(username)
  if username == nil or username == "" then
    print("username is nil")
    return
  end
  local http = require("socket.http")
  local preUrl = "http://blog.csdn.net/"
  local endUrl = "?viewmode=contents"
  local countText, err = getPageCount(preUrl .. username .. endUrl)
  -- getPageCount yields a string; a numeric for-loop limit should be a number.
  local pageCount = tonumber(countText)
  if not pageCount then
    print("cannot determine page count: " .. tostring(err))
    return
  end
  for page = 1, pageCount do
    local blogUrl = preUrl .. username .. "/article/list/" .. page .. endUrl
    local resp = http.request(blogUrl)
    if resp then
      local _, pos = string.find(resp, "link_title", 1, true)
      while pos ~= nil do
        local _, idEnd = string.find(resp, "details/%d+\">", pos)
        local closeStart = idEnd and string.find(resp, "</a></span>", idEnd, true)
        if not closeStart then
          -- Unexpected markup after a link_title marker: stop scanning this page.
          break
        end
        -- NOTE(review): the fixed offsets (+11 / -23) presumably skip the
        -- markup/whitespace surrounding the title text; they are tied to the
        -- exact page layout - verify if titles look clipped.
        print(string.sub(resp, idEnd + 11, closeStart - 23))
        -- pos is the end of the previous match, so this finds the next marker.
        _, pos = string.find(resp, "link_title", pos, true)
      end
    end
  end
end

getTitles("leixiaohua1020")

如果想保存一篇指定的博文(只要正文),该怎么做呢?

其实只要我们预先保存下博文的js、css和一些控制正文的html标记,然后把正文内容填充进去就可以了。

下文的lua脚本可以从博文页面中提取如下的正文部分:

<div id="article_content" class="article_content">
正文
</div>

start.html

<!-- start.html: opening half of the saved page. The Lua script in this
     document concatenates this file, the fetched article_content <div>,
     and end.html, in that order. -->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<script src="http://static.blog.csdn.net/scripts/jquery.js" type="text/javascript"></script>
<link rel="Stylesheet" type="text/css" href="http://static.blog.csdn.net/skin/skin-blue/css/style.css?v=1.1" />
<link rel="shortcut icon" href="http://c.csdnimg.cn/public/favicon.ico" />
<link type="text/css" rel="stylesheet" href="http://static.blog.csdn.net/scripts/SyntaxHighlighter/styles/default.css" />
</head>
<body>
<div id="container">
<!-- NOTE(review): these globals are presumably read by the CSDN scripts
     loaded by this page; confirm which of them are actually required. -->
<script type="text/javascript">
var username = "x_iya";
var _blogger = username;
var blog_address = "http://blog.csdn.net/x_iya";
var static_host = "http://static.blog.csdn.net";
var currentUserName = "";
</script>
<div id="body">
<div id="main">
<div class="main">
<div id="article_details" class="details">
<!-- Left open on purpose: the article markup and end.html are appended
     after this point. -->

end.html

<!-- end.html: closing half of the saved page, appended after the article
     content.
     NOTE(review): start.html opens five <div> elements while this file
     closes four; verify the resulting nesting. -->
</div>
</div>
</div>
<script type="text/javascript" src="http://static.blog.csdn.net/scripts/newblog.min.js"></script>
</div>
</body>
</html>

lua代码:

--- Fetch `url` and return the article body markup.
-- The returned slice starts at the article_content <div> opening tag and
-- ends just before the first "<!--" that follows it.
-- @param url address of a single blog article page
-- @return the extracted markup, or nil plus an error message when the
--         request fails or the expected markers are missing
function GetHtml(url)
  local http = require("socket.http")
  local resp, err = http.request(url)
  if not resp then
    return nil, "request failed: " .. tostring(err)
  end
  local s = string.find(resp, "<div id=\"article_content\" class=\"article_content\">", 1, true)
  if not s then
    return nil, "article_content div not found"
  end
  -- plain=true is required here: as a pattern, "-" is a lazy quantifier, so
  -- "<!--" would also match "<-" and cut the article short.
  local e = string.find(resp, "<!--", s, true)
  if not e then
    return nil, "end-of-article marker not found"
  end
  return string.sub(resp, s, e - 1)
end

--- Write `data` to "csdn.html" in the current directory, replacing any
-- existing content.
-- @param data string to write
function SaveData(data)
  local file = assert(io.open("csdn.html", "w"))
  file:write(data)
  file:close()
end

--- Read and return the whole content of the file at `filepath`.
-- Raises an error carrying the io.open message when the file cannot be opened.
-- @param filepath path of the file to read
-- @return the file content as a string
function ReadData(filepath)
  local file = assert(io.open(filepath, "r"))
  local data = file:read("*a")
  file:close()
  return data
end

--local url = "http://blog.csdn.net/x_iya/article/details/52327827"
if #arg == 1 then
  local url = arg[1]
  local startData = ReadData("start.html")
  local endData = ReadData("end.html")
  local body, err = GetHtml(url)
  if body then
    SaveData(startData .. body .. endData)
  else
    print("failed to fetch article: " .. tostring(err))
  end
else
  print("Usage: lua csdn.lua url")
end

最新文章

  1. html&css中的文字对齐问题
  2. [转]iOS开发中的火星坐标系及各种坐标系转换算法
  3. HTML总结
  4. 002-添加网站ico图标
  5. UIButtonTypeSystem backBarButtonItem
  6. EXCEL导入(反射)
  7. 【MVC】自定义Scaffold Template
  8. java读取配置文件中数据
  9. Stream Collector
  10. Mac上安装brew
  11. Servlet & JSP - 转发与重定向的区别
  12. ListView之SimpleAdapter
  13. GIT分支管理是一门艺术(转)
  14. 阅读MDN文档之CSS选择器介绍(一)
  15. selenium的使用技巧及集成到scrapy
  16. HBase之Table.put客户端流程(续)
  17. Python:SQLMap源码精读—start函数
  18. 采用spring的schedule注解配置定时任务
  19. MySQL-8.0.x 新特性之索引页合并
  20. PHP 数字序数&字母序数 相互转化

热门文章

  1. Java集合使用之next方法与remove方法 | Java集合使用之remove方法使用易错
  2. C# async await 死锁问题总结
  3. wow.js 使用及效果列表
  4. 使用阿里云 ECS 快速部署 WordPress 博客系统
  5. SpringBoot介绍与使用
  6. Spring Boot2 系列教程(三十一)Spring Boot 构建 RESTful 风格应用
  7. OpenLayers4 隐藏(hide)Feature
  8. js中如何将伪数组转换成数组
  9. 简单介绍HTTP的请求(get请求和post请求)以及对应的响应的内容
  10. Django后台管理系统的使用