这是一个用 Qt 编写的简单 C++ 爬虫,效率并不是很高...

 #include<stdio.h>
// NOTE(review): this listing has lost its numeric literals (array sizes, digit
// constants, divisors, index offsets, call arguments, return value), so it does
// not compile as shown and the missing values cannot be recovered from this
// file. It uses only <stdio.h> and appears to be separate from the Qt crawler
// sources that follow.
//
// s1 / s2: global int tables written by fun() and read by main().
// Presumably running (prefix) counts indexed by the number itself — TODO confirm.
int s1[],s2[];
// fun(a, b): walks every integer i in [a, b] and fills s1/s2.
//  - First inner loop: repeatedly takes `ii % <missing>` and divides ii by a
//    missing constant (presumably a digit-by-digit scan — confirm), setting
//    t1/t2/t3 when the extracted value equals one of three missing constants.
//  - If all three flags are set, s1 gets an addition applied and a second scan
//    looks at three neighbouring extracted values (a, b, c) to decide t4, which
//    controls whether s2 also gets an addition.
//  - Otherwise both tables are carried over without an addition.
void fun(int a, int b)
{
int i,ii;
bool t1,t2,t3,t4;
// Seeds entries of both tables before the main loop (indices/value missing).
s1[] = s2[] = s1[] = s2[] = ;
for(i=a; i <= b; i++){
ii = i;
t1 = t2 = t3 = t4 =false;
while(ii != ){
// NOTE: this local `a` shadows the parameter `a` inside the loop body.
int a = ii %;
if( a == )
{
t1 = true;
}
else if( a == )
{
t2 = true;
}
else if( a == )
{
t3 = true;
}
ii = ii / ;
}
if(t1 && t2 && t3){
// All three target values were seen somewhere in i.
s1[i-] = s1[i-] + ;
ii = i;
while(ii != ){
// Locals a and b shadow the function parameters within this loop body.
int a = ii % ;
int b = (ii / ) % ;
int c = (ii / ) % ;
if( c > && a == && b == && c ==)
t4 = true;
ii = ii / ;
}
if(t4)
s2[i-] = s2[i-] + ;
else
s2[i-] = s2[i-];
}
else{
s2[i-] = s2[i-];
s1[i-] = s1[i-];
}
}
// The closing brace of fun() and the head of main() are fused on the next line.
} int main()
{
// i is the case counter printed in each output line (initial value missing).
int a,b,i=;
// Fill the tables once up front (range arguments missing), then answer queries.
fun(,);
// Reads pairs until scanf reports EOF. A non-numeric token makes scanf return
// 0 rather than EOF, so malformed input would not end this loop.
while(scanf("%d%d",&a,&b) != EOF){
// Both branches print the difference of two table entries with the same
// format; the index offsets that presumably distinguish them are missing.
if(a == )
printf("Case %d:%d %d\n",i,s1[b-]-s1[a-],s2[b-]-s2[a-]);
else
printf("Case %d:%d %d\n",i,s1[b-]-s1[a-],s2[b-]-s2[a-]);
i++;
}
return ;
}
 #include"urlThread.h"
#include<QFile>
#include<QMessageBox>
#include<QTextStream>
#include <QMainWindow>
// Thread body: simply runs open(). Reached through start() in startThread().
void urlThread::run()
{
open();
// (closing brace of run() is fused with the head of startThread() below)
} void urlThread::startThread()
{
// Slot: launches the thread, which in turn executes run().
start();
} // Display the URLs that were found
// Reads "url.txt" from the current working directory and forwards lines to
// listeners through send(), which emits sendMessage.
// NOTE(review): the comparison operand in the loop condition and the Sleep()
// argument are missing from this listing; their values cannot be told from here.
void urlThread::open()
{
QString path = "url.txt";
QFile file(path);
if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) {
// QMessageBox::warning(this,tr("Read File"),
// tr("Cannot open file:\n%1").arg(path));
// Report the failure through the signal instead of a message box.
send("error!cannot open url.txt!");
return;
}
QTextStream in(&file);
// NOTE(review): readLine() is called twice per iteration — once in the
// condition and once in the body — so the line tested in the condition is
// consumed and never sent. Whether that is intended depends on the layout of
// url.txt, which is not visible here; confirm against the code that writes it.
while(in.readLine().compare("") != ){
//ui->textBrowser->append(in.readLine());
// q2s converts to std::string; send() converts back with s2q before emitting.
send(q2s(in.readLine()));
// Pause between messages (duration missing in this listing).
Sleep();
}
file.close();
}
 #include "mainwindow.h"
#include <QApplication>

// Application entry point: creates the QApplication, shows the main window
// with its title set, and hands control to the Qt event loop.
//
// The function head previously shared a line with the #include directive
// above. Everything after the header name on a directive line belongs to the
// directive, so the definition must start on its own line.
int main(int argc, char *argv[])
{
    QApplication a(argc, argv);

    MainWindow w;
    w.setWindowTitle("小小爬虫");
    w.show();

    // exec() blocks until the application quits; its result is the exit code.
    return a.exec();
}
 #include "mainwindow.h"
// NOTE(review): several lines in this listing fuse the end of one definition
// with the head of the next (and, on the first line, an #include with the
// constructor head). A preprocessor directive has to sit on its own line, so
// the first line needs splitting before this can build.
//
// Constructor: builds the generated UI and wires the widgets:
//   start button released    -> beginGeturl()
//   display button released  -> uth.startThread()
//   uth / crawler sendMessage -> receiveMessage()
#include "ui_mainwindow.h" MainWindow::MainWindow(QWidget *parent) :
QMainWindow(parent),
ui(new Ui::MainWindow)
{
ui->setupUi(this);
QObject::connect(ui->start,SIGNAL(released()),this,SLOT(beginGeturl()));
//QObject::connect(ui->display,SIGNAL(released()),this,SLOT(open()));
QObject::connect(ui->display,SIGNAL(released()),&uth,SLOT(startThread()));
QObject::connect(&uth,&urlThread::sendMessage,this,&MainWindow::receiveMessage);
QObject::connect(&crawler,&Crawler::sendMessage,this,&MainWindow::receiveMessage);
// Destructor: releases the UI object allocated in the constructor.
} MainWindow::~MainWindow()
{
delete ui;
// receiveMessage: appends the received text to the text browser and moves the
// cursor to the end of the document.
} void MainWindow::receiveMessage(const QString name)
{
ui->textBrowser->append(name);
ui->textBrowser->moveCursor(QTextCursor::End);
// open: reads "url.txt" and forwards lines through crawler.send(). The only
// connect() that targeted this slot is commented out in the constructor above.
} void MainWindow::open()
{
QString path = "url.txt";
QFile file(path);
if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) {
QMessageBox::warning(this,tr("Read File"),
tr("Cannot open file:\n%1").arg(path));
return;
}
QTextStream in(&file);
// NOTE(review): the comparison operand is missing from this listing. Also,
// readLine() runs once in the condition and again in the body, so the line
// tested in the condition is never forwarded — confirm this matches the
// layout of url.txt.
while(in.readLine().compare("") != ){
//ui->textBrowser->append(in.readLine());
crawler.send(q2s(in.readLine()));
}
file.close();
// beginGeturl: collects start URL, depth and filter from the line edits,
// configures the crawler and starts it.
} void MainWindow::beginGeturl()
{
//crawler = new Crawler();
// NOTE(review): `dep` is declared but never used in this function.
string url = "" ,dep, filter = "www";
if(!ui->site->text().isEmpty())
url = q2s(ui->site->text());
// addURL is called even when the site field was empty (url stays "").
crawler.addURL(url);
// Default depth used when the depth field is empty (value missing in listing).
int depth = ;
if(!ui->depth->text().isEmpty())
{
// `url` is reused here as a scratch buffer for the depth text.
url = q2s(ui->depth->text());
// atoi yields 0 for text that does not start with a number.
depth = atoi(url.c_str());
}
if(!ui->filter->text().isEmpty())
filter = q2s(ui->filter->text());
crawler.setJdugeDomain(filter);
crawler.setDepth(depth);
crawler.startThread();
}
 #ifndef CRAWLER_H
#define CRAWLER_H #include<set>
#include<string>
#include<queue>
#include "winsock2.h"
#include <iostream>
#include <fstream>
#include <stdio.h>
#include<time.h>
#include<winsock.h>
#include<QThread> #pragma comment(lib, "ws2_32.lib")
using namespace std; bool ParseURL(const string & url, string & host, string & resource);
bool GetHttpResponse(const string & url, char * &response, int &bytesRead);
QString s2q(const string &s);
// NOTE(review): the next line fuses a function declaration, a #define and the
// class head. A #define is only recognised at the start of a line, so these
// need to be split onto separate lines before this header can compile.
//
// Crawler: QThread subclass that reports text to the UI through the
// sendMessage signal. Only declarations are visible here; the roles of the
// containers below are inferred from their names — confirm against the
// implementation file.
string q2s(const QString &s); #define DEFAULT_PAGE_BUF_SIZE 1000000 class Crawler: public QThread
{
Q_OBJECT
private:
// URL bookkeeping (presumably: pending queue, its membership set, finished
// URLs, failed URLs, and disallowed paths — TODO confirm).
queue<string> urlWaiting;
set<string> urlWaitset;
set<string> urlProcessed;
set<string> urlError;
set<string> disallow;
set<string>::iterator it;
int numFindUrl;
time_t starttime, finish;
string filter;
// (the `public:` label is fused onto the end of the next line)
int depth; public:
// Assigning the C string "\0" leaves `filter` empty. The initial value of
// numFindUrl is missing from this listing. `depth` is not set here; it is
// only assigned through setDepth().
Crawler(){ filter = "\0";numFindUrl = ;}
~Crawler(){}
void begin();
void setDepth(int depth);
void processURL(string& strUrl);
void addURL(string url);
void log(string entry, int num);
void HTMLParse(string & htmlResponse, const string & host);
bool getRobotx(const string & url, char * &response, int &bytesRead);
void setJdugeDomain(const string domain);
long urlOtherWebsite(string url);
// Converts the std::string to a QString with s2q and emits it via sendMessage.
void send(string s)
{
QString qs = s2q(s);
emit sendMessage(qs);
}
signals:
// Carries a line of text to whoever is connected.
void sendMessage(const QString name); public slots:
bool startThread(); protected:
// QThread entry point override.
void run(); };
#endif // CRAWLER_H
 #ifndef MAINWINDOW_H
#define MAINWINDOW_H #include <QMainWindow>
#include<QFile>
#include<QMessageBox>
#include<QTextStream>
#include<QLineEdit>
#include<QDebug>
#include"crawler.h"
#include"urlThread.h" namespace Ui {
class MainWindow;
} class MainWindow : public QMainWindow
{
Q_OBJECT public:
explicit MainWindow(QWidget *parent = );
~MainWindow();
void receiveMessage(const QString name); public slots:
void beginGeturl();
void open(); private:
Ui::MainWindow *ui;
Crawler crawler;
urlThread uth;
}; #endif // MAINWINDOW_H
 #ifndef URLTHREAD_H
#define URLTHREAD_H
#include"crawler.h"

// Worker thread that reads the saved URL list and reports each entry through
// the sendMessage signal.
//
// The class head used to share a line with the #include above, and the
// closing #endif shared a line with `};`. Directives must be on their own
// lines, otherwise the class head is swallowed by the #include and the
// include guard is never closed.
class urlThread: public QThread
{
    Q_OBJECT
public slots:
    // Starts the thread; run() then executes in it.
    void startThread();
    // Reads url.txt and sends its lines via send().
    void open();
    // Converts the std::string to a QString with s2q and emits it.
    void send(string s)
    {
        QString qs = s2q(s);
        emit sendMessage(qs);
    }
signals:
    // Carries one line of text to whoever is connected.
    void sendMessage(const QString name);
protected:
    // QThread entry point override.
    void run();
};

#endif // URLTHREAD_H
 <?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>MainWindow</class>
<widget class="QMainWindow" name="MainWindow">
<property name="geometry">
<rect>
<x></x>
<y></y>
<width></width>
<height></height>
</rect>
</property>
<property name="windowTitle">
<string>MainWindow</string>
</property>
<widget class="QWidget" name="centralWidget">
<widget class="QLabel" name="label">
<property name="geometry">
<rect>
<x></x>
<y></y>
<width></width>
<height></height>
</rect>
</property>
<property name="text">
<string>初始网址:</string>
</property>
</widget>
<widget class="QLineEdit" name="site">
<property name="geometry">
<rect>
<x></x>
<y></y>
<width></width>
<height></height>
</rect>
</property>
<property name="text">
<string>http://www.scut.edu.cn</string>
</property>
</widget>
<widget class="QLabel" name="label_2">
<property name="geometry">
<rect>
<x></x>
<y></y>
<width></width>
<height></height>
</rect>
</property>
<property name="text">
<string>搜索深度:</string>
</property>
</widget>
<widget class="QLineEdit" name="depth">
<property name="geometry">
<rect>
<x></x>
<y></y>
<width></width>
<height></height>
</rect>
</property>
<property name="text">
<string></string>
</property>
</widget>
<widget class="QLabel" name="label_3">
<property name="geometry">
<rect>
<x></x>
<y></y>
<width></width>
<height></height>
</rect>
</property>
<property name="text">
<string>过滤字符串:</string>
</property>
</widget>
<widget class="QLineEdit" name="filter">
<property name="geometry">
<rect>
<x></x>
<y></y>
<width></width>
<height></height>
</rect>
</property>
<property name="text">
<string>scut</string>
</property>
</widget>
<widget class="QPushButton" name="start">
<property name="geometry">
<rect>
<x></x>
<y></y>
<width></width>
<height></height>
</rect>
</property>
<property name="text">
<string>开始</string>
</property>
</widget>
<widget class="QPushButton" name="display">
<property name="geometry">
<rect>
<x></x>
<y></y>
<width></width>
<height></height>
</rect>
</property>
<property name="text">
<string>显示url</string>
</property>
</widget>
<widget class="QTextBrowser" name="textBrowser">
<property name="geometry">
<rect>
<x></x>
<y></y>
<width></width>
<height></height>
</rect>
</property>
</widget>
</widget>
<widget class="QMenuBar" name="menuBar">
<property name="geometry">
<rect>
<x></x>
<y></y>
<width></width>
<height></height>
</rect>
</property>
</widget>
<widget class="QToolBar" name="mainToolBar">
<attribute name="toolBarArea">
<enum>TopToolBarArea</enum>
</attribute>
<attribute name="toolBarBreak">
<bool>false</bool>
</attribute>
</widget>
<widget class="QStatusBar" name="statusBar"/>
</widget>
<layoutdefault spacing="" margin=""/>
<resources/>
<connections/>
</ui>

ui文件要自己设计(上面贴出的ui文件中,各控件 geometry 的 x、y、width、height 数值都是空的,需要在 Qt Designer 中重新布局)

爬虫还有一些小问题,抓取的url content并不完全,某些地址还有一点小问题...

貌似博客园没有上传附件的地方?还是我没找到?希望得到提示~

最新文章

  1. css-a与a:link的一些认识
  2. python 字符串翻转
  3. ORA-00824:cannot set SGA_TARGET or MEMORY_TARGET due to existing internal settings
  4. dubbo源码之四——dubbo服务发布
  5. Servlet上下文
  6. 块状元素和内联元素 【inline block】
  7. 快递查询API接口(trackingmore)
  8. jq实现地址级联效果
  9. GTK+布局管理
  10. 学习笔记 css3--选择器&新增颜色模式&文本相关
  11. win8 VS2010 配制OpenGL
  12. MYSQL 命令行导入导出数据库文件
  13. shp文件显示
  14. Delphi中点击DBGrid某一行获得其详细数据方法
  15. jQuery查询性能考虑
  16. ambari下 hive metastore 启动失败
  17. ZOJ 4110 Strings in the Pocket (马拉车+回文串)
  18. 使用Open Live Write发布CSDN博客
  19. 什么?你竟然还没有用这几个chrome插件?
  20. Docker:容器的四种网络类型 [十三]

热门文章

  1. Hibernate,JPA注解@PrimaryKeyJoinColumn
  2. 使用磁盘为Linux添加swap
  3. Oracle游标练手实例
  4. http协议了解
  5. 【转】Program Files (x86)文件夹是干什么的
  6. hibernate关于一对一用法
  7. jquery 判断checkbox是否为空的三种方法
  8. u Calculate e 分类: HDU 2015-06-19 22:18 14人阅读 评论(0) 收藏
  9. JAVA基础知识之Set集合
  10. Apache common pool2 对象池