python练习册每天一个小程序第0013题

# -*- coding: utf-8 -*-

'''

    题目描述：

         用 Python 写一个爬图片的程序，爬 这个链接里的日本妹子图片 :-)

    地址：

        http://tieba.baidu.com/p/2166231880

    思路：

        用正则表达式匹配图片链接，然后进行下载

'''

'''

import re

import requests

def main():

    url = 'http://tieba.baidu.com/p/2166231880'

    response = requests.get(url)

    html = response.text

    match = re.compile('img .*?src=\"(.*?)\"')

    for i in  match.findall(html):

        if 'imgsrc' in i :

            print i

if __name__ == '__main__':

    main()

'''

import re
import urllib2
from contextlib import closing
from os.path import basename
from urlparse import urlsplit

# Base address of the Baidu Tieba thread that the script scans for images.
url = "http://tieba.baidu.com/p/2166231880"

def getPage(url):
    """Return the number of pages reported by the thread at *url*.

    The thread is fetched with ``?see_lz=1`` appended (presumably the
    "thread starter only" view -- confirm against the site), and the text of
    the first ``<span class="red">`` element is parsed as the page count.

    Args:
        url: Thread address without a query string.

    Returns:
        The page count as an int.

    Raises:
        IndexError: The page contains no ``<span class="red">`` element.
        ValueError: The text of that element is not an integer.
    """
    # closing() releases the HTTP response even if read() raises.
    with closing(urllib2.urlopen(url + "?see_lz=1")) as response:
        urlContent = response.read()

    thePage = re.findall(r'<span class="red">(.*?)</span>', urlContent)
    if not thePage:
        # Same exception type as the unchecked thePage[0] lookup, but with a
        # message that says what was missing and where.
        raise IndexError("no page count found at %s" % url)
    return int(thePage[0])

def _saveImg(imgUrl):
    """Download one image into the current working directory.

    The file is named after the last path segment of *imgUrl*. Failures are
    reported on stdout and do not propagate, so one bad image does not stop
    the rest of the page.
    """
    try:
        with closing(urllib2.urlopen(imgUrl)) as response:
            imgData = response.read()
        fileName = basename(urlsplit(imgUrl)[2])
        with open(fileName, 'wb') as output:
            output.write(imgData)
    except (IOError, ValueError) as err:
        # IOError covers network and file-system failures (urllib2.URLError
        # derives from it); ValueError is raised by urlopen for URLs it
        # cannot interpret, e.g. ones without a scheme.
        print("Er.. %s: %s" % (imgUrl, err))


def downImg(url, save=False):
    """Print every image URL found inside the ``<cc>`` elements of a page.

    Args:
        url: Address of the page to fetch.
        save: When true, additionally download each image into the current
            working directory (see ``_saveImg``). Defaults to False, in
            which case the URLs are only printed.
    """
    # closing() releases the HTTP response even if read() raises.
    with closing(urllib2.urlopen(url)) as response:
        urlContent = response.read()

    # Only markup inside <cc>...</cc> (presumably the post bodies -- confirm
    # against the site) is searched, so images elsewhere on the page are
    # not listed.
    ccBodies = re.findall(r'<cc>(.*?)</cc>', urlContent)
    imgUrls = re.findall(r'img .*?src="(.*?)"', ','.join(ccBodies))

    for imgUrl in imgUrls:
        print(imgUrl)
        if save:
            _saveImg(imgUrl)

def downLoad(url):
    """Run downImg over every page of the thread at *url*.

    The page count comes from getPage; pages are visited in ascending order
    starting at 1, with a progress line printed before each page and a
    completion line at the end.
    """
    numb = getPage(url)
    print("There are " + str(numb) + " pages.")

    for pageNo in range(1, numb + 1):
        # Build the page address once and reuse it for the message and fetch.
        pageUrl = url + "?see_lz=1&pn=" + str(pageNo)
        print("Downloading " + pageUrl + "...")
        downImg(pageUrl)

    print('Completed!')

# Script entry point: lists the images of the single page at `url`.
# NOTE(review): downLoad(url), which walks every page of the thread, is
# defined above but never called -- confirm which entry point is intended.
downImg(url)

巴特西

python练习册每天一个小程序第0013题

最新文章

热门文章

巴特西

python练习册 每天一个小程序 第0013题

最新文章

热门文章

python练习册每天一个小程序第0013题