fun下载内容批量收集

1. Download the title and URL of each video

#!/usr/bin/env python

#-*- coding:utf-8 -*-

import re, urllib2,threading

def geturltitle(match, file):
    """Write the video URL found by *match*, then the page title, to *file*.

    Args:
        match: Regex match object whose matched text has the form
            ``[mukio=file]<url>``. The page that was searched is taken from
            ``match.string``.
        file: Writable file-like object that receives the output.

    The URL is written followed by a newline. If the page contains a
    non-empty ``keywords`` meta tag, its content is written followed by a
    blank line. Both values are also echoed to stdout.
    """
    # Drop the leading marker so only the URL remains.
    downurl = re.sub(r'^\[mukio=file\]', '', match.group())
    print(downurl)
    if downurl:
        file.write(downurl)
        file.write('\n')

    # Search the page the match came from instead of relying on a
    # module-level variable being set by the caller.
    # The capture group stops at the closing quote of the attribute; a greedy
    # ".*" would run to the last quote on the line and leave a trailing '"'
    # (and possibly other attributes) in the title.
    title_match = re.search(
        r'<meta name="keywords" content="([^"\s][^"\n]*)"', match.string)
    if title_match:
        title = title_match.group(1)
        print(title)
        file.write(title)
        file.write('\n\n')

# Walk thread ids 3600..8999, fetch the mobile view of each thread and record
# the video URL and title of every page that embeds a [mukio=file] mp4 link.

# Compiled once: the pattern is the same for every page.
pattern = re.compile(r'\[mukio=file\]\S.*mp4')

# "with" guarantees the output file is flushed and closed even if the run is
# interrupted part-way through.
with open('avfun1.txt', 'w') as out_file:
    for n in range(3600, 9000):
        # Best effort: a failure on one thread id (timeout, HTTP error, ...)
        # is reported and the scan moves on to the next id.
        try:
            resp = urllib2.urlopen(
                'http://www.avfun1.com/forum.php?mod=viewthread&tid='
                + repr(n) + '&mobile=yes',
                timeout=2)
            try:
                # Kept as a module-level name named "respread" in case other
                # code in this file reads the current page from it.
                respread = resp.read()
            finally:
                # Release the connection whether or not the read succeeded.
                resp.close()

            match = pattern.search(respread)
            print("pid = " + repr(n))
            if match:
                # Call the helper directly. Passing geturltitle(match, file)
                # as a Thread target ran it right here anyway and then
                # started a thread with target=None that did nothing.
                geturltitle(match, out_file)
        except Exception as e:
            print(e)

2. Rename the downloaded files using the titles saved in the file

#!/usr/bin/env python

#-*- coding:utf-8 -*-

import re, os

# Rename each downloaded video in <base_dir>/aaa to the title recorded for it
# in <base_dir>/avfun1.txt, where every entry is a URL line ending in the
# file name, followed by a title line.
base_dir = "/Users/apple/Downloads/avfun1/"  # download directory
video_dir = os.path.join(base_dir, 'aaa')

if os.path.isdir(base_dir):  # make sure the directory is valid
    print("Directory exists!")
else:
    print("Directory not exist.")
    # Nothing below can work without the directory, so stop here instead of
    # failing later inside os.listdir()/open().
    raise SystemExit(1)

with open(base_dir + 'avfun1.txt', 'rb') as listing_file:
    listing = listing_file.read()

for name in os.listdir(video_dir):
    # re.escape keeps the "." in the file name from matching any character.
    # The group captures the whole title line that follows the file name, so
    # titles starting with digits, ".", "m" or "p" are no longer truncated.
    match = re.search(re.escape(name) + r'\n(\S.*)', listing)
    if match:
        newfile = match.group(1) + '.mp4'  # new file name built from the title
        print(name)
        print(newfile)
        try:
            os.rename(os.path.join(video_dir, name),
                      os.path.join(video_dir, newfile))
        except OSError as e:
            # e.g. the title contains a path separator; report it and keep
            # going with the remaining files.
            print(e)
    else:
        # No entry for this file in the listing.
        print(match)

巴特西

fun下载内容批量收集

最新文章

热门文章