day 18 - 2 正则与 re 模块练习

1、爬虫的例子

#爬虫的例子（方法一）

import re

from urllib.request import urlopen

def getPage(url):
    """Download *url* and return the response body decoded as UTF-8.

    Args:
        url: Address passed straight to ``urlopen``.

    Returns:
        The full response body as ``str``.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The with-block closes the response as soon as the body has been read,
    # instead of leaving the connection open until garbage collection.
    with urlopen(url) as response:
        return response.read().decode('utf-8')

def parsePage(s):
    """Extract every ranked entry from a page of HTML.

    Args:
        s: HTML text to scan.

    Returns:
        A list of ``(id, title, rating_num, comment_num)`` tuples of strings,
        one per match in document order; an empty list when nothing matches.
        ``comment_num`` is whatever text sits between ``<span>`` and
        ``评价</span>``.
    """
    # Raw strings keep ``\d`` as a regex escape; in a plain string literal it
    # is an invalid string escape that newer Python versions warn about.
    pattern = (
        r'<div class="item">.*?<div class="pic">.*?<em .*?>(?P<id>\d+)'
        r'.*?<span class="title">(?P<title>.*?)</span>'
        r'.*?<span class="rating_num" .*?>(?P<rating_num>.*?)</span>'
        r'.*?<span>(?P<comment_num>.*?)评价</span>'
    )
    # re.S lets ``.`` match newlines, so one entry may span several lines.
    return re.findall(pattern, s, re.S)

def main(num):
    """Fetch the listing page that starts at offset *num* and print its entries.

    Args:
        num: Value substituted into the ``start`` query parameter.
    """
    page_url = 'https://movie.douban.com/top250?start=%s&filter=' % num
    # Download, parse and print in one pass; print shows the parsed result.
    print(parsePage(getPage(page_url)))

if __name__ == '__main__':
    # Ten pages; the start offset advances by 25 per page (0, 25, ..., 225).
    # Guarded so that importing this module does not trigger network requests.
    for start in range(0, 250, 25):
        main(start)

# url 从网页上把代码搞下来

# bytes decode ——> utf-8 网页内容就是我的待匹配字符串

# ret = re.findall(正则，待匹配的字符串)  #ret是所有匹配到的内容组成的列表

#爬虫的例子（方法二）

import requests

import re

import json

def getPage(url, timeout=10):
    """Download *url* with ``requests`` and return the decoded body text.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled server.

    Returns:
        ``response.text``, the body decoded by ``requests``.
    """
    response = requests.get(url, timeout=timeout)
    return response.text

def parsePage(s):
    """Lazily yield one dict per ranked entry found in a page of HTML.

    Args:
        s: HTML text to scan.

    Yields:
        Dicts with the string-valued keys ``id``, ``title``, ``rating_num``
        and ``comment_num``, in document order. ``comment_num`` is whatever
        text sits between ``<span>`` and ``评价</span>``.
    """
    # Raw strings keep ``\d`` as a regex escape; in a plain string literal it
    # is an invalid string escape that newer Python versions warn about.
    # re.S lets ``.`` match newlines, so one entry may span several lines.
    com = re.compile(
        r'<div class="item">.*?<div class="pic">.*?<em .*?>(?P<id>\d+)'
        r'.*?<span class="title">(?P<title>.*?)</span>'
        r'.*?<span class="rating_num" .*?>(?P<rating_num>.*?)</span>'
        r'.*?<span>(?P<comment_num>.*?)评价</span>',
        re.S,
    )
    for match in com.finditer(s):
        yield {
            "id": match.group("id"),
            "title": match.group("title"),
            "rating_num": match.group("rating_num"),
            "comment_num": match.group("comment_num"),
        }

def main(num):
    """Fetch one listing page, print its entries and append them to a file.

    Each entry is printed and written as one JSON object per line to the file
    ``move_info7`` in the current working directory (append mode, UTF-8).

    Args:
        num: Value substituted into the ``start`` query parameter.
    """
    url = 'https://movie.douban.com/top250?start=%s&filter=' % num
    response_html = getPage(url)
    ret = parsePage(response_html)
    # parsePage in this file is a generator function, so this prints the
    # generator object itself, not the entries.
    print(ret)
    # The with-block guarantees the file is flushed and closed even if
    # parsing or writing raises part-way through.
    with open("move_info7", "a", encoding="utf8") as f:
        for obj in ret:
            print(obj)
            # ensure_ascii=False keeps non-ASCII text readable in the file.
            data = json.dumps(obj, ensure_ascii=False)
            f.write(data + "\n")

if __name__ == '__main__':
    # Ten pages; the start offset advances by 25 per page (0, 25, ..., 225).
    for start in range(0, 250, 25):
        main(start)

2、计算器

# Sample expression for the calculator below to evaluate. It contains stray
# spaces and nested parentheses; note that `a` is rebound by the input() call
# near the end of this script.

a = '1 - 2 * ( ( 6 0 -3 0  +(-40/5) * (9-2*5/3 + 7 /3*99/4*2998 +10 * 568/14 )) - (-4*3)/ (16-3*2) )'

import re

def format(new_equation):
    """Collapse doubled signs left behind after a sub-expression is replaced.

    Every ``+-`` becomes ``-`` first, then every ``--`` becomes ``+``; each
    replacement is applied once, in that order.

    Args:
        new_equation: Expression text to normalise.

    Returns:
        The expression with those sign pairs collapsed.
    """
    return new_equation.replace('+-', '-').replace('--', '+')

def cal(val_son):
    """Evaluate a single two-operand division or multiplication.

    Division is checked before multiplication. The text is split on the
    operator and both sides are converted with ``float``.

    Args:
        val_son: Text such as ``'6/3'`` or ``'2*-3'``.

    Returns:
        The result as ``str(float)``, or ``None`` when the text contains
        neither ``/`` nor ``*``.
    """
    for symbol in ('/', '*'):
        if symbol in val_son:
            left, right = val_son.split(symbol)
            lhs, rhs = float(left), float(right)
            return str(lhs / rhs if symbol == '/' else lhs * rhs)
    return None

def no_brackets(val):
    """Evaluate an expression that contains no inner parentheses.

    Multiplications and divisions are reduced one at a time, left to right;
    the signed numbers that remain are then added together.

    Args:
        val: Expression text. Leading and trailing parentheses are stripped
            before evaluation.

    Returns:
        The result as a string produced by ``str``.
    """
    new_val = val.strip('()')
    while True:
        # Leftmost multiplication/division; the right operand may be negative.
        ret = re.search(r'\d+\.?\d*[*/]-?\d+\.?\d*', new_val)
        if ret is None:
            # No multiplication or division is left in the expression.
            break
        # Splice the result over the matched span only. A global str.replace
        # would also rewrite the same text inside a longer operand
        # (e.g. the '2*3' inside '12*3').
        new_val = new_val[:ret.start()] + cal(ret.group()) + new_val[ret.end():]
        # `format` is this file's sign-normalising helper, which shadows the
        # builtin; it collapses '+-' and '--' produced by negative results.
        new_val = format(new_val)
    # Only signed numbers remain; their sum is the value of the expression.
    total = 0
    for number in re.findall(r'-?\d+\.?\d*', new_val):
        total += float(number)
    return str(total)

def func(new_equation):
    """Evaluate an expression that may contain nested parentheses.

    The innermost parenthesised group is evaluated with ``no_brackets`` and
    substituted back repeatedly until no parentheses remain; the flat
    expression that is left is then evaluated the same way.

    Args:
        new_equation: Expression text with all spaces already removed.

    Returns:
        The result string returned by ``no_brackets``.
    """
    while True:
        # A raw string keeps ``\(`` as a regex escape rather than an invalid
        # string escape. ``[^()]+`` restricts the match to an innermost group.
        val = re.search(r'\([^()]+\)', new_equation)
        if not val:
            return no_brackets(new_equation)
        val = val.group()
        ret = no_brackets(val)
        new_equation = new_equation.replace(val, ret)
        # A negative result can leave '+-' or '--' behind; `format` is this
        # file's helper (shadowing the builtin) that collapses them.
        new_equation = format(new_equation)

# Read an expression from the user, drop every space, then evaluate and print.
a = input("请输入要计算的式子>>>")
new_equation = ''.join(a.split(' '))
print(func(new_equation))

巴特西

day 18 - 2 正则与 re 模块练习

最新文章

热门文章