异常处理

什么是异常处理 (处理异常，报错error)

print(1 / 0)  # 报了0除错误

# 打印结果：

Traceback (most recent call last):

  File "D:/pycharm_project/day07/01异常处理.py", line 18, in <module>

    print(1 / 0)

ZeroDivisionError: division by zero

捕捉异常

try:

    print(1 / 0)   # 有错误就跳过，没错误就执行

except ZeroDivisionError:  # 错误被捕捉了

    pass   # 啥也不做

print(1)

这种方法只能捕捉指定的这一种错误，遇到其他类型的错误仍然会报错

try:

    print(x)

    print(1 / 0)   # 有错误就跳过，没错误就执行

except ZeroDivisionError:  # 错误被捕捉了

    pass   # 啥也不做

except NameError:

    pass

print(1)

这样很麻烦，异常有很多种

# Catch any ordinary exception with a single handler instead of listing each type.
try:

    print('--------')

    print(a)  # `a` is not assigned in this snippet, so this line raises NameError

except Exception as e:   # print the error message; real code would usually also write it to a log

    print(e)

print('--------')

# 打印结果：

--------

name 'a' is not defined

--------

这样用 Exception 就能捕捉绝大多数异常，不必逐个列出异常类型，很方便~

字符串常用方法

索引取值

s = 'hello'

print(s[0])  # 取到h

切片

s = 'hello world'

print(s[0:5])  # 取到hello

成员运算

s = 'hello'

print('h' in s)   # 打印True

for循环

s = 'hello'

for i in s:

    print(i)

# 打印：

h

e

l

l

o

字符串长度

s = 'hello'

print(len(s))  # 打印字符串长度 5

strip()

s = '** hello  '

print(s.strip())  # 默认去掉两端空格

print(s.strip('*'))  # 去掉*

print(s.strip('* h'))  # 去掉* h

lstrip() 和 rstrip()

s = '**cwz**'

print(s.lstrip('*'))  # 去掉左边*

print(s.rstrip('*'))  # 去掉右边*

startswith()和endswith()

s = 'hello'

print(s.startswith('h'))   # True

print(s.endswith('o'))     # True

find() 寻找索引位置

s = 'hello'

print(s.find('h'))  # 打印0

print(s.find('x'))  # 字符串中没有的返回-1

print(s.find('l'))  # 字符串中有相同的，找第一个  返回2

index() 索引位置

s = 'hello'

print(s.index('o'))  # 打印4

print(s.index('a'))  # 字符串中没有的直接报错

join() 把列表中元素拼接起来

lt = ['a','b','c']

print('*'.join(lt))

# 打印结果：

a*b*c

split() 切割

s = 'a*b*c'

print(s.split('*'))  # 以*切割

# 打印结果：

['a', 'b', 'c']

replace 替换

s = 'reese neo'

print(s.replace('reese','cwz'))

# 打印结果：

cwz neo

center/ljust/rjust 居中/居左/居右

s = 'hello'

print(s.center(20,'-'))

print(s.ljust(20,'-'))

print(s.rjust(20,'-'))

# 打印结果：

-------hello--------

hello---------------

---------------hello

isdigit() 和 isalpha()

s = '123'

print(s.isdigit())  # True: every character is a digit

print(s.isalpha())  # False: the characters are not all letters

count 计数

s = 'aabcda'

print(s.count('a'))  # 打印3

selenium模块

什么是selenium

selenium是一个自动化测试工具

为什么要用selenium

通过selenium可以驱动浏览器，跳过登录滑动验证
缺点是爬虫效率低下

怎么使用selenium

安装驱动chromedriver http://npm.taobao.org/mirrors/chromedriver/
安装selenium pip3 install selenium

selenium基本使用

from selenium import webdriver  # 用来驱动浏览器

import time

# Open a page in Chrome and shut the browser down again.
# webdriver.Chrome('absolute path to the chromedriver executable')
driver = webdriver.Chrome(r'E:\chromedriver_win32\chromedriver.exe')
try:
    driver.get('https://www.baidu.com')
    time.sleep(10)  # pause for 10 s before shutting the browser down
finally:
    # quit() ends the WebDriver session and closes every window, whereas
    # close() only closes the current window. The finally clause makes sure
    # the browser is released even if get() raises.
    driver.quit()

selenium驱动浏览器输入

from selenium import webdriver

import time

# Type a query into Baidu's search box and click the search button.
# The driver is created before the try block so that `driver` is always bound
# when the finally clause runs; a failed constructor then surfaces its own
# error instead of a NameError from the cleanup code.
driver = webdriver.Chrome(r'E:\chromedriver_win32\chromedriver.exe')
try:
    driver.get('https://www.baidu.com')  # open Baidu

    # Find the search input box by id and type the query.
    input_tag = driver.find_element_by_id('kw')
    input_tag.send_keys('四驱车')

    # Find the search button by id. 'kw' is the input box; the button's id
    # is 'su' (NOTE(review): confirm against the current Baidu page markup).
    submit_button = driver.find_element_by_id('su')
    submit_button.click()

    time.sleep(10)
finally:
    # Runs whether or not an exception occurred. quit() ends the WebDriver
    # session and closes every window; close() only closes the current window.
    driver.quit()

selenium模拟登录

from selenium import webdriver

import time

from selenium.webdriver.common.keys import Keys  # 键盘按键操作

# Fill in and submit the login form on leetcode-cn.com.
driver = webdriver.Chrome(r'E:\chromedriver_win32\chromedriver.exe')
try:
    # Implicit wait: element lookups below poll for up to 10 s before raising.
    driver.implicitly_wait(10)
    driver.get('https://leetcode-cn.com/')

    # Find the login link by its visible text and click it.
    login_button = driver.find_element_by_link_text('登录')
    login_button.click()

    # Find the username input by class name.
    username = driver.find_element_by_class_name('css-paawy7-BasicInput')
    username.send_keys('123456789@qq.com')

    # Find the password input by its name attribute.
    password = driver.find_element_by_name('password')
    password.send_keys('123456')
    password.send_keys(Keys.ENTER)  # submit by pressing Enter

    time.sleep(10)
finally:
    # quit() ends the WebDriver session and closes every window; close() only
    # closes the current window.
    driver.quit()

selenium爬取京东商品信息

from selenium import webdriver

import time

from selenium.webdriver.common.keys import Keys  # 键盘按键操作

# Search jd.com for laptops and append each result's name, price, review
# count and detail link to a text file.
driver = webdriver.Chrome(r'E:\chromedriver_win32\chromedriver.exe')
try:
    # Implicit wait: element lookups below poll for up to 10 s before raising.
    driver.implicitly_wait(10)
    driver.get('https://www.jd.com/')

    # Find the search box by id, type the query and submit it with Enter.
    goods_input = driver.find_element_by_id('key')
    goods_input.send_keys('笔记本电脑')
    goods_input.send_keys(Keys.ENTER)
    time.sleep(3)   # give the result page time to load

    # Container of the result list, then every 'gl-item' element inside it.
    goods_div = driver.find_element_by_id('J_goodsList')
    goods_list = goods_div.find_elements_by_class_name('gl-item')

    # Open the output file once for the whole run instead of once per product.
    with open('笔记本电脑.txt', 'a', encoding='utf8') as f:
        for goods in goods_list:
            # CSS selectors are evaluated relative to the current product item.
            goods_price = goods.find_element_by_css_selector('.p-price i').text
            goods_name = goods.find_element_by_css_selector('.p-name em').text
            goods_commit = goods.find_element_by_css_selector('.p-commit a').text
            goods_url = goods.find_element_by_css_selector('.p-name a').get_attribute('href')

            # The literal's layout (blank lines, 8-space indent) is written
            # verbatim to the file, so it is kept exactly as it was.
            goods_data = f'''

        商品名称：{goods_name}

        商品价格：{goods_price}

        评价人数：{goods_commit}

        详情链接：{goods_url}

        '''
            f.write(goods_data)

    time.sleep(10)
finally:
    # quit() ends the WebDriver session and closes every window; close() only
    # closes the current window.
    driver.quit()

巴特西

异常处理，常用字符串方法，selenium模块

异常处理

字符串常用方法

索引取值

切片

成员运算

for循环

字符串长度

strip()

lstrip() 和 rstrip()

startswith()和endswith()

find() 寻找索引位置

index() 索引位置

join() 把列表中元素拼接起来

split() 切割

replace 替换

center/ljust/rjust 居中/居左/居右

isdigit() 和 isalpha()

count 计数

selenium模块

什么是selenium

为什么要用selenium

怎么使用selenium

selenium基本使用

selenium驱动浏览器输入

selenium模拟登录

selenium爬取京东商品信息

最新文章

热门文章