s = '<SPAN style="FONT- SIZE: 9pt">开始1~3<SPAN lang=EN-US>& lt;?xml:namespace prefix = o ns = "urn:schemas-microsoft-com:office:office" /><o:p></o:p></SPAN></SPAN>' import re d = re.sub('<[^
import scipy from sklearn.datasets import load_digits from sklearn.metrics import classification_report from sklearn.linear_model import LogisticRegression from sklearn.model_selection import train_test_split from sklearn.model_selection import GridS
import re from bs4 import BeautifulSoup from lxml import etree html = '<p>你好</p><br/><font>哈哈</font><b>大家好</b>' # 法一 pattern = re.compile(r'<[^>]+>',re.S) result = pattern.sub('', html) print(result) # 法二
thr_msgs = soup.find_all('div',class_=re.compile('msg')) for i in thr_msgs: print(i) first = i.select('em:nth-of-type(1)') print(first) >>> <div class='\"msg\"'><em>佛山</em><em>1-3年</em><em>大专</em&g