"""Scrape news headlines from two university sites into CSV files.

Links found on the LDU page (``url2``) are written to ``ldu_news.csv`` and
links found on the YTU page (``url1``) are written to ``ytu_news.csv``.
Each CSV row is ``title,href``.
"""
import csv
import io
import re

import requests
from bs4 import BeautifulSoup

url1 = "http://www.ytu.edu.cn/xxyw/index.jhtml"
url2 = "http://www.ldu.edu.cn/index/zhyw.htm"

# Matches one anchor of the form <a href="..." title="...">...</a>.
# Attribute values stop at the next double quote and the link text is matched
# lazily, so several anchors on one line are captured separately.
_ANCHOR_PATTERN = re.compile(r'<a href="([^"]*)" title="([^"]*)">.*?</a>')


def getHtmlText(url):
    """Download *url* and return the response body as text.

    The response encoding is set from the library's content-based guess
    (``apparent_encoding``) before decoding.

    Returns:
        The decoded page text, or the fixed message "出现灾难性的错误" when
        the request fails or the server answers with an error status.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request failures are swallowed; KeyboardInterrupt and
        # programming errors propagate instead of being hidden.
        return "出现灾难性的错误"


# --- LDU: anchors carrying class "c49418", parsed with BeautifulSoup ---------
r = getHtmlText(url2)
soup = BeautifulSoup(r, "lxml")
news = soup.find_all("a", {"class": "c49418"})
with open("ldu_news.csv", "w", encoding="utf-8", newline="") as fldu:
    # csv.writer quotes titles that contain commas or quotes, keeping every
    # row at exactly two fields; "\n" keeps the original line ending.
    writer = csv.writer(fldu, lineterminator="\n")
    for line in news:
        title = line.get("title")
        href = line.get("href")
        if title is None or href is None:
            # An anchor without both attributes cannot form a complete row.
            continue
        writer.writerow([title, href])

print("-------------------------------------------------------------------------")

# --- YTU: anchors extracted from the raw HTML with a regular expression -----
r = getHtmlText(url1)
items = _ANCHOR_PATTERN.findall(r)
with open("ytu_news.csv", "w", encoding="utf-8", newline="") as fytu:
    writer = csv.writer(fytu, lineterminator="\n")
    # Each match is (href, title); rows are written as title, href.
    writer.writerows([title, href] for href, title in items)