第一个爬虫和测试
(1)完善球赛程序,测试你写的球赛程序
from random import random


def printIntro():
    """Print the program banner (title and student id)."""
    print("模拟羽毛球比赛")
    print("学号:2019310143108")


def getInputs():
    """Prompt for the two ability values and the number of games.

    Returns:
        tuple: ``(a, b, n)`` exactly as evaluated from the three answers.

    Raises:
        Exception: whatever ``input``/``eval`` raises for a malformed answer.
            ``main`` reports it, instead of this function swallowing the
            error and handing back ``None``.
    """
    # NOTE(security): eval() executes whatever expression is typed at the
    # prompt. It is kept so answers such as ``1/2`` still work; switch to
    # float()/int() if this is ever fed input that is not fully trusted.
    a = eval(input("A的能力值:"))
    b = eval(input("B的能力值:"))
    n = eval(input("场次:"))
    return a, b, n


def gameOver(scoreA, scoreB):
    """Return True when a game with the given scores is finished.

    A game ends when the leader has at least 21 points and is at least two
    points ahead, or as soon as either side reaches 30.
    """
    high = max(scoreA, scoreB)
    low = min(scoreA, scoreB)
    # The previous condition required BOTH scores to exceed 20 for the
    # two-point rule, so a 22-20 finish was never recognised.
    return high >= 30 or (high >= 21 and high - low >= 2)


def simOneGame(probA, probB):
    """Simulate one game and return the final ``(scoreA, scoreB)``.

    Only the current server can score: the server wins the rally with its
    own probability, otherwise the serve passes to the opponent. A serves
    first.

    Raises:
        ValueError: if neither player can ever win a rally, because the
            loop below would then never terminate.
    """
    if probA <= 0 and probB <= 0:
        raise ValueError("at least one of probA/probB must be positive")

    scoreA, scoreB = 0, 0
    serving = "A"
    while not gameOver(scoreA, scoreB):
        if serving == "A":
            if random() < probA:
                scoreA += 1
            else:
                serving = "B"
        else:
            if random() < probB:
                scoreB += 1
            else:
                serving = "A"
    return scoreA, scoreB


def simNGame(n, probA, probB):
    """Simulate a best-of-``n`` match and return ``(winsA, winsB)``.

    Play stops early once either side has won ``n // 2 + 1`` games, so
    fewer than ``n`` games may be played.
    """
    winsA, winsB = 0, 0
    needed = n // 2 + 1
    for _ in range(n):
        if winsA >= needed or winsB >= needed:
            break
        scoreA, scoreB = simOneGame(probA, probB)
        if scoreA > scoreB:
            winsA += 1
        else:
            winsB += 1
    return winsA, winsB


def printSummary(winsA, winsB):
    """Print the number of games played, each side's share and the winner."""
    n = winsA + winsB
    # Guard the share computation: with zero games there is nothing to
    # divide by, and both shares are reported as 0.
    shareA = winsA / n if n else 0.0
    shareB = winsB / n if n else 0.0
    print("共模拟了{}场比赛".format(n))
    print("A获胜{}场,占比{:0.1%}".format(winsA, shareA))
    print("B获胜{}场,占比{:0.1%}".format(winsB, shareB))
    if winsA == winsB:
        # An even number of games can end level; do not award it to B.
        print("平局")
    else:
        print("{}获胜".format("A" if winsA > winsB else "B"))


def main():
    """Run the interactive simulation: banner, inputs, match, summary."""
    printIntro()
    try:
        probA, probB, n = getInputs()
    except Exception as exc:  # top-level boundary: report and stop cleanly
        print("getInputs error! {}".format(exc))
        return
    winsA, winsB = simNGame(n, probA, probB)
    printSummary(winsA, winsB)


if __name__ == "__main__":
    main()
正确结果:
错误结果:
(2)请用requests库的get()函数访问“360搜索主页”20次,打印返回状态和text属性的内容,并计算text属性和content属性所返回网页内容的长度。
import requests


def getHTMLText(url, times=20):
    """Request *url* ``times`` times and summarise the last response.

    Args:
        url: Address to fetch.
        times: Number of consecutive GET requests to issue (default 20).

    Returns:
        A tuple ``(status_code, text, content, len(text), len(content))``
        taken from the final response, or ``""`` when any request fails or
        ``times`` is not positive.
    """
    result = ""
    try:
        for _ in range(times):
            r = requests.get(url, timeout=30)
            r.raise_for_status()
            # Force UTF-8 so ``text`` is decoded the same way on every visit.
            r.encoding = 'utf-8'
            result = (r.status_code, r.text, r.content,
                      len(r.text), len(r.content))
    except requests.RequestException:
        # Only network/HTTP failures are treated as "no result"; programming
        # errors are no longer hidden by a bare ``except``.
        return ""
    return result


if __name__ == "__main__":
    url = "https://www.360.cn/?tn=80035161_1_dg"
    print(getHTMLText(url))
返回状态:
返回网页内容的长度:
(4)爬取中国大学排名网站内容
import requests
from bs4 import BeautifulSoup

# Rows collected by fillUnivList(); each entry is the list of cell strings
# of one table row.
allUniv = []


def getHTMLText(ur1):
    """Download the page at *ur1* and return its text decoded as UTF-8.

    Returns ``""`` when the request fails or the server answers with an
    error status.
    """
    try:
        r = requests.get(ur1, timeout=30)
        r.raise_for_status()
        r.encoding = 'utf-8'
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures map to an empty page.
        return ""


def fillUnivList(soup):
    """Append the cell strings of every ``<tr>`` that has ``<td>`` cells to ``allUniv``."""
    for tr in soup.find_all('tr'):
        cells = tr.find_all('td')
        if not cells:
            # Rows without <td> cells carry no data.
            continue
        allUniv.append([td.string for td in cells])


def printUnivList(num):
    """Print a header line followed by at most *num* rows from ``allUniv``.

    Rows with fewer than seven cells, or whose fourth cell is not a number,
    are skipped instead of aborting the whole listing.
    """
    pad = chr(12288)  # full-width space, used as the fill character
    print("{1:^2}{2:{0}^10}{3:{0}^4}{4:{0}^4}{5:{0}^10}".format(
        pad, "排名", "学校名称", "省市", "总分", "培养规模"))
    # Slicing (rather than indexing range(num)) tolerates a list shorter than
    # num, e.g. when the download failed and nothing was parsed.
    for u in allUniv[:num]:
        if len(u) < 7:
            continue
        try:
            # float() replaces the former eval(): the value is scraped from a
            # web page and must never be executed as code.
            score = float(u[3])
        except (TypeError, ValueError):
            continue
        # td.string is None for cells with nested markup; None cannot be
        # formatted with an alignment spec, so print it as empty text.
        rank, name, region, scale = (
            "" if field is None else field
            for field in (u[0], u[1], u[2], u[6])
        )
        print("{1:^4}{2:{0}^10}{3:{0}^5}{4:{0}^8.1f}{5:{0}^10}".format(
            pad, rank, name, region, score, scale))


def main(num):
    """Fetch the ranking page, parse its table rows and print the first *num*."""
    ur1 = "http://www.zuihaodaxue.cn/zuihaodaxuepaiming2018.html"
    html = getHTMLText(ur1)
    soup = BeautifulSoup(html, "html.parser")
    fillUnivList(soup)
    printUnivList(num)


if __name__ == "__main__":
    main(10)
只拿中国大学排名的前十所大学: