"""Crawl a website with a FIFO queue, never fetching the same URL twice."""
import urllib.request
from collections import deque

from bs4 import BeautifulSoup


class Queue:
    """Minimal first-in, first-out queue used to hold URLs awaiting a visit."""

    def __init__(self):
        # deque gives O(1) removal from the front; list.pop(0) is O(n).
        self.st = deque()

    def fetch(self):
        """Remove and return the oldest item; raises IndexError when empty."""
        return self.st.popleft()

    def enter(self, obj):
        """Append *obj* to the back of the queue."""
        self.st.append(obj)

    def empty(self):
        """Return True when the queue holds no items."""
        return not self.st


def spider(url):
    """Crawl from *url* in queue (breadth-first) order.

    For each page, the text of its first ``<h3>`` element is printed and every
    ``<a>`` link is queued. Each fetched URL is recorded in the module-level
    ``urls`` list; a URL already in that list is skipped, so no page is
    requested twice. Link targets are built by appending the ``href`` to the
    module-level ``start_url``.

    Any exception raised while fetching or parsing a page (including a page
    with no ``<h3>``) is printed, and the crawl continues with the next
    queued URL.
    """
    queue = Queue()
    queue.enter(url)
    while not queue.empty():
        current = queue.fetch()
        if current in urls:
            continue
        try:
            # Record the URL before fetching so a failing page is not retried.
            urls.append(current)
            # The context manager closes the HTTP response even on error.
            with urllib.request.urlopen(current) as response:
                html = response.read().decode()
            soup = BeautifulSoup(html, "lxml")
            print(soup.find("h3").text)
            for link in soup.select("a"):
                href = link.get("href")
                if href is None:
                    # An anchor without href has no target; skip it instead
                    # of letting a KeyError drop the remaining links.
                    continue
                queue.enter(start_url + "/" + href)
        except Exception as err:
            print(err)


start_url = "http://127.0.0.1:5000"
urls = []
spider(start_url)
print("The End")