# coding=utf-8
import urllib2
from urllib2 import urlopen

from bs4 import BeautifulSoup

url = "http://pythonscraping.com/pages/page1.html"


def getTitle(url):
    """Fetch *url* and return the first ``<h1>`` tag inside its ``<body>``.

    Failure handling:

    1. Opening the URL may fail. ``urllib2.HTTPError`` is raised when the
       server answers with an error status (e.g. the page does not exist),
       and ``urllib2.URLError`` is raised when the server cannot be reached
       at all. ``HTTPError`` is a subclass of ``URLError``, so catching
       ``URLError`` covers both. The error is printed and None is returned.
    2. The document may lack a ``<body>``; ``bsobj.body`` is then None and
       the attribute access raises ``AttributeError``, which is turned
       into a None return.

    :param url: address of the page to fetch.
    :return: the ``<h1>`` tag object, or None if the page could not be
        fetched or no such tag was found.
    """
    try:
        response = urlopen(url)
    except urllib2.URLError as e:
        # Either the page does not exist (HTTPError) or the server is
        # unreachable (plain URLError).
        print(e)
        return None

    # Read the payload and release the connection before parsing.
    try:
        markup = response.read()
    finally:
        response.close()

    try:
        bsobj = BeautifulSoup(markup, "html.parser")
        title = bsobj.body.h1
    except AttributeError:
        # bsobj.body was None: the document has no <body> element.
        return None
    return title


title = getTitle(url)
if title is None:
    print("Title could not be found")
else:
    print(title)