Define a custom thread count; modify the save path

This commit is contained in:
Your Name 2018-03-23 11:45:06 +08:00
parent 122a696e09
commit 1f123e5758
2 changed files with 12 additions and 19 deletions

View File

@ -36,9 +36,9 @@ class HtmlParser(object):
title_sub__text='' title_sub__text=''
filename = title_node.get_text() + title_sub__text filename = title_node.get_text() + title_sub__text
if not os.path.exists('webpages/'): if not os.path.exists('/data/ruben/data/webpages/'):
os.mkdir('webpages/') os.mkdir('/data/ruben/data/webpages/')
with open('webpages/' + filename.replace('/',''), 'w') as f: with open('/data/ruben/data/webpages/' + filename.replace('/',''), 'w') as f:
f.write(html_cont.decode('utf-8')) f.write(html_cont.decode('utf-8'))
print('Save to disk filename:'+f.name+"") print('Save to disk filename:'+f.name+"")
return res_data return res_data

View File

@ -38,28 +38,21 @@ if __name__=='__main__':
urls = url_manager.UrlManager() urls = url_manager.UrlManager()
downloader = html_downloader.HtmlDownloader() downloader = html_downloader.HtmlDownloader()
parser = html_parser.HtmlParser() parser = html_parser.HtmlParser()
th1 = MyThread() list_thread=[]
th2 = MyThread() count_thread=12
th3 = MyThread() for i in range(count_thread):
th4 = MyThread() list_thread.append(MyThread())
try: try:
if os.path.exists('urls.bin'): if os.path.exists('urls.bin'):
urls=pickle.load(open('urls.bin','rb')) urls=pickle.load(open('urls.bin','rb'))
else: else:
urls.add_new_url(root_url) urls.add_new_url(root_url)
th1.start() for th in list_thread:
th2.start() th.start()
th3.start() th.join()
th4.start()
th1.join()
th2.join()
th3.join()
th4.join()
except: except:
th1.terminate() for th in list_thread:
th2.terminate() th.terminate()
th3.terminate()
th4.terminate()
print('error!', sys.exc_info()[0]) print('error!', sys.exc_info()[0])
finally: finally:
print('save state') print('save state')