更新图片5

This commit is contained in:
ruben 2019-11-21 23:52:33 +08:00
parent 97252ff6b6
commit 47231ff9a8
2 changed files with 23 additions and 20 deletions

View File

@@ -15,6 +15,8 @@ if not os.path.exists('logs/'):
os.mkdir('logs/')
logging.basicConfig(filename=f'logs/{logfile_name}.log', filemode='a+',
format='%(levelname)s - %(asctime)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S')
class BaikeSpider(scrapy.Spider):
name = 'baike'
allowed_domains = ['baike.baidu.com']
@@ -79,9 +81,8 @@ class BaikeSpider(scrapy.Spider):
'//dt//text()').getall()
attr = ''.join(temp).replace('\xa0', '')
# value
values = Selector(text=value).xpath(
'//dd/text()|//dd/a//text()').getall()
for value in values:
value = ''.join(Selector(text=value).xpath(
'//dd/text()|//dd/a//text()').getall())
try:
value = value.replace('\n', '')
logging.warning(entity+'_'+attr+'_'+value)
@ -93,6 +94,8 @@ class BaikeSpider(scrapy.Spider):
)
except pymongo.errors.DuplicateKeyError:
pass
session.write_transaction(self.add_node, entity, attr, value)
session.write_transaction(
self.add_node, entity, attr, value)
except Exception:
logging.error('\n---'.join(attrs)+'\n_________________'+'\n---'.join(values))
logging.error('\n---'.join(attrs) +
'\n_________________'+'\n---'.join(values))

Binary file not shown.

Before

Width:  |  Height:  |  Size: 312 KiB

After

Width:  |  Height:  |  Size: 334 KiB