From 59c0044511807f3bc6750eabfee10e53e0995810 Mon Sep 17 00:00:00 2001
From: lzjqsdd
Date: Mon, 9 May 2016 10:04:44 +0800
Subject: [PATCH] =?UTF-8?q?=E5=AF=B9=E9=85=8D=E7=BD=AE=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E8=BF=9B=E8=A1=8C=E8=AF=B4=E6=98=8E?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (commentary only; this section is not part of the diff):
 * The encoding declaration added at the top of tools/Global.py is now a
   comment ("# -*- coding: utf-8 -*-"). Without the leading "#" the line is
   not valid Python and is not recognised as an encoding declaration.
 * The added comments are written in English.
 * The added lines were edited by hand, so the post-image blob id on the
   "index" line (523d229) no longer describes the patched file.
 * NOTE(review): cutnews_origin_dir ("data/orinews") has no trailing slash,
   unlike inverse_dir and cutnews_dir. It is an unchanged context line here;
   confirm against its callers whether that is intended.

 tools/Global.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tools/Global.py b/tools/Global.py
index a63bf07..523d229 100644
--- a/tools/Global.py
+++ b/tools/Global.py
@@ -1,11 +1,24 @@
+# -*- coding: utf-8 -*-
+
+# Project root directory
 project_root="../"
+# Data file of crawled items that do not include the news body text
 title_dir = project_root+"data/title.json"
+# Data file of crawled items that include the news body text
 content_dir=project_root+"data/news.json"
+# Data stored as a SQLite file; not used yet
 db_dir = project_root+"data/news.db"
+# Location of the stop-word file
 stopword_dir=project_root+"data/stopword.txt"
+# Directory of the inverted index, stored in chunks; the id.txt it contains is the dictionary
 inverse_dir=project_root+"data/inversedata/"
+# Location of the crawled news after it is split into chunks and keywords are extracted
 cutnews_dir=project_root+"data/cutnews/"
+# News that is only split in a simple way, to make displaying indexed news easier
 cutnews_origin_dir=project_root+"data/orinews"
+# Number of records stored in each chunk file
 filesize = 100
+# Controls the size of the snippet
 snippetsize = 500
+# Controls the number of news items on the home page, to avoid loading too many
 listsize = 15