From 3cfa6dcee8d438c67a3a45ace3897a53344fddbd Mon Sep 17 00:00:00 2001 From: orangain Date: Thu, 13 Aug 2015 00:31:23 +0900 Subject: [PATCH] Fix progress report Reported count and rate of processing were wrong: * Reported number of extracted articles was one less than the true value. * Reported rate of processing was completely different from the true value, because the elapsed time was computed by dividing the current time by the interval start instead of subtracting it. --- WikiExtractor.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/WikiExtractor.py b/WikiExtractor.py index 1b36acd..d5de23c 100755 --- a/WikiExtractor.py +++ b/WikiExtractor.py @@ -2405,11 +2405,12 @@ def output_process(ordering_queue, docs_queue, out_file, file_size, file_compres if next_ordinal in ordering_buffer: output.write(ordering_buffer.pop(next_ordinal)) ordering_queue.task_done() - if (next_ordinal+1) % 100000 == 0: - interval_rate = (next_ordinal-interval_count) / (default_timer()/interval_start) - logging.info("Extracted %d articles (%.1f/s)", next_ordinal, interval_rate) + count_done = next_ordinal + 1 + if count_done % 100000 == 0: + interval_rate = (count_done - interval_count) / (default_timer() - interval_start) + logging.info("Extracted %d articles (%.1f/s)", count_done, interval_rate) interval_start = default_timer() - interval_count = next_ordinal + interval_count = count_done break ordinal, text = docs_queue.get() ordering_buffer[ordinal] = text