Fix progress report

Reported count and rate of processing were wrong:

* The reported number of extracted articles was one less than the true value (e.g. 99999 was logged when 100000 articles had been extracted).
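* The reported processing rate was completely different from the true value, because the elapsed time was computed by dividing the current time by the interval start time instead of subtracting the start time from it.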
This commit is contained in:
orangain 2015-08-13 00:31:23 +09:00
parent 5057c130cc
commit 3cfa6dcee8

View File

@ -2405,11 +2405,12 @@ def output_process(ordering_queue, docs_queue, out_file, file_size, file_compres
if next_ordinal in ordering_buffer:
output.write(ordering_buffer.pop(next_ordinal))
ordering_queue.task_done()
if (next_ordinal+1) % 100000 == 0:
interval_rate = (next_ordinal-interval_count) / (default_timer()/interval_start)
logging.info("Extracted %d articles (%.1f/s)", next_ordinal, interval_rate)
count_done = next_ordinal + 1
if count_done % 100000 == 0:
interval_rate = (count_done - interval_count) / (default_timer() - interval_start)
logging.info("Extracted %d articles (%.1f/s)", count_done, interval_rate)
interval_start = default_timer()
interval_count = next_ordinal
interval_count = count_done
break
ordinal, text = docs_queue.get()
ordering_buffer[ordinal] = text