100 lines
3.1 KiB
Python
100 lines
3.1 KiB
Python
# coding: utf-8
|
|
# This test must pass in both Python 2 and Python 3.
|
|
from __future__ import unicode_literals
|
|
|
|
import sys
|
|
import unittest
|
|
|
|
from WikiExtractor import (
|
|
normalizeTitle, unescape, ucfirst, lcfirst, splitParts,
|
|
fullyQualifiedTemplateTitle, NextFile
|
|
)
|
|
|
|
|
|
class TestNormalizeTitle(unittest.TestCase):
    """Expected results of normalizeTitle() for a range of raw page titles."""

    def _assert_normalized(self, cases):
        # Each case is a (raw title, expected normalized title) pair; the
        # pairs are checked in order and the first mismatch fails the test.
        for raw, expected in cases:
            self.assertEqual(normalizeTitle(raw), expected)

    def test_known_namespace(self):
        # With a "Template" prefix the expected title has no space after the
        # colon, and both the prefix and the page name are capitalised.
        self._assert_normalized([
            ("Template: Births", "Template:Births"),
            (" template: births_", "Template:Births"),
        ])

    def test_not_known_namespace(self):
        # With a "Category" prefix the expected title keeps the space after
        # the colon; surrounding underscores are expected to disappear.
        self._assert_normalized([
            ("Category: Births", "Category: Births"),
            ("_category: births___", "Category: Births"),
        ])

    def test_no_namespace(self):
        # No prefix at all: first letter upper-cased, and a run of
        # underscores is expected to become a single space.
        self._assert_normalized([
            ("python", "Python"),
            ("python 3", "Python 3"),
            ("python__3", "Python 3"),
        ])
|
|
|
|
|
|
class TestStringUtils(unittest.TestCase):
    """Tests for the small string helpers unescape(), ucfirst() and lcfirst()."""

    def test_unescape(self):
        # The inputs are spelled as explicit HTML character references so
        # that every call hands unescape() something to decode. Passing the
        # already-decoded character would leave nothing to verify.
        # NOTE(review): the exact reference spelling (named vs. numeric) is
        # reconstructed - confirm against the upstream test file.
        self.assertEqual(unescape('&quot;'), '"')
        self.assertEqual(unescape('&amp;'), '&')
        self.assertEqual(unescape('&#x3042;'), '\u3042')
        if sys.maxunicode > 0xFFFF:
            # Python 3 or UCS-4 build of Python 2: code points above the BMP
            # are expected to decode to the single corresponding character.
            self.assertEqual(unescape('&#x1D546;'), '\U0001D546')
            self.assertEqual(unescape('&#x1d4c1;'), '\U0001d4c1')
        else:
            # UCS-2 build of Python 2: a reference above the BMP is expected
            # to come back unchanged.
            self.assertEqual(unescape('&#x1D546;'), '&#x1D546;')
            self.assertEqual(unescape('&#x1d4c1;'), '&#x1d4c1;')

    def test_ucfirst(self):
        # First character is expected upper-cased, the rest left as-is.
        self.assertEqual(ucfirst('python'), 'Python')

    def test_lcfirst(self):
        # First character is expected lower-cased, the rest left as-is.
        self.assertEqual(lcfirst('Python'), 'python')
|
|
|
|
|
|
class TestSplitParts(unittest.TestCase):
    """Expected results of splitParts() on '|'-separated template text."""

    def test_simple(self):
        # Plain text with no braces: every '|' separates two parts.
        parts = splitParts("p=q|q=r|r=s")
        self.assertEqual(parts, ['p=q', 'q=r', 'r=s'])

    def test_complex(self):
        # Text mixing template braces and {{{1}}} parameter references is
        # expected to split into these three parts, whitespace preserved.
        conditional = '{{#if: {{{1}}} | {{lc:{{{1}}} | "parameter missing"}}'
        self.assertEqual(
            splitParts(conditional),
            ['{{#if: {{{1}}} ', ' {{lc:{{{1}}} ', ' "parameter missing"}}'],
        )

        # A multi-line template whose pipes all sit inside the outer
        # {{if: ... }} is expected back unchanged as one single part.
        # NOTE(review): leading whitespace inside this literal is reproduced
        # as it appears in the reviewed text - confirm against upstream.
        nested = '\n'.join([
            '{{if:|',
            '|{{#if:the president|',
            '|{{#if:|',
            '[[Category:Hatnote templates|A{{PAGENAME}}]]',
            '}}',
            '}}',
            '}}',
        ])
        self.assertEqual(splitParts(nested), [nested])
|
|
|
|
|
|
class TestFullyQualifiedTemplateTitle(unittest.TestCase):
    """Expected results of fullyQualifiedTemplateTitle() for template names."""

    def test_main_namespace(self):
        # A leading ':' is expected to be dropped and the first letter
        # capitalised, so both spellings resolve to the same title.
        for raw in (':Python', ':python'):
            self.assertEqual(fullyQualifiedTemplateTitle(raw), 'Python')

    def test_other_namespace(self):
        # A title with an explicit "User" prefix is expected back unchanged.
        qualified = fullyQualifiedTemplateTitle('User:Orange')
        self.assertEqual(qualified, 'User:Orange')
|
|
|
|
|
|
class TestNextFile(unittest.TestCase):
    """Expected sequence of output paths produced by iterating NextFile."""

    def test_next(self):
        paths = NextFile('out')

        # The first two paths are consecutive files in directory AA.
        first = next(paths)
        second = next(paths)
        self.assertEqual(first, 'out/AA/wiki_00')
        self.assertEqual(second, 'out/AA/wiki_01')

        # Discard the next 97 paths so the following call is the 100th.
        discarded = 0
        while discarded < 97:
            next(paths)
            discarded += 1

        # The 100th path is the last one in AA; the 101st starts AB.
        self.assertEqual(next(paths), 'out/AA/wiki_99')
        self.assertEqual(next(paths), 'out/AB/wiki_00')
|
|
|
|
|
|
# Run the whole suite when this file is executed directly as a script.
if "__main__" == __name__:
    unittest.main()
|