Support "real" UTF-8 by changing to utf8mb4 (winterrodeln tables) and binary (Mediawi...
[philipp/winterrodeln/wrpylib.git] / tests / test_mwdb.py
index cce9f2d10bddc76437d7a2e40855b9071ddf5d13..3e94135ee48ed5713ad42d0d87de12b0fdb25415 100644 (file)
@@ -1,5 +1,6 @@
 #!/usr/bin/python3.4
 import unittest
+import MySQLdb
 import sqlalchemy
 from sqlalchemy.engine import create_engine
 from sqlalchemy.orm import sessionmaker
@@ -9,7 +10,7 @@ import wrpylib.mwdb
 class TestMwDb(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        cls.engine = create_engine('mysql://philipp@localhost:3306/philipp_winterrodeln_wiki?charset=utf8&use_unicode=1')
+        cls.engine = create_engine('mysql://philipp@localhost:3306/philipp_winterrodeln_wiki?charset=utf8mb4')
         cls.Session = sessionmaker(bind=cls.engine)
 
     def setUp(self):
@@ -23,23 +24,88 @@ class TestMwDb(unittest.TestCase):
         Page = wrpylib.mwdb.page_table(self.metadata)
         page = self.session.query(Page).filter(Page.c.page_id==1321).first()
         self.assertEqual(page.page_id, 1321)
+        self.assertEqual(type(page.page_title), str)
+        self.assertEqual(type(page.page_restrictions), bytes)
+        self.assertEqual(type(page.page_touched), str)
 
     def test_revision_table(self):
         Revision = wrpylib.mwdb.revision_table(self.metadata)
         revision = self.session.query(Revision).filter(Revision.c.rev_id == 666).first()
         self.assertEqual(revision.rev_id, 666)
+        self.assertEqual(type(revision.rev_comment), str)
+        self.assertEqual(type(revision.rev_user_text), str)
+        self.assertEqual(type(revision.rev_timestamp), str)
 
     def test_text_table(self):
         Text = wrpylib.mwdb.text_table(self.metadata)
         text = self.session.query(Text).filter(Text.c.old_id == 51).first()
         self.assertEqual(text.old_id, 51)
+        self.assertEqual(type(text.old_text), str)
+        self.assertEqual(type(text.old_flags), str)
+        self.assertEqual(text.old_flags, 'utf-8')
+
 
     def test_user_table(self):
         User = wrpylib.mwdb.user_table(self.metadata)
-        user = self.session.query(User).filter(User.c.user_id == 5).first()
-        self.assertEqual(user.user_id, 5)
+        user = self.session.query(User).filter(User.c.user_id == 1).first()
+        self.assertEqual(user.user_id, 1)
+        self.assertEqual(type(user.user_name), str)
+        self.assertEqual(type(user.user_real_name), str)
+        self.assertEqual(type(user.user_email), str)
+        self.assertEqual(user.user_name, 'Philipp')
+
 
     def test_categorylinks_table(self):
         Categorylinks = wrpylib.mwdb.categorylinks_table(self.metadata)
-        categorylinks = self.session.query(Categorylinks).filter(Categorylinks.c.cl_from == 229).first()
-        self.assertEqual(categorylinks.cl_from, 229)
+        categorylinks = self.session.query(Categorylinks).filter(Categorylinks.c.cl_from == 609).first()
+        self.assertEqual(categorylinks.cl_from, 609)
+        self.assertEqual(type(categorylinks.cl_to), str)
+        self.assertEqual(type(categorylinks.cl_sortkey), str)
+        self.assertEqual(categorylinks.cl_sortkey, 'ALT BÄRNBAD')
+
+
+class TestMySqlPython(unittest.TestCase):
+    """Note: Many of those tests failed in MySQL_python version 1.2.3 and earlier
+    because byte strings are returned instead of unicode for columns having
+    a _bin collation, see https://sourceforge.net/p/mysql-python/bugs/289/
+    This has been fixed in MySQL_python version 1.2.4."""
+    @classmethod
+    def setUpClass(cls):
+        cls.db = MySQLdb.connect(db='philipp_winterrodeln_wiki', charset='utf8mb4')
+
+    def setUp(self):
+        self.cursor = self.db.cursor()
+
+    def exec_sql(self, sql):
+        self.cursor.execute(sql)
+        row = self.cursor.fetchone()
+        return row
+
+    def test_datatype_page(self):
+        result = self.exec_sql('select page_title, page_restrictions, page_touched from page where page_id = 1321')
+        self.assertEqual(type(result[0]), str)  # varchar(255) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL
+        self.assertEqual(type(result[1]), bytes)  # tinyblob NOT NULL
+        self.assertEqual(type(result[2]), bytes)  # binary(14) NOT NULL
+
+    def test_datatype_revision(self):
+        result = self.exec_sql('select rev_comment, rev_user_text, rev_timestamp from revision where rev_id = 7586')
+        self.assertEqual(type(result[0]), bytes)  # tinyblob NOT NULL
+        self.assertEqual(type(result[1]), str)  # varchar(255) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL
+        self.assertEqual(type(result[2]), bytes)  # binary(14) NOT NULL
+
+    def test_datatypes_text(self):
+        result = self.exec_sql('select old_text, old_flags from text where old_id = 7438')
+        self.assertEqual(type(result[0]), bytes)  # mediumblob NOT NULL
+        self.assertEqual(type(result[1]), bytes)  # tinyblob NOT NULL
+
+    def test_datatype_user(self):
+        result = self.exec_sql('select user_name, user_real_name, user_email from user where user_id = 1')
+        self.assertEqual(type(result[0]), str)  # varchar(255) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL
+        self.assertEqual(type(result[1]), str)  # varchar(255) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL
+        self.assertEqual(type(result[2]), str)  # tinytext NOT NULL
+        assert result[0] == 'Philipp'
+
+    def test_datatype_categorylinks(self):
+        result = self.exec_sql('select cl_to, cl_sortkey from categorylinks where cl_from = 609')
+        self.assertEqual(type(result[0]), str)  # varchar(255) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL
+        self.assertEqual(type(result[1]), bytes)  # varbinary(230) NOT NULL