movedb/movedb.py
author Yury Yurevich <the.pythy@gmail.com>
Tue Apr 28 17:31:48 2009 +0700 (4 days ago)
changeset 16 e796083c981e
permissions -rwxr-xr-x
movedb: A tool for moving data from one rdbms to another
     1 #!/usr/bin/env python
     2 # encoding: utf-8
     3 """
     4 Move all data and schema from one database to another
     5 
     6 Tool is created mostly for Django app migration from sqlite to mysql
     7 
     8 Copyright 2009 Yury Yurevich
     9 Licensed under terms of GNU GPL2
    10 """
    11 
    12 
    13 import sqlalchemy as sa
    14 import logging
    15 import sys
    16 
    17 _CACHED_TABLE_ORDER = []
    18 
    19 def reflect_db(engine):
    20     meta = sa.MetaData()
    21     meta.bind = engine
    22     meta.reflect()
    23     return meta
    24 
    25 def reorder_tables(meta):
    26     global _CACHED_TABLE_ORDER
    27     if _CACHED_TABLE_ORDER:
    28         return _CACHED_TABLE_ORDER
    29     dependencies = {}
    30     table_order = []
    31     all_tables = []
    32     tables = meta.tables
    33     for table in tables.values():
    34         all_tables.append(table.name)
    35         for fk in table.foreign_keys:
    36             remote_table_name = fk.column.table.name
    37             dependencies[table.name] = dependencies.get(table.name, []) + \
    38                                        [remote_table_name]
    39         # first, fill all tables without any dependencies
    40         if table.name not in dependencies and table.name not in table_order:
    41             table_order.append(table.name)
    42 
    43     unordered_tables = all_tables[:]
    44     i = 0
    45     while unordered_tables:
    46         i += 1
    47         if i > 1000*len(all_tables):
    48             short_deps = dict((u, dependencies[u]) for u in unordered_tables)
    49             logging.error('Infinite loop detected for data: unordered_tables=%s' % (unordered_tables,))
    50             logging.error('Infinite loop detected for data: unordered_tables depends on %s' % (short_deps,))
    51             logging.error('Infinite loop detected for data: already ordered tables are %s' % (table_order,))
    52             logging.error('Infinite loop detected for data: tables full dependencies are %s' % (dependencies,))
    53             raise RuntimeError('Infinite loop detected: cannot reorder tables %s' % (unordered_tables,))
    54         for t in all_tables:
    55             if t not in unordered_tables:
    56                 continue
    57             if t in table_order:
    58                 unordered_tables.remove(t)
    59                 continue
    60             already_all_deps = True
    61             for dep in dependencies[t]:
    62                 if dep not in table_order:
    63                     already_all_deps = False
    64             if already_all_deps:
    65                 unordered_tables.remove(t)
    66                 if t not in table_order:
    67                     table_order.append(t)
    68 
    69     _CACHED_TABLE_ORDER = [tables[k] for k in table_order]
    70     return _CACHED_TABLE_ORDER
    71 
    72 def provide_schema(meta_from, meta_to):
    73     logging.info('syncing tables')
    74     for table in reorder_tables(meta_from):
    75         newtable = table.tometadata(meta_to)
    76         if newtable.exists():
    77             logging.debug('table %s already exists in target db' % newtable.name)
    78         else:
    79             logging.debug('creating table %s in target db' % newtable.name)
    80             newtable.create()
    81 
    82 def flatten_record(rec):
    83     return dict((key, getattr(rec, key)) for key in rec.keys())
    84 
    85 def move_data_by_table(table_from, table_to):
    86     conn_from = table_from.bind.connect()
    87     conn_to = table_to.bind.connect()
    88     query = sa.select([table_from])
    89     for rec in conn_from.execute(query).fetchall():
    90         data = flatten_record(rec)
    91         conn_to.execute(table_to.insert(values=data))
    92     conn_from.close()
    93     conn_to.close()
    94 
    95 def clean_target(meta):
    96     logging.info('cleaning target db')
    97     conn = meta.bind.connect()
    98     for table in reversed(reorder_tables(meta)):
    99         logging.debug('cleaning data in table %s at target db' % table)
   100         conn.execute(table.delete())
   101     conn.close()
   102 
   103 def move_data(meta_from, meta_to):
   104     logging.info('moving data')
   105     for table in reorder_tables(meta_from):
   106         logging.debug('moving data for table %s to target db' % table)
   107         newtable = table.tometadata(meta_to)
   108         move_data_by_table(table, newtable)
   109 
   110 def move(from_db, to_db, clean_target_data=False):
   111     engine_from = sa.create_engine(from_db)
   112     logging.info('source db: %s' % engine_from)
   113     engine_to = sa.create_engine(to_db)
   114     logging.info('target db: %s' % engine_to)
   115     meta_from = reflect_db(engine_from)
   116     meta_to = reflect_db(engine_to)
   117     provide_schema(meta_from, meta_to)
   118     if clean_target_data:
   119         clean_target(meta_to)
   120     move_data(meta_from, meta_to)
   121 
   122 
   123 def usage():
   124     print "movedb.py [--clean] <source-sqlalchemy-uri> <target-sqlalchemy-uri>"
   125     sys.exit(1)
   126 
   127 def parse_args(args):
   128     if len(args) == 2:
   129         # without cleaning
   130         if args[0] == '--clean':
   131             usage()
   132         from_db = args[0]
   133         to_db = args[1]
   134         clean = False
   135     elif len(args) == 3:
   136         if args[0] != '--clean':
   137             usage()
   138         from_db = args[1]
   139         to_db = args[2]
   140         clean = True
   141     else:
   142         usage()
   143     return from_db, to_db, clean
   144 
   145 def main(args):
   146     from_db, to_db, clean = parse_args(args)
   147     move(from_db, to_db, clean)
   148 
   149 if __name__ == '__main__':
   150     logging.basicConfig(level=logging.DEBUG)
   151     main(sys.argv[1:])
   152