view migrate_couch_to_arango.py @ 259:304917762618 default tip

implementation of feed updates
author Dirk Olmes <dirk@xanthippe.ping.de>
date Tue, 12 Mar 2019 02:41:22 +0100
parents 75b81da8d7a5
children
line wrap: on
line source

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import couchdb
from datetime import datetime
import pyArango.connection


def get_or_cretae_arango_collection(arango_db, collection_name):
    """Return the collection called `collection_name` from `arango_db`.

    The collection is created first when `arango_db.hasCollection` reports
    that it does not exist yet.
    """
    if not arango_db.hasCollection(collection_name):
        return arango_db.createCollection(name=collection_name)
    return arango_db[collection_name]

def migrate_feed(couch_doc):
    """Migrate one CouchDB document into the feed collection if it is a feed.

    Documents whose id starts with ``_design`` and documents whose
    ``doctype`` is not ``'feed'`` are skipped. For a feed, the public fields
    are copied, ``next_update`` is converted, the new document is saved and
    its ``_key`` is stored in the module-level ``feed_mapping`` under the
    CouchDB id.

    A KeyError raised while handling the document (for example when it has
    no ``doctype``) is reported on stdout and the document is skipped.
    """
    couch_id = couch_doc['_id']
    if couch_id.startswith('_design'):
        return
    try:
        if couch_doc['doctype'] == 'feed':
            arango_doc = arango_feed.createDocument()
            copy(couch_doc, arango_doc)
            convert_date(couch_doc, arango_doc, 'next_update')
            arango_doc.save()
            feed_mapping[couch_id] = arango_doc['_key']
    except KeyError:
        # Report the offending document; the parameter here is couch_doc.
        print('**** migrate error ' + str(couch_doc))

def convert_date(couch_doc, arango_doc, key):
    """Copy the timestamp stored under `key` from `couch_doc` to `arango_doc`.

    The source value is parsed as ``YYYY-MM-DDTHH:MM:SSZ`` and written back
    as ``YYYY-MM-DDTHH:MM:SS.000``.
    """
    parsed = datetime.strptime(couch_doc[key], '%Y-%m-%dT%H:%M:%SZ')
    # The fractional seconds are written as a literal ".000": formatting them
    # with %f would not produce the number of digits arangodb expects.
    arango_doc[key] = parsed.strftime('%Y-%m-%dT%H:%M:%S.000')

def convert_date_tuple(couch_doc, arango_doc, key):
    """Copy the date stored under `key` as a sequence of datetime fields.

    The sequence is unpacked positionally into ``datetime(...)`` and the
    result is written to `arango_doc` as ``YYYY-MM-DDTHH:MM:SS.000``.
    """
    date_parts = couch_doc[key]
    arango_doc[key] = datetime(*date_parts).strftime('%Y-%m-%dT%H:%M:%S.000')

def migrate_rest(document):
    """Migrate every CouchDB document that is not a feed.

    Design documents and feeds are skipped; feed entries and preferences are
    handed to their migration functions. Any other doctype is reported on
    stdout, as is a KeyError raised while handling the document.
    """
    doc_id = document['_id']
    if doc_id.startswith('_design'):
        return
    try:
        kind = document['doctype']
        if kind == 'feedEntry':
            migrate_feed_entry(document)
        elif kind == 'preferences':
            migrate_preferences(document)
        elif kind != 'feed':
            # Feeds are skipped silently here; anything else is unknown.
            print('how to migrate ' + doc_id)
    except KeyError:
        print('**** migrate error ' + str(document))

def migrate_feed_entry(couch_doc):
    """Store a CouchDB feed entry as a new document in the feed_entry collection.

    Public fields are copied, the ``create_timestamp`` and ``updated`` date
    sequences are converted to strings, and the ``feed`` reference is
    replaced by the value recorded for it in the module-level
    ``feed_mapping``.
    """
    entry = arango_feed_entry.createDocument()
    copy(couch_doc, entry)
    for date_key in ('create_timestamp', 'updated'):
        convert_date_tuple(couch_doc, entry, date_key)
    # Swap the copied CouchDB feed id for the mapped ArangoDB key.
    entry['feed'] = feed_mapping[entry['feed']]
    entry.save()

def migrate_preferences(couch_doc):
    """Store a CouchDB preferences document in the preferences collection.

    Only the public fields are copied; no values are converted.
    """
    preferences = arango_preferences.createDocument()
    copy(couch_doc, preferences)
    preferences.save()

def copy(couch_doc, arango_doc):
    """Copy every entry of `couch_doc` whose key does not start with ``_``.

    Keys with a leading underscore (such as ``_id``) are left out of
    `arango_doc`.
    """
    public_keys = (name for name in couch_doc if not name.startswith('_'))
    for name in public_keys:
        arango_doc[name] = couch_doc[name]

if __name__ == '__main__':
    # Imported here because only the script entry point reads the environment.
    import os

    # Connection settings may be overridden through environment variables;
    # the fallbacks are the values this script has always used.
    # NOTE(review): the fallback ArangoDB password is committed in plain
    # text - consider rotating it and supplying ARANGO_PASSWORD instead.
    couch_server = couchdb.Server(
        os.environ.get('COUCH_URL', 'http://192.168.2.1:5984/'))
    couch_db = couch_server['feedworm']

    arango_connection = pyArango.connection.Connection(
        arangoURL=os.environ.get('ARANGO_URL', 'http://xanthippe:8529'),
        username=os.environ.get('ARANGO_USERNAME', 'root'),
        password=os.environ.get('ARANGO_PASSWORD', 'kahbHeotQDYL4R5o'))
    arango_db = arango_connection['feedworm']
    arango_feed = get_or_cretae_arango_collection(arango_db, 'feed')
    arango_feed_entry = get_or_cretae_arango_collection(arango_db, 'feed_entry')
    arango_preferences = get_or_cretae_arango_collection(arango_db, 'preferences')

    # CouchDB id -> ArangoDB _key of each migrated feed; written by
    # migrate_feed and read by migrate_feed_entry.
    feed_mapping = {}

    # First pass: feeds only, so feed_mapping is filled before any feed
    # entry looks up its feed.
    for doc_id in couch_db:
        migrate_feed(couch_db[doc_id])

    # Second pass: everything that is not a feed.
    for doc_id in couch_db:
        migrate_rest(couch_db[doc_id])