annotate migrate_couch_to_arango.py @ 259:304917762618 default tip

implementation of feed updates
author Dirk Olmes <dirk@xanthippe.ping.de>
date Tue, 12 Mar 2019 02:41:22 +0100
parents 75b81da8d7a5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
255
b4c83e9b9c7a migration from couchdb to arangodb
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
1 #!/usr/bin/env python
b4c83e9b9c7a migration from couchdb to arangodb
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
b4c83e9b9c7a migration from couchdb to arangodb
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
3
b4c83e9b9c7a migration from couchdb to arangodb
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
4 import couchdb
256
f79be01821c4 Arangodb backend, first version which barely works for reading
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 255
diff changeset
5 from datetime import datetime
255
b4c83e9b9c7a migration from couchdb to arangodb
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
6 import pyArango.connection
b4c83e9b9c7a migration from couchdb to arangodb
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
7
256
f79be01821c4 Arangodb backend, first version which barely works for reading
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 255
diff changeset
8
255
b4c83e9b9c7a migration from couchdb to arangodb
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
def get_or_create_arango_collection(arango_db, collection_name):
    """Return the named collection, creating it first if it is missing.

    :param arango_db: database handle that offers ``hasCollection(name)``,
        item access by collection name and ``createCollection(name=...)``.
    :param collection_name: name of the collection to look up or create.
    :return: ``arango_db[collection_name]`` when the collection exists,
        otherwise the result of ``arango_db.createCollection(...)``.
    """
    if arango_db.hasCollection(collection_name):
        return arango_db[collection_name]
    return arango_db.createCollection(name=collection_name)


# Backward-compatible alias: the function was first published under this
# misspelled name and existing callers still use it.
get_or_cretae_arango_collection = get_or_create_arango_collection
b4c83e9b9c7a migration from couchdb to arangodb
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
14
b4c83e9b9c7a migration from couchdb to arangodb
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
def migrate_feed(couch_doc):
    """Copy one CouchDB feed document into the ArangoDB feed collection.

    Design documents (ids starting with ``_design``) and documents whose
    ``doctype`` is not ``'feed'`` are skipped. For a feed, the key of the
    saved ArangoDB document is recorded in ``feed_mapping`` under the
    CouchDB id.

    Uses the module-level names ``arango_feed`` and ``feed_mapping``, which
    the script entry point assigns before calling this function.

    :param couch_doc: CouchDB document; must provide ``_id``.
    :return: None.
    :raises KeyError: if ``couch_doc`` has no ``_id``. Any other missing key
        is reported on stdout instead of being raised.
    """
    couch_id = couch_doc['_id']
    if couch_id.startswith('_design'):
        return
    try:
        if couch_doc['doctype'] == 'feed':
            arango_doc = arango_feed.createDocument()
            copy(couch_doc, arango_doc)
            convert_date(couch_doc, arango_doc, 'next_update')
            arango_doc.save()
            feed_mapping[couch_id] = arango_doc['_key']
    except KeyError:
        # Report the offending document. The handler used to reference the
        # undefined name ``document`` and died with a NameError itself.
        print('**** migrate error ' + str(couch_doc))
b4c83e9b9c7a migration from couchdb to arangodb
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
28
256
f79be01821c4 Arangodb backend, first version which barely works for reading
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 255
diff changeset
def convert_date(couch_doc, arango_doc, key):
    """Reformat the timestamp under *key* and store it in *arango_doc*.

    ``couch_doc[key]`` is parsed as ``YYYY-MM-DDTHH:MM:SSZ`` and written to
    ``arango_doc[key]`` as ``YYYY-MM-DDTHH:MM:SS.000``.

    :param couch_doc: source mapping holding the timestamp string.
    :param arango_doc: target mapping that receives the converted string.
    :param key: name of the field to convert.
    :return: None.
    """
    parsed = datetime.strptime(couch_doc[key], '%Y-%m-%dT%H:%M:%SZ')
    # The fractional part is hard-wired to ".000": printing it through %f
    # yields a number of digits that is not what arangodb expects.
    arango_doc[key] = parsed.strftime('%Y-%m-%dT%H:%M:%S.000')
f79be01821c4 Arangodb backend, first version which barely works for reading
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 255
diff changeset
36
257
75b81da8d7a5 convert the feed entry timestamps to arango compatible date strings in migration
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 256
diff changeset
def convert_date_tuple(couch_doc, arango_doc, key):
    """Convert the date tuple under *key* into a date string for ArangoDB.

    ``couch_doc[key]`` must be a sliceable sequence of numbers starting with
    ``year, month, day`` and optionally ``hour, minute, second``. Only these
    first six fields are used: the output format never included anything
    finer than seconds, and ignoring the rest also lets longer time tuples
    (e.g. the nine fields of a ``time.struct_time``) pass through.

    :param couch_doc: source mapping holding the date sequence.
    :param arango_doc: target mapping; ``arango_doc[key]`` is set to
        ``YYYY-MM-DDTHH:MM:SS.000``.
    :param key: name of the field to convert.
    :return: None.
    """
    # Deliberately not called ``tuple`` so the builtin is not shadowed.
    date_fields = couch_doc[key]
    couch_date = datetime(*date_fields[:6])
    arango_doc[key] = couch_date.strftime('%Y-%m-%dT%H:%M:%S.000')
75b81da8d7a5 convert the feed entry timestamps to arango compatible date strings in migration
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 256
diff changeset
42
255
b4c83e9b9c7a migration from couchdb to arangodb
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
def migrate_rest(document):
    """Migrate one CouchDB document that is not a feed.

    Design documents (ids starting with ``_design``) and documents with
    doctype ``'feed'`` are ignored. ``'feedEntry'`` and ``'preferences'``
    documents are handed to their migration functions; any other doctype is
    only reported on stdout.

    :param document: CouchDB document; must provide ``_id``.
    :return: None. A KeyError raised while migrating is reported on stdout
        together with the document instead of being propagated.
    """
    if document['_id'].startswith('_design'):
        return
    try:
        kind = document['doctype']
        if kind == 'feedEntry':
            migrate_feed_entry(document)
        elif kind == 'preferences':
            migrate_preferences(document)
        elif kind != 'feed':
            # feeds are handled by a separate pass; anything else is unknown
            print('how to migrate ' + document['_id'])
    except KeyError:
        print('**** migrate error ' + str(document))
b4c83e9b9c7a migration from couchdb to arangodb
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
58
b4c83e9b9c7a migration from couchdb to arangodb
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
def migrate_feed_entry(couch_doc):
    """Store one CouchDB feed entry in the ArangoDB feed entry collection.

    The entry's fields are copied, its ``create_timestamp`` and ``updated``
    date tuples are converted to date strings, and its ``feed`` reference is
    replaced by the value ``feed_mapping`` holds for it before saving.

    Uses the module-level names ``arango_feed_entry`` and ``feed_mapping``.

    :param couch_doc: CouchDB feed entry document.
    :return: None.
    """
    entry = arango_feed_entry.createDocument()
    copy(couch_doc, entry)
    for stamp_key in ('create_timestamp', 'updated'):
        convert_date_tuple(couch_doc, entry, stamp_key)
    # swap the couch feed id for the key of the already migrated feed
    entry['feed'] = feed_mapping[entry['feed']]
    entry.save()
b4c83e9b9c7a migration from couchdb to arangodb
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
68
b4c83e9b9c7a migration from couchdb to arangodb
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
def migrate_preferences(couch_doc):
    """Store one CouchDB preferences document in ArangoDB.

    Creates a document in the module-level ``arango_preferences`` collection,
    copies the fields of *couch_doc* into it and saves it.

    :param couch_doc: CouchDB preferences document.
    :return: None.
    """
    prefs_doc = arango_preferences.createDocument()
    copy(couch_doc, prefs_doc)
    prefs_doc.save()
b4c83e9b9c7a migration from couchdb to arangodb
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
73
b4c83e9b9c7a migration from couchdb to arangodb
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
def copy(couch_doc, arango_doc):
    """Copy the fields of *couch_doc* into *arango_doc*.

    Fields whose name starts with an underscore are left out.

    :param couch_doc: source mapping, iterated by key.
    :param arango_doc: target supporting item assignment.
    :return: None.
    """
    for field in couch_doc:
        if field.startswith('_'):
            # underscore-prefixed fields are not carried over
            continue
        arango_doc[field] = couch_doc[field]
b4c83e9b9c7a migration from couchdb to arangodb
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
78
b4c83e9b9c7a migration from couchdb to arangodb
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
if __name__ == '__main__':
    # Imported here because only the script entry point needs it.
    import os

    # Connection settings can be overridden through the environment; the
    # fallbacks are the values this script has always used, so running it
    # without any configuration behaves as before.
    couch_url = os.environ.get('COUCHDB_URL', 'http://192.168.2.1:5984/')
    arango_url = os.environ.get('ARANGO_URL', 'http://xanthippe:8529')
    arango_user = os.environ.get('ARANGO_USER', 'root')
    # NOTE(review): this fallback password is committed to version control.
    # Rotate it and supply the real one via ARANGO_PASSWORD, then drop the
    # fallback.
    arango_password = os.environ.get('ARANGO_PASSWORD', 'kahbHeotQDYL4R5o')

    couch_server = couchdb.Server(couch_url)
    couch_db = couch_server['feedworm']

    arango_connection = pyArango.connection.Connection(
        arangoURL=arango_url,
        username=arango_user,
        password=arango_password)
    arango_db = arango_connection['feedworm']

    # These are module-level names on purpose: the migrate_* functions look
    # them up as globals.
    arango_feed = get_or_cretae_arango_collection(arango_db, 'feed')
    arango_feed_entry = get_or_cretae_arango_collection(arango_db, 'feed_entry')
    arango_preferences = get_or_cretae_arango_collection(arango_db, 'preferences')

    # couch feed id -> key of the migrated arango feed, filled by migrate_feed
    feed_mapping = {}

    # First pass: feeds only, so that feed_mapping is complete before the
    # feed entries that refer to it are migrated.
    for doc_id in couch_db:
        migrate_feed(couch_db[doc_id])

    # Second pass: everything that is not a feed.
    for doc_id in couch_db:
        migrate_rest(couch_db[doc_id])