changeset 256:f79be01821c4

Arangodb backend, first version which barely works for reading
author Dirk Olmes <dirk@xanthippe.ping.de>
date Wed, 30 Jan 2019 07:11:10 +0100 (2019-01-30)
parents b4c83e9b9c7a
children 75b81da8d7a5
files BackendFactory.py backend/AbstractBackend.py backend/AbstractPreferences.py backend/arangodb/ArangoBackend.py backend/arangodb/ArangoDb.py backend/arangodb/Feed.py backend/arangodb/FeedEntry.py backend/arangodb/Preferences.py backend/arangodb/__init__.py backend/couchdb/CouchDbBackend.py backend/couchdb/Preferences.py migrate_couch_to_arango.py
diffstat 11 files changed, 194 insertions(+), 30 deletions(-) [+]
line wrap: on
line diff
--- a/BackendFactory.py	Thu Nov 29 18:46:21 2018 +0100
+++ b/BackendFactory.py	Wed Jan 30 07:11:10 2019 +0100
@@ -1,13 +1,14 @@
 # -*- coding: utf-8 -*-
 import argparse
 
+ARANGODB_BACKEND = "arangodb"
+COUCHDB_BACKEND = "couchdb"
 SQLALCHEMY_BACKEND = "sqlalchemy"
-COUCHDB_BACKEND = "couchdb"
 
 def _parseArguments():
     parser = argparse.ArgumentParser()
-    parser.add_argument("--backend", nargs="?", choices=[SQLALCHEMY_BACKEND, COUCHDB_BACKEND],
-        required=True, help="Specify the backend to use: either sqlalchemy or couchdb")
+    backends = [ARANGODB_BACKEND, COUCHDB_BACKEND, SQLALCHEMY_BACKEND]  # every backend that can be selected
+    parser.add_argument("--backend", nargs="?", choices=backends, required=True, help="Specify the backend to use: either arangodb, couchdb or sqlalchemy")
     return parser.parse_known_args()
 
 def createBackend():
@@ -19,5 +20,8 @@
     elif backend == COUCHDB_BACKEND:
         from backend.couchdb.CouchDbBackend import CouchDbBackend
         return CouchDbBackend()
+    elif backend == ARANGODB_BACKEND:
+        from backend.arangodb.ArangoBackend import ArangoBackend
+        return ArangoBackend()
     else:
         raise Exception("no backend configured")
--- a/backend/AbstractBackend.py	Thu Nov 29 18:46:21 2018 +0100
+++ b/backend/AbstractBackend.py	Wed Jan 30 07:11:10 2019 +0100
@@ -20,6 +20,12 @@
     #
     # handling of feeds
     #
+    def getFeeds(self):
+        # the showOnlyUnreadFeeds preference selects the feed list that is loaded into self.feeds
+        unreadOnly = self.preferences().showOnlyUnreadFeeds()
+        fetchFeeds = self.getUnreadFeeds if unreadOnly else self.getAllFeeds
+        self.feeds = fetchFeeds()
+        return self.feeds
 
     def selectFeed(self, index):
         self.selectedFeed = self.feeds[index]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/backend/AbstractPreferences.py	Wed Jan 30 07:11:10 2019 +0100
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+
+class AbstractPreferences(object):
+    """
+    Common base class of the backend specific Preferences classes. It defines
+    the names of the preference keys shared by the backends.
+
+    Subclasses have to provide proxyHost(), isProxyConfigured() relies on it.
+    """
+    # keys under which the individual preference values are stored
+    HIDE_READ_FEED_ENTRIES = "hideReadFeedEntries"
+    PROXY_HOST = "proxyHost"
+    SHOW_ONLY_UNREAD_FEEDS = "showOnlyUnreadFeeds"
+    START_MAXIMIZED = "startMaximized"
+    USE_PROXY = "useProxy"
+
+    def isProxyConfigured(self):
+        """Return True unless proxyHost() is None."""
+        host = self.proxyHost()
+        return host is not None
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/backend/arangodb/ArangoBackend.py	Wed Jan 30 07:11:10 2019 +0100
@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+from argparse import ArgumentParser
+from ArangoDb import ArangoDb
+from backend.AbstractBackend import AbstractBackend
+from Feed import Feed
+from FeedEntry import FeedEntry
+from Preferences import Preferences
+from pyArango.connection import Connection
+
+class ArangoBackend(AbstractBackend):
+    """
+    Backend that uses ArangoDB for persistence
+
+    NOTE(review): AbstractBackend.getFeeds() calls getAllFeeds() unless only
+    unread feeds are shown - this class does not implement it yet.
+    """
+    def __init__(self):
+        super(ArangoBackend, self).__init__()
+        args = self._parse_arguments()
+        connection = Connection(arangoURL=args.dburl, username=args.user, password=args.password)
+        self.database = ArangoDb(connection[args.dbname])
+        # created lazily by preferences()
+        self.prefs = None
+
+    def _parse_arguments(self):
+        """
+        Parse the database connection options from the command line. Arguments
+        unknown to this parser are ignored.
+        """
+        parser = ArgumentParser()
+        parser.add_argument('--dburl', nargs='?', help='URL of the database', default='http://127.0.0.1:8529')
+        parser.add_argument('--dbname', nargs='?', help='name of the database', default='feedworm')
+        parser.add_argument('--user', nargs='?', help='username for authenticating the database connection', required=True)
+        parser.add_argument('--password', nargs='?', help='password for authenticating the database connection', required=True)
+        # parse_known_args returns a tuple of (namespace, remaining arguments)
+        return parser.parse_known_args()[0]
+
+    def preferences(self):
+        """Return the Preferences, loading them on first access."""
+        if self.prefs is None:
+            self.prefs = Preferences(self.database)
+        return self.prefs
+
+    def getUnreadFeeds(self):
+        """Return the feeds that Feed.get_unread() finds in the database."""
+        return Feed.get_unread(self.database)
+
+    def _retrieveEntriesForSelectedFeed(self, hideReadEntries):
+        """
+        Return the entries of the selected feed as a list of FeedEntry objects.
+        Entries that have been read are left out if hideReadEntries is true.
+        """
+        # Build the query step by step on a single variable so that it is
+        # defined whether or not the filter for read entries is added.
+        query = """
+            FOR feed_entry_doc in feed_entry
+                FILTER feed_entry_doc.feed == @feed_key"""
+        if hideReadEntries:
+            query = query + " AND feed_entry_doc.read == false "
+        query = query + " RETURN feed_entry_doc"
+        bind_vars = { 'feed_key': self.selectedFeed._key }
+        results = self.database.AQLQuery(query, bind_vars=bind_vars)
+        return [FeedEntry(doc) for doc in results]
+
+    def _markSelectedFeedEntryRead(self):
+        """Mark the selected feed entry as read."""
+        self.selectedFeedEntry.markRead()
+
+    def updateAllFeeds(self):
+        """Print the title of every feed that is pending an update."""
+        for feed in Feed.all_pending_update(self.database):
+            print('updating ' + feed.title)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/backend/arangodb/ArangoDb.py	Wed Jan 30 07:11:10 2019 +0100
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+
+class ArangoDb(object):
+    """
+    Wrapper around the database object of the ArangoDB driver that offers
+    the operations needed by the backend.
+    """
+    def __init__(self, database):
+        super(ArangoDb, self).__init__()
+        self.database = database
+
+    def get_or_create_collection(self, collection_name):
+        """Return the collection with the given name, creating it if it is missing."""
+        if self.database.hasCollection(collection_name):
+            return self.database[collection_name]
+        return self.database.createCollection(name=collection_name)
+
+    def AQLQuery(self, query, bind_vars=None):
+        """
+        Run an AQL query and return the result object of the driver.
+
+        bind_vars maps the names of the bind parameters used in the query to
+        their values. It may be left out for queries without bind parameters.
+        """
+        # no mutable default argument: a fresh dict is created for each call
+        if bind_vars is None:
+            bind_vars = {}
+        return self.database.AQLQuery(query, bindVars=bind_vars)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/backend/arangodb/Feed.py	Wed Jan 30 07:11:10 2019 +0100
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+
+from datetime import datetime
+
+def _fetch_feeds(database, query):
+    """Run the AQL query and wrap every document of the result into a Feed."""
+    return [Feed(document) for document in database.AQLQuery(query)]
+
+class Feed(object):
+    """
+    A feed that is backed by a document from the database. The fields of
+    the document are available as attributes of the Feed.
+    """
+    def __init__(self, document):
+        super(Feed, self).__init__()
+        self.document = document
+
+    def __getattr__(self, attribute):
+        # called for attributes that are not found by the normal lookup:
+        # these are read from the document
+        document = self.document
+        return document[attribute]
+
+    @staticmethod
+    def get_unread(database):
+        """Return the feeds that are referenced by an unread feed entry."""
+        unread_feeds_query = """ 
+            FOR feed_entry_doc IN feed_entry
+                FOR feed_doc IN feed
+                    FILTER feed_entry_doc.read == false
+                    AND feed_entry_doc.feed == feed_doc._key
+                    RETURN DISTINCT feed_doc"""
+        return _fetch_feeds(database, unread_feeds_query)
+
+    @staticmethod
+    def all_pending_update(database):
+        """Return the feeds whose next_update lies before the current time."""
+        pending_feeds_query = """
+        FOR feed_doc IN feed
+            FILTER DATE_ISO8601(DATE_NOW()) > feed_doc.next_update
+            RETURN feed_doc
+        """
+        return _fetch_feeds(database, pending_feeds_query)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/backend/arangodb/FeedEntry.py	Wed Jan 30 07:11:10 2019 +0100
@@ -0,0 +1,29 @@
+# -*- coding: utf-8 -*-
+from datetime import datetime
+
+class FeedEntry(object):
+    """
+    An entry of a feed, backed by a document from the database. The fields
+    of the document are available as attributes of the FeedEntry.
+    """
+    def __init__(self, document):
+        super(FeedEntry, self).__init__()
+        self.document = document
+
+    def __getattr__(self, attribute):
+        # all attributes come straight from the document, only 'updated' is
+        # converted to a datetime
+        if attribute != 'updated':
+            return self.document[attribute]
+        return self._parse_updated()
+
+    def _parse_updated(self):
+        """Return the 'updated' field of the document as datetime."""
+        # the stored value is passed on as the positional arguments of datetime
+        # NOTE(review): presumably [year, month, day, hour, ...] - confirm
+        return datetime(*self.document['updated'])
+
+    def markRead(self):
+        """Set the 'read' flag of the document and call patch() on it."""
+        self.document['read'] = True
+        self.document.patch()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/backend/arangodb/Preferences.py	Wed Jan 30 07:11:10 2019 +0100
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+
+from ..AbstractPreferences import AbstractPreferences
+
+class Preferences(AbstractPreferences):
+    """
+    Preferences that are read from the first document of the 'preferences'
+    collection. While the collection is empty the accessors return the same
+    defaults as the CouchDB preferences.
+    """
+    def __init__(self, database):
+        super(Preferences, self).__init__()
+        collection = database.get_or_create_collection('preferences')
+        try:
+            self.doc = collection.fetchAll(limit=1)[0]
+        except IndexError:
+            # an empty collection (e.g. one that was just created) has no
+            # preferences document to read from
+            self.doc = None
+
+    def _value(self, key, default):
+        """Return the value stored under key or default without a document."""
+        if self.doc is None:
+            return default
+        return self.doc[key]
+
+    def hideReadFeedEntries(self):
+        return self._value(self.HIDE_READ_FEED_ENTRIES, False)
+
+    def proxyHost(self):
+        return self._value(self.PROXY_HOST, None)
+
+    def showOnlyUnreadFeeds(self):
+        return self._value(self.SHOW_ONLY_UNREAD_FEEDS, False)
+
+    def startMaximized(self):
+        return self._value(self.START_MAXIMIZED, False)
+
+    def useProxy(self):
+        return self._value(self.USE_PROXY, True)
--- a/backend/couchdb/CouchDbBackend.py	Thu Nov 29 18:46:21 2018 +0100
+++ b/backend/couchdb/CouchDbBackend.py	Wed Jan 30 07:11:10 2019 +0100
@@ -34,17 +34,7 @@
     #
     # handling of feeds
     #
-
-    def getFeeds(self):
-        if self.preferences().showOnlyUnreadFeeds():
-            self.feeds = self._getUnreadFeeds()
-        else:
-            # make sure that the results are actually fetched into memory, otherwise we'll pass
-            # a ViewResults instance around which is not what we want
-            self.feeds = list(Feed.all(self.database))
-        return self.feeds
-
-    def _getUnreadFeeds(self):
+    def getUnreadFeeds(self):
         viewResults = self.database.view(CouchDb.feedsWithUnreadEntries(), group=True)
         feedsWithUnreadEntries = []
         for row in viewResults:
@@ -52,6 +42,11 @@
             feedsWithUnreadEntries.append(feed)
         return feedsWithUnreadEntries
 
+    def getAllFeeds(self):
+        # copy the results into a list so that they are actually fetched into memory;
+        # otherwise a ViewResults instance would be passed around, which is not wanted
+        return [feed for feed in Feed.all(self.database)]
+
     def _retrieveEntriesForSelectedFeed(self, hideReadEntries):
         viewResults = FeedEntry.entriesForFeed(self.selectedFeed, self.database)
         if hideReadEntries:
--- a/backend/couchdb/Preferences.py	Thu Nov 29 18:46:21 2018 +0100
+++ b/backend/couchdb/Preferences.py	Wed Jan 30 07:11:10 2019 +0100
@@ -1,15 +1,11 @@
 # -*- coding: utf-8 -*-
+from ..AbstractPreferences import AbstractPreferences
 import CouchDb
 
 DAYS_TO_KEEP_FEED_ENTRIES = "daysToKeepFeedEntries"
-HIDE_READ_FEED_ENTRIES = "hideReadFeedEntries"
-PROXY_HOST = "proxyHost"
 PROXY_PORT = "proxyPort"
-SHOW_ONLY_UNREAD_FEEDS = "showOnlyUnreadFeeds"
-START_MAXIMIZED = "startMaximized"
-USE_PROXY = "useProxy"
 
-class Preferences(object):
+class Preferences(AbstractPreferences):
     def __init__(self, database):
         self.database = database
         self._initDocument()
@@ -35,14 +31,11 @@
         self.document[key] = value
         self.documentIsDirty = True
 
-    def isProxyConfigured(self):
-        return self.proxyHost() is not None
-
     def proxyHost(self):
-        return self._documentValue(PROXY_HOST)
+        return self._documentValue(self.PROXY_HOST)
 
     def useProxy(self):
-        return self._documentValue(USE_PROXY, True)
+        return self._documentValue(self.USE_PROXY, True)
 
     def setUseProxy(self, value):
-        self._setDocumentValue(USE_PROXY, value)
+        self._setDocumentValue(self.USE_PROXY, value)  # the key is inherited from AbstractPreferences
@@ -67,22 +60,22 @@
             self._setDocumentValue(PROXY_PORT, port)
 
     def showOnlyUnreadFeeds(self):
-        return self._documentValue(SHOW_ONLY_UNREAD_FEEDS, False)
+        return self._documentValue(self.SHOW_ONLY_UNREAD_FEEDS, False)
 
     def setShowOnlyUnreadFeeds(self, flag):
-        self._setDocumentValue(SHOW_ONLY_UNREAD_FEEDS, flag)
+        self._setDocumentValue(self.SHOW_ONLY_UNREAD_FEEDS, flag)
 
     def startMaximized(self):
-        return self._documentValue(START_MAXIMIZED, False)
+        return self._documentValue(self.START_MAXIMIZED, False)
 
     def setStartMaximized(self, flag):
-        self._setDocumentValue(START_MAXIMIZED, flag)
+        self._setDocumentValue(self.START_MAXIMIZED, flag)  # the key is inherited from AbstractPreferences
 
     def hideReadFeedEntries(self):
-        return self._documentValue(HIDE_READ_FEED_ENTRIES, False)
+        return self._documentValue(self.HIDE_READ_FEED_ENTRIES, False)
 
     def setHideReadFeedEntries(self, flag):
-        self._setDocumentValue(HIDE_READ_FEED_ENTRIES, flag)
+        self._setDocumentValue(self.HIDE_READ_FEED_ENTRIES, flag)
 
     def daysToKeepFeedEntries(self):
         value = self._documentValue(DAYS_TO_KEEP_FEED_ENTRIES, 90)
--- a/migrate_couch_to_arango.py	Thu Nov 29 18:46:21 2018 +0100
+++ b/migrate_couch_to_arango.py	Wed Jan 30 07:11:10 2019 +0100
@@ -2,8 +2,10 @@
 # -*- coding: utf-8 -*-
 
 import couchdb
+from datetime import datetime
 import pyArango.connection
 
+
 def get_or_cretae_arango_collection(arango_db, collection_name):
     if arango_db.hasCollection(collection_name):
         return arango_db[collection_name]
@@ -18,11 +20,20 @@
         if couch_doc['doctype'] == 'feed':
             arango_doc = arango_feed.createDocument()
             copy(couch_doc, arango_doc)
+            convert_date(couch_doc, arango_doc, 'next_update')
             arango_doc.save()
             feed_mapping[couch_id] = arango_doc['_key']
     except KeyError:
             print('**** migrate error ' + str(document))
 
+def convert_date(couch_doc, arango_doc, key):
+    """Copy the date stored under key from couch_doc to arango_doc, reformatted."""
+    source_format = '%Y-%m-%dT%H:%M:%SZ'
+    parsed = datetime.strptime(couch_doc[key], source_format)
+    # The milliseconds are written as a literal .000 - if they are printed
+    # through %f the number of digits is not what arangodb expects.
+    arango_doc[key] = parsed.strftime('%Y-%m-%dT%H:%M:%S.000')
+
 def migrate_rest(document):
     if document['_id'].startswith('_design'):
         return