Mercurial > hg > Feedworm
changeset 256:f79be01821c4
ArangoDB backend: first version, which barely works for reading
author | Dirk Olmes <dirk@xanthippe.ping.de> |
---|---|
date | Wed, 30 Jan 2019 07:11:10 +0100 (2019-01-30) |
parents | b4c83e9b9c7a |
children | 75b81da8d7a5 |
files | BackendFactory.py backend/AbstractBackend.py backend/AbstractPreferences.py backend/arangodb/ArangoBackend.py backend/arangodb/ArangoDb.py backend/arangodb/Feed.py backend/arangodb/FeedEntry.py backend/arangodb/Preferences.py backend/arangodb/__init__.py backend/couchdb/CouchDbBackend.py backend/couchdb/Preferences.py migrate_couch_to_arango.py |
diffstat | 11 files changed, 194 insertions(+), 30 deletions(-) [+] |
line wrap: on
line diff
--- a/BackendFactory.py Thu Nov 29 18:46:21 2018 +0100 +++ b/BackendFactory.py Wed Jan 30 07:11:10 2019 +0100 @@ -1,13 +1,14 @@ # -*- coding: utf-8 -*- import argparse +ARANGODB_BACKEND = "arangodb" +COUCHDB_BACKEND = "couchdb" SQLALCHEMY_BACKEND = "sqlalchemy" -COUCHDB_BACKEND = "couchdb" def _parseArguments(): parser = argparse.ArgumentParser() - parser.add_argument("--backend", nargs="?", choices=[SQLALCHEMY_BACKEND, COUCHDB_BACKEND], - required=True, help="Specify the backend to use: either sqlalchemy or couchdb") + parser.add_argument("--backend", nargs="?", choices=[ARANGODB_BACKEND, COUCHDB_BACKEND, SQLALCHEMY_BACKEND], + required=True, help="Specify the backend to use: either arangodb, couchdb or sqlalchemy") return parser.parse_known_args() def createBackend(): @@ -19,5 +20,8 @@ elif backend == COUCHDB_BACKEND: from backend.couchdb.CouchDbBackend import CouchDbBackend return CouchDbBackend() + elif backend == ARANGODB_BACKEND: + from backend.arangodb.ArangoBackend import ArangoBackend + return ArangoBackend() else: raise Exception("no backend configured")
--- a/backend/AbstractBackend.py Thu Nov 29 18:46:21 2018 +0100 +++ b/backend/AbstractBackend.py Wed Jan 30 07:11:10 2019 +0100 @@ -20,6 +20,12 @@ # # handling of feeds # + def getFeeds(self): + if self.preferences().showOnlyUnreadFeeds(): + self.feeds = self.getUnreadFeeds() + else: + self.feeds = self.getAllFeeds() + return self.feeds def selectFeed(self, index): self.selectedFeed = self.feeds[index]
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/backend/AbstractPreferences.py Wed Jan 30 07:11:10 2019 +0100 @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- + +class AbstractPreferences(object): + HIDE_READ_FEED_ENTRIES = "hideReadFeedEntries" + PROXY_HOST = "proxyHost" + SHOW_ONLY_UNREAD_FEEDS = "showOnlyUnreadFeeds" + START_MAXIMIZED = "startMaximized" + USE_PROXY = "useProxy" + + def isProxyConfigured(self): + return self.proxyHost() is not None
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/backend/arangodb/ArangoBackend.py Wed Jan 30 07:11:10 2019 +0100 @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +from argparse import ArgumentParser +from ArangoDb import ArangoDb +from backend.AbstractBackend import AbstractBackend +from Feed import Feed +from FeedEntry import FeedEntry +from Preferences import Preferences +from pyArango.connection import Connection + +""" +Backend that uses ArangoDB for persistence +""" +class ArangoBackend(AbstractBackend): + def __init__(self): + super(ArangoBackend, self).__init__() + args = self._parse_arguments() + connection = Connection(arangoURL=args.dburl, username=args.user, password=args.password) + self.database = ArangoDb(connection[args.dbname]) + self.prefs = None + + def _parse_arguments(self): + parser = ArgumentParser() + parser.add_argument('--dburl', nargs='?', help='URL of the database', default='http://127.0.0.1:8529') + parser.add_argument('--dbname', nargs='?', help='name of the database', default='feedworm') + parser.add_argument('--user', nargs='?', help='username for authenticating the database connection', required=True) + parser.add_argument('--password', nargs='?', help='password for authenticating the database connection', required=True) + return parser.parse_known_args()[0] + + def preferences(self): + if self.prefs is None: + self.prefs = Preferences(self.database) + return self.prefs + + def getUnreadFeeds(self): + return Feed.get_unread(self.database) + + def _retrieveEntriesForSelectedFeed(self, hideReadEntries): + base_query = """ + FOR feed_entry_doc in feed_entry + FILTER feed_entry_doc.feed == @feed_key""" + if hideReadEntries: + query = base_query + " AND feed_entry_doc.read == false " + query = query + " RETURN feed_entry_doc" + bind_vars = { 'feed_key': self.selectedFeed._key } + results = self.database.AQLQuery(query, bind_vars=bind_vars) + return [FeedEntry(doc) for doc in results] + + def _markSelectedFeedEntryRead(self): + 
self.selectedFeedEntry.markRead() + + def updateAllFeeds(self): + for feed in Feed.all_pending_update(self.database): + print('updating ' + feed.title)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/backend/arangodb/ArangoDb.py Wed Jan 30 07:11:10 2019 +0100 @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- + +class ArangoDb(object): + def __init__(self, database): + super(ArangoDb, self).__init__() + self.database = database + + def get_or_create_collection(self, collection_name): + if self.database.hasCollection(collection_name): + return self.database[collection_name] + else: + return self.database.createCollection(name=collection_name) + + def AQLQuery(self, query, bind_vars={}): + return self.database.AQLQuery(query, bindVars=bind_vars) \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/backend/arangodb/Feed.py Wed Jan 30 07:11:10 2019 +0100 @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- + +from datetime import datetime + +class Feed(object): + def __init__(self, document): + super(Feed, self).__init__() + self.document = document + + def __getattr__(self, attribute): + return self.document[attribute] + + @staticmethod + def get_unread(database): + query = """ + FOR feed_entry_doc IN feed_entry + FOR feed_doc IN feed + FILTER feed_entry_doc.read == false + AND feed_entry_doc.feed == feed_doc._key + RETURN DISTINCT feed_doc""" + results = database.AQLQuery(query) + return [Feed(doc) for doc in results] + + @staticmethod + def all_pending_update(database): + query = """ + FOR feed_doc IN feed + FILTER DATE_ISO8601(DATE_NOW()) > feed_doc.next_update + RETURN feed_doc + """ + results = database.AQLQuery(query) + return [Feed(doc) for doc in results]
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/backend/arangodb/FeedEntry.py Wed Jan 30 07:11:10 2019 +0100 @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +from datetime import datetime + +class FeedEntry(object): + def __init__(self, document): + super(FeedEntry, self).__init__() + self.document = document + + def __getattr__(self, attribute): + if attribute == 'updated': + return self._parse_updated() + return self.document[attribute] + + def _parse_updated(self): + value = self.document['updated'] + return datetime(*value) + + def markRead(self): + self.document['read'] = True + self.document.patch()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/backend/arangodb/Preferences.py Wed Jan 30 07:11:10 2019 +0100 @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +from ..AbstractPreferences import AbstractPreferences + +class Preferences(AbstractPreferences): + def __init__(self, database): + super(Preferences, self).__init__() + collection = database.get_or_create_collection('preferences') + self.doc = collection.fetchAll(limit=1)[0] + + def hideReadFeedEntries(self): + return self.doc[self.HIDE_READ_FEED_ENTRIES] + + def proxyHost(self): + return self.doc[self.PROXY_HOST] + + def showOnlyUnreadFeeds(self): + return self.doc[self.SHOW_ONLY_UNREAD_FEEDS] + + def startMaximized(self): + return self.doc[self.START_MAXIMIZED] + + def useProxy(self): + return self.doc[self.USE_PROXY]
--- a/backend/couchdb/CouchDbBackend.py Thu Nov 29 18:46:21 2018 +0100 +++ b/backend/couchdb/CouchDbBackend.py Wed Jan 30 07:11:10 2019 +0100 @@ -34,17 +34,7 @@ # # handling of feeds # - - def getFeeds(self): - if self.preferences().showOnlyUnreadFeeds(): - self.feeds = self._getUnreadFeeds() - else: - # make sure that the results are actually fetched into memory, otherwise we'll pass - # a ViewResults instance around which is not what we want - self.feeds = list(Feed.all(self.database)) - return self.feeds - - def _getUnreadFeeds(self): + def getUnreadFeeds(self): viewResults = self.database.view(CouchDb.feedsWithUnreadEntries(), group=True) feedsWithUnreadEntries = [] for row in viewResults: @@ -52,6 +42,11 @@ feedsWithUnreadEntries.append(feed) return feedsWithUnreadEntries + def getAllFeeds(self): + # make sure that the results are actually fetched into memory, otherwise we'll pass + # a ViewResults instance around which is not what we want + return list(Feed.all(self.database)) + def _retrieveEntriesForSelectedFeed(self, hideReadEntries): viewResults = FeedEntry.entriesForFeed(self.selectedFeed, self.database) if hideReadEntries:
--- a/backend/couchdb/Preferences.py Thu Nov 29 18:46:21 2018 +0100 +++ b/backend/couchdb/Preferences.py Wed Jan 30 07:11:10 2019 +0100 @@ -1,15 +1,11 @@ # -*- coding: utf-8 -*- +from ..AbstractPreferences import AbstractPreferences import CouchDb DAYS_TO_KEEP_FEED_ENTRIES = "daysToKeepFeedEntries" -HIDE_READ_FEED_ENTRIES = "hideReadFeedEntries" -PROXY_HOST = "proxyHost" PROXY_PORT = "proxyPort" -SHOW_ONLY_UNREAD_FEEDS = "showOnlyUnreadFeeds" -START_MAXIMIZED = "startMaximized" -USE_PROXY = "useProxy" -class Preferences(object): +class Preferences(AbstractPreferences): def __init__(self, database): self.database = database self._initDocument() @@ -35,14 +31,11 @@ self.document[key] = value self.documentIsDirty = True - def isProxyConfigured(self): - return self.proxyHost() is not None - def proxyHost(self): - return self._documentValue(PROXY_HOST) + return self._documentValue(self.PROXY_HOST) def useProxy(self): - return self._documentValue(USE_PROXY, True) + return self._documentValue(self.USE_PROXY, True) def setUseProxy(self, value): self._setDocumentValue(USE_PROXY, value) @@ -67,22 +60,22 @@ self._setDocumentValue(PROXY_PORT, port) def showOnlyUnreadFeeds(self): - return self._documentValue(SHOW_ONLY_UNREAD_FEEDS, False) + return self._documentValue(self.SHOW_ONLY_UNREAD_FEEDS, False) def setShowOnlyUnreadFeeds(self, flag): - self._setDocumentValue(SHOW_ONLY_UNREAD_FEEDS, flag) + self._setDocumentValue(self.SHOW_ONLY_UNREAD_FEEDS, flag) def startMaximized(self): - return self._documentValue(START_MAXIMIZED, False) + return self._documentValue(self.START_MAXIMIZED, False) def setStartMaximized(self, flag): self._setDocumentValue(START_MAXIMIZED, flag) def hideReadFeedEntries(self): - return self._documentValue(HIDE_READ_FEED_ENTRIES, False) + return self._documentValue(self.HIDE_READ_FEED_ENTRIES, False) def setHideReadFeedEntries(self, flag): - self._setDocumentValue(HIDE_READ_FEED_ENTRIES, flag) + self._setDocumentValue(self.HIDE_READ_FEED_ENTRIES, flag) def 
daysToKeepFeedEntries(self): value = self._documentValue(DAYS_TO_KEEP_FEED_ENTRIES, 90)
--- a/migrate_couch_to_arango.py Thu Nov 29 18:46:21 2018 +0100 +++ b/migrate_couch_to_arango.py Wed Jan 30 07:11:10 2019 +0100 @@ -2,8 +2,10 @@ # -*- coding: utf-8 -*- import couchdb +from datetime import datetime import pyArango.connection + def get_or_cretae_arango_collection(arango_db, collection_name): if arango_db.hasCollection(collection_name): return arango_db[collection_name] @@ -18,11 +20,20 @@ if couch_doc['doctype'] == 'feed': arango_doc = arango_feed.createDocument() copy(couch_doc, arango_doc) + convert_date(couch_doc, arango_doc, 'next_update') arango_doc.save() feed_mapping[couch_id] = arango_doc['_key'] except KeyError: print('**** migrate error ' + str(document)) +def convert_date(couch_doc, arango_doc, key): + date_string = couch_doc[key] + couch_date = datetime.strptime(date_string, '%Y-%m-%dT%H:%M:%SZ') + # ignore the milliseconds - if they are printed through %f the number of + # digits is not what arangodb expects + date_string = couch_date.strftime('%Y-%m-%dT%H:%M:%S.000') + arango_doc[key] = date_string + def migrate_rest(document): if document['_id'].startswith('_design'): return