diff backend/sqlalchemy/FeedUpdater.py @ 121:510a5d00e98a backend

re-enabled AddFeed - does not work yet
author Dirk Olmes <dirk@xanthippe.ping.de>
date Sun, 21 Aug 2011 04:17:13 +0200
parents FeedUpdater.py@e4038dd8cc0e
children 862760b161b4
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/backend/sqlalchemy/FeedUpdater.py	Sun Aug 21 04:17:13 2011 +0200
@@ -0,0 +1,84 @@
+
+from datetime import datetime
+from Feed import Feed
+from FeedEntry import FeedEntry
+import feedparser
+import logging
+
+# lowest HTTP status code that FeedUpdater.getFeed() treats as a failed fetch
+STATUS_ERROR = 400
+# logger shared by all functions and classes in this module
+log = logging.getLogger("FeedUpdater")
+
+def updateAllFeeds(session):
+    """Update every feed that is due and commit after each one.
+
+    A FeedUpdateException raised while updating one feed is logged as a
+    warning and does not keep the remaining feeds from being processed.
+    Any other exception propagates to the caller.
+
+    session -- the session used to query the feeds and to commit the changes
+    """
+    for feed in findFeedsToUpdate(session):
+        try:
+            FeedUpdater(session, feed).update()
+        except FeedUpdateException as fue:
+            # Let the logging module do the formatting: building the message
+            # by hand could raise inside this handler (e.g. rss_url is None).
+            log.warning("problems while updating feed %s: %s", feed.rss_url, fue)
+        # commit once per feed, also after a failed update
+        session.commit()
+
+def findFeedsToUpdate(session):
+    """Query session for the Feed objects whose next_update lies before the current local time."""
+    now = datetime.now()
+    dueFeeds = session.query(Feed).filter(Feed.next_update < now)
+    return dueFeeds
+
+def normalize(entry):
+    """Fill in the attributes of a parsed feed entry that the updater relies on.
+
+    The entry is modified in place:
+    - id falls back to the entry's link
+    - updated_parsed is converted from a time tuple to a datetime; when it is
+      missing or None the current time is used instead
+    - summary falls back to the value of the first content element or to ""
+
+    Calling this function again on an already normalized entry is a no-op.
+    """
+    if not hasattr(entry, "id"):
+        entry.id = entry.link
+
+    updated = getattr(entry, "updated_parsed", None)
+    if updated is None:
+        # Covers a missing attribute as well as an explicit None (presumably
+        # what the parser leaves behind for a date it could not interpret).
+        entry.updated_parsed = datetime.today()
+    elif not isinstance(updated, datetime):
+        # only the first six fields (year .. second) fit the datetime constructor
+        entry.updated_parsed = datetime(*updated[:6])
+
+    if not hasattr(entry, "summary"):
+        # an empty content list must not raise an IndexError
+        content = getattr(entry, "content", None)
+        if content:
+            entry.summary = content[0].value
+        else:
+            entry.summary = ""
+
+class FeedUpdater(object):
+    """Fetches a feed with feedparser and stores the entries that are not known yet.
+
+    session -- the session used to look up and to add objects
+    feed    -- the Feed object to update
+    """
+
+    def __init__(self, session, feed):
+        self.session = session
+        self.feed = feed
+
+    # TODO this is a HACK! creating new instances from itself is bad but required due to the storage of the session.
+    def createNewFeed(self, url):
+        """Create a Feed for url, add it to the session and fetch its entries.
+
+        The title is taken from the parsed feed; url is used when the feed
+        does not provide a (non-empty) title.
+        """
+        # when updating to python3 see http://code.google.com/p/feedparser/issues/detail?id=260
+        result = feedparser.parse(url)
+        # The title is stored in the "feed" part of the parse result, so that
+        # is where it has to be looked up - not at the top level of the result.
+        title = result.get("feed", {}).get("title") or url
+        newFeed = Feed(title, url)
+        self.session.add(newFeed)
+
+        FeedUpdater(self.session, newFeed).update()
+
+    def update(self):
+        """Fetch the feed, store its new entries and schedule the next update.
+
+        Raises FeedUpdateException if the feed could not be fetched; in that
+        case the next update date of the feed is left untouched.
+        """
+        log.info("updating %s", self.feed.rss_url)
+        result = self.getFeed()
+        for entry in result.entries:
+            self.processEntry(entry)
+        self.feed.incrementNextUpdateDate()
+
+    def getFeed(self):
+        """Parse the feed's rss_url and return the parse result.
+
+        Raises FeedUpdateException if the HTTP status is STATUS_ERROR or
+        above, or if no HTTP status is available and parsing produced an
+        error but no entries.
+        """
+        result = feedparser.parse(self.feed.rss_url)
+        # bozo flags if a feed is well-formed. It is deliberately not treated
+        # as an error on its own: a feed that is not well-formed may still
+        # contain usable entries.
+        status = result.get("status")
+        if status is None:
+            # No HTTP status is available (presumably no HTTP exchange took
+            # place, e.g. a network failure or a local file). Only give up
+            # if parsing yielded nothing usable.
+            if result.get("bozo") and not result.get("entries"):
+                raise FeedUpdateException("could not fetch feed: %s" % (result.get("bozo_exception"),))
+            return result
+        if status >= STATUS_ERROR:
+            raise FeedUpdateException("HTTP status " + str(status))
+        return result
+
+    def processEntry(self, entry):
+        """Normalize entry and store it unless an entry with its id exists already."""
+        normalize(entry)
+        feedEntry = FeedEntry.findById(entry.id, self.session)
+        if feedEntry is None:
+            self.createFeedEntry(entry)
+
+    def createFeedEntry(self, entry):
+        """Create a FeedEntry from entry, attach it to the feed and add it to the session."""
+        new = FeedEntry.create(entry)
+        new.feed = self.feed
+        self.session.add(new)
+        # entries are not guaranteed to carry a title; logging must not fail on that
+        log.info("new feed entry: %s", getattr(entry, "title", "(untitled)"))
+
+class FeedUpdateException(Exception):
+    """Raised when a feed cannot be updated, e.g. because of an HTTP error status."""