Mercurial > hg > Feedworm
view backend/AbstractFeedUpdater.py @ 205:adf7f617bda9
Make the name of the design document configurable via a command-line switch. When cloning the feedworm db, the design document name is no longer the same as the database name.
author | dirk |
---|---|
date | Sat, 02 Jun 2012 04:24:49 +0200 |
parents | e604c32f67aa |
children | f74fe7cb5091 |
line wrap: on
line source
from datetime import datetime
import logging

import feedparser

try:
    from urllib2 import ProxyHandler  # Python 2
except ImportError:
    from urllib.request import ProxyHandler  # Python 3

# HTTP status codes at or above this value are treated as a failed retrieval.
STATUS_ERROR = 400

log = logging.getLogger("FeedUpdater")


class AbstractFeedUpdater(object):
    """
    Abstract base class for FeedUpdater implementations - handles all
    the parsing of the feed.

    Subclasses need to implement creating and storing the new feed entries
    by overriding ``_processEntry`` and ``_incrementFeedUpdateDate``.
    """

    def __init__(self, preferences):
        """Remember the preferences object that is queried for proxy settings."""
        self.preferences = preferences

    def update(self, feed):
        """
        Retrieve *feed*, set its title if it has none, and process its entries.

        The feed is stored on ``self.feed`` for the duration of the update.
        Raises FeedUpdateException if the feed could not be retrieved.
        """
        self.feed = feed
        log.info("updating %s", feed.rss_url)
        result = self._retrieveFeed()
        self._setFeedTitle(result)
        self._processEntries(result)

    def _retrieveFeed(self):
        """
        Parse ``self.feed.rss_url`` with feedparser and return the result.

        When the preferences report a configured proxy, the request is sent
        through it.

        Raises FeedUpdateException if the result carries no HTTP status or
        if the status is STATUS_ERROR or higher.
        """
        if self.preferences.isProxyConfigured():
            proxyUrl = "http://%s:%i" % (self.preferences.proxyHost(),
                                         self.preferences.proxyPort())
            proxyHandler = ProxyHandler({"http": proxyUrl})
            result = feedparser.parse(self.feed.rss_url,
                                      handlers=[proxyHandler])
        else:
            # when updating to python3 see
            # http://code.google.com/p/feedparser/issues/detail?id=260
            result = feedparser.parse(self.feed.rss_url)

        # The "bozo" flag (feed is not well-formed) is not treated as an
        # error here; only the HTTP status decides.

        # feedparser only sets "status" when a web server answered. Without
        # it (e.g. the connection failed) report a FeedUpdateException
        # instead of letting a bare KeyError escape.
        status = result.get("status")
        if status is None:
            raise FeedUpdateException(
                "no HTTP status, feed could not be retrieved: %s"
                % (result.get("bozo_exception"),))
        if status >= STATUS_ERROR:
            raise FeedUpdateException("HTTP status " + str(status))
        return result

    def _processEntries(self, feedDict):
        """Normalize and process every entry, then advance the feed's update date."""
        for entry in feedDict.entries:
            self._normalize(entry)
            self._processEntry(entry)
        self._incrementFeedUpdateDate()

    def _normalize(self, entry):
        """Fill in id, published, updated_parsed and summary on *entry*."""
        self._normalizeId(entry)
        self._normalizePublishedDate(entry)
        self._normalizeUpdatedDate(entry)
        self._normalizeSummary(entry)

    def _normalizeId(self, entry):
        """Use the entry's link as its id when it has no id."""
        if not hasattr(entry, "id"):
            entry.id = entry.link

    def _normalizePublishedDate(self, entry):
        """Copy ``updated`` to ``published`` when only ``updated`` is present."""
        if not hasattr(entry, "published") and hasattr(entry, "updated"):
            entry.published = entry.updated

    def _normalizeUpdatedDate(self, entry):
        """
        Replace ``updated_parsed`` with a datetime.

        A missing or None value becomes the current local time; otherwise the
        first six fields of the existing value are passed to ``datetime``.
        """
        parsed = getattr(entry, "updated_parsed", None)
        if parsed is None:
            # TODO try to parse the entry.updated date string
            entry.updated_parsed = datetime.today()
        else:
            entry.updated_parsed = datetime(*parsed[:6])

    def _normalizeSummary(self, entry):
        """
        Ensure the entry has a summary.

        Falls back to the value of the first content element, or to an empty
        string when the entry has no content either.
        """
        if hasattr(entry, "summary"):
            return
        if hasattr(entry, "content"):
            entry.summary = entry.content[0].value
        else:
            entry.summary = ""

    def _processEntry(self, entry):
        """Abstract hook called once per normalized entry; must be overridden."""
        raise NotImplementedError(
            "_processEntry is abstract, subclasses must override")

    def _incrementFeedUpdateDate(self):
        """Abstract hook called after all entries were processed; must be overridden."""
        raise NotImplementedError(
            "_incrementFeedUpdateDate is abstract, subclasses must override")

    def _setFeedTitle(self, feedDict):
        """
        Give the feed a title if it has none yet.

        Uses the title from the parsed feed when available, otherwise the
        feed's URL. An existing title is left untouched.
        """
        if self.feed.title is not None:
            return
        if "title" in feedDict.feed:
            self.feed.title = feedDict.feed.title
        else:
            self.feed.title = self.feed.rss_url


class FeedUpdateException(Exception):
    """Raised when a feed could not be retrieved."""
    pass