comparison FeedUpdater.py @ 28:72dfae865899

better logging when updating feeds, handle entries that have no id
author Dirk Olmes <dirk@xanthippe.ping.de>
date Thu, 29 Apr 2010 07:04:42 +0200
parents bdd1296a4b8c
children aaec263f07ca
comparison
equal deleted inserted replaced
27:bdd1296a4b8c 28:72dfae865899
3 from Feed import Feed 3 from Feed import Feed
4 from FeedEntry import FeedEntry 4 from FeedEntry import FeedEntry
5 import feedparser 5 import feedparser
6 import logging 6 import logging
7 7
8 STATUS_OK = 200 8 STATUS_ERROR = 400
9 logger = logging.getLogger("FeedUpdater") 9 log = logging.getLogger("FeedUpdater")
10 10
11 def updateAllFeeds(session): 11 def updateAllFeeds(session):
12 allFeeds = session.query(Feed) 12 allFeeds = session.query(Feed)
13 for feed in allFeeds: 13 for feed in allFeeds:
14 try: 14 try:
15 FeedUpdater(session, feed).update() 15 FeedUpdater(session, feed).update()
16 except FeedUpdateException: 16 except FeedUpdateException:
17 logger.warn("problems while updating feed " + feed.rss_url) 17 log.warn("problems while updating feed " + feed.rss_url)
18 session.commit() 18 session.commit()
19 19
20 def createNewFeed(url, session): 20 def createNewFeed(url, session):
21 result = feedparser.parse(url) 21 result = feedparser.parse(url)
22 title = result["feed"].title 22 title = result["feed"].title
23 newFeed = Feed(title, url) 23 newFeed = Feed(title, url)
24 session.add(newFeed) 24 session.add(newFeed)
25 session.commit() 25 session.commit()
26
26 FeedUpdater(newFeed).update() 27 FeedUpdater(newFeed).update()
27 session.commit() 28 session.commit()
28 29
29 class FeedUpdater(object): 30 class FeedUpdater(object):
30 def __init__(self, session, feed): 31 def __init__(self, session, feed):
31 self.session = session 32 self.session = session
32 self.feed = feed 33 self.feed = feed
33 34
34 def update(self): 35 def update(self):
36 log.info("updating " + self.feed.rss_url)
35 result = self.getFeed() 37 result = self.getFeed()
36 for entry in result.entries: 38 for entry in result.entries:
37 self.processEntry(entry) 39 self.processEntry(entry)
38 40
39 def getFeed(self): 41 def getFeed(self):
40 result = feedparser.parse(self.feed.rss_url) 42 result = feedparser.parse(self.feed.rss_url)
41 if result["status"] is not STATUS_OK: 43 if result["status"] >= STATUS_ERROR:
42 raise FeedUpdateException() 44 raise FeedUpdateException()
43 return result 45 return result
44 46
45 def processEntry(self, entry): 47 def processEntry(self, entry):
48 self.normalize(entry)
46 feedEntry = FeedEntry.findById(entry.id, self.session) 49 feedEntry = FeedEntry.findById(entry.id, self.session)
47 if feedEntry is None: 50 if feedEntry is None:
48 self.createFeedEntry(entry) 51 self.createFeedEntry(entry)
49 52
53 def normalize(self, entry):
54 if not hasattr(entry, "id"):
55 entry.id = entry.link
56
50 def createFeedEntry(self, entry): 57 def createFeedEntry(self, entry):
51 new = FeedEntry() 58 new = FeedEntry()
52 new.id = entry.id 59 new.id = entry.id
53 new.link = entry.link 60 new.link = entry.link
54 new.title = entry.title 61 new.title = entry.title