Mercurial > hg > Feedworm
comparison FeedUpdater.py @ 28:72dfae865899
better logging when updating feeds, handle entries that have no id
author | Dirk Olmes <dirk@xanthippe.ping.de> |
---|---|
date | Thu, 29 Apr 2010 07:04:42 +0200 |
parents | bdd1296a4b8c |
children | aaec263f07ca |
comparison
equal
deleted
inserted
replaced
27:bdd1296a4b8c | 28:72dfae865899 |
---|---|
3 from Feed import Feed | 3 from Feed import Feed |
4 from FeedEntry import FeedEntry | 4 from FeedEntry import FeedEntry |
5 import feedparser | 5 import feedparser |
6 import logging | 6 import logging |
7 | 7 |
8 STATUS_OK = 200 | 8 STATUS_ERROR = 400 |
9 logger = logging.getLogger("FeedUpdater") | 9 log = logging.getLogger("FeedUpdater") |
10 | 10 |
11 def updateAllFeeds(session): | 11 def updateAllFeeds(session): |
12 allFeeds = session.query(Feed) | 12 allFeeds = session.query(Feed) |
13 for feed in allFeeds: | 13 for feed in allFeeds: |
14 try: | 14 try: |
15 FeedUpdater(session, feed).update() | 15 FeedUpdater(session, feed).update() |
16 except FeedUpdateException: | 16 except FeedUpdateException: |
17 logger.warn("problems while updating feed " + feed.rss_url) | 17 log.warn("problems while updating feed " + feed.rss_url) |
18 session.commit() | 18 session.commit() |
19 | 19 |
20 def createNewFeed(url, session): | 20 def createNewFeed(url, session): |
21 result = feedparser.parse(url) | 21 result = feedparser.parse(url) |
22 title = result["feed"].title | 22 title = result["feed"].title |
23 newFeed = Feed(title, url) | 23 newFeed = Feed(title, url) |
24 session.add(newFeed) | 24 session.add(newFeed) |
25 session.commit() | 25 session.commit() |
| | 26 |
26 FeedUpdater(newFeed).update() | 27 FeedUpdater(newFeed).update() |
27 session.commit() | 28 session.commit() |
28 | 29 |
29 class FeedUpdater(object): | 30 class FeedUpdater(object): |
30 def __init__(self, session, feed): | 31 def __init__(self, session, feed): |
31 self.session = session | 32 self.session = session |
32 self.feed = feed | 33 self.feed = feed |
33 | 34 |
34 def update(self): | 35 def update(self): |
| | 36 log.info("updating " + self.feed.rss_url) |
35 result = self.getFeed() | 37 result = self.getFeed() |
36 for entry in result.entries: | 38 for entry in result.entries: |
37 self.processEntry(entry) | 39 self.processEntry(entry) |
38 | 40 |
39 def getFeed(self): | 41 def getFeed(self): |
40 result = feedparser.parse(self.feed.rss_url) | 42 result = feedparser.parse(self.feed.rss_url) |
41 if result["status"] is not STATUS_OK: | 43 if result["status"] >= STATUS_ERROR: |
42 raise FeedUpdateException() | 44 raise FeedUpdateException() |
43 return result | 45 return result |
44 | 46 |
45 def processEntry(self, entry): | 47 def processEntry(self, entry): |
| | 48 self.normalize(entry) |
46 feedEntry = FeedEntry.findById(entry.id, self.session) | 49 feedEntry = FeedEntry.findById(entry.id, self.session) |
47 if feedEntry is None: | 50 if feedEntry is None: |
48 self.createFeedEntry(entry) | 51 self.createFeedEntry(entry) |
49 | 52 |
| | 53 def normalize(self, entry): |
| | 54 if not hasattr(entry, "id"): |
| | 55 entry.id = entry.link |
| | 56 |
50 def createFeedEntry(self, entry): | 57 def createFeedEntry(self, entry): |
51 new = FeedEntry() | 58 new = FeedEntry() |
52 new.id = entry.id | 59 new.id = entry.id |
53 new.link = entry.link | 60 new.link = entry.link |
54 new.title = entry.title | 61 new.title = entry.title |