annotate FeedUpdater.py @ 27:bdd1296a4b8c

implemented adding a feed
author Dirk Olmes <dirk@xanthippe.ping.de>
date Thu, 29 Apr 2010 06:47:47 +0200
parents e87c54b3a216
children 72dfae865899
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
1
5
bfd47f55d85b add the updated date of the feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 4
diff changeset
2 from datetime import datetime
4
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
3 from Feed import Feed
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
4 from FeedEntry import FeedEntry
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
5 import feedparser
11
e87c54b3a216 use the logging framework for printing messages
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 10
diff changeset
6 import logging
4
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
7
9
fd4c8bfa62d6 FeedUpdater throws an exception if the URL could not be retrieved successfully. Includes unit tests.
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 7
diff changeset
8 STATUS_OK = 200
11
e87c54b3a216 use the logging framework for printing messages
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 10
diff changeset
9 logger = logging.getLogger("FeedUpdater")
9
fd4c8bfa62d6 FeedUpdater throws an exception if the URL could not be retrieved successfully. Includes unit tests.
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 7
diff changeset
10
4
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
11 def updateAllFeeds(session):
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
12 allFeeds = session.query(Feed)
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
13 for feed in allFeeds:
10
01a86b178e60 catch the FeedUpdateException that might be raised when updating a feed, print it and continue with next feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 9
diff changeset
14 try:
01a86b178e60 catch the FeedUpdateException that might be raised when updating a feed, print it and continue with next feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 9
diff changeset
15 FeedUpdater(session, feed).update()
01a86b178e60 catch the FeedUpdateException that might be raised when updating a feed, print it and continue with next feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 9
diff changeset
16 except FeedUpdateException:
11
e87c54b3a216 use the logging framework for printing messages
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 10
diff changeset
17 logger.warn("problems while updating feed " + feed.rss_url)
4
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
18 session.commit()
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
19
27
bdd1296a4b8c implemented adding a feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 11
diff changeset
20 def createNewFeed(url, session):
bdd1296a4b8c implemented adding a feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 11
diff changeset
21 result = feedparser.parse(url)
bdd1296a4b8c implemented adding a feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 11
diff changeset
22 title = result["feed"].title
bdd1296a4b8c implemented adding a feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 11
diff changeset
23 newFeed = Feed(title, url)
bdd1296a4b8c implemented adding a feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 11
diff changeset
24 session.add(newFeed)
bdd1296a4b8c implemented adding a feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 11
diff changeset
25 session.commit()
bdd1296a4b8c implemented adding a feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 11
diff changeset
26 FeedUpdater(newFeed).update()
bdd1296a4b8c implemented adding a feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 11
diff changeset
27 session.commit()
bdd1296a4b8c implemented adding a feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 11
diff changeset
28
4
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
29 class FeedUpdater(object):
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
30 def __init__(self, session, feed):
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
31 self.session = session
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
32 self.feed = feed
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
33
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
34 def update(self):
9
fd4c8bfa62d6 FeedUpdater throws an exception if the URL could not be retrieved successfully. Includes unit tests.
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 7
diff changeset
35 result = self.getFeed()
4
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
36 for entry in result.entries:
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
37 self.processEntry(entry)
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
38
9
fd4c8bfa62d6 FeedUpdater throws an exception if the URL could not be retrieved successfully. Includes unit tests.
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 7
diff changeset
39 def getFeed(self):
fd4c8bfa62d6 FeedUpdater throws an exception if the URL could not be retrieved successfully. Includes unit tests.
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 7
diff changeset
40 result = feedparser.parse(self.feed.rss_url)
fd4c8bfa62d6 FeedUpdater throws an exception if the URL could not be retrieved successfully. Includes unit tests.
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 7
diff changeset
41 if result["status"] is not STATUS_OK:
fd4c8bfa62d6 FeedUpdater throws an exception if the URL could not be retrieved successfully. Includes unit tests.
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 7
diff changeset
42 raise FeedUpdateException()
fd4c8bfa62d6 FeedUpdater throws an exception if the URL could not be retrieved successfully. Includes unit tests.
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 7
diff changeset
43 return result
fd4c8bfa62d6 FeedUpdater throws an exception if the URL could not be retrieved successfully. Includes unit tests.
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 7
diff changeset
44
4
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
45 def processEntry(self, entry):
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
46 feedEntry = FeedEntry.findById(entry.id, self.session)
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
47 if feedEntry is None:
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
48 self.createFeedEntry(entry)
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
49
e0199f383442 retrieve a feed for the given URL, store entries as feed_entry rows into the database
Dirk Olmes <dirk@xanthippe.ping.de>
parents:
diff changeset
50 def createFeedEntry(self, entry):
5
bfd47f55d85b add the updated date of the feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 4
diff changeset
51 new = FeedEntry()
bfd47f55d85b add the updated date of the feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 4
diff changeset
52 new.id = entry.id
bfd47f55d85b add the updated date of the feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 4
diff changeset
53 new.link = entry.link
bfd47f55d85b add the updated date of the feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 4
diff changeset
54 new.title = entry.title
bfd47f55d85b add the updated date of the feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 4
diff changeset
55 new.updated = datetime(*entry.updated_parsed[:6])
bfd47f55d85b add the updated date of the feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 4
diff changeset
56 new.summary = entry.summary
bfd47f55d85b add the updated date of the feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 4
diff changeset
57 new.feed = self.feed
bfd47f55d85b add the updated date of the feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 4
diff changeset
58 self.session.add(new)
9
fd4c8bfa62d6 FeedUpdater throws an exception if the URL could not be retrieved successfully. Includes unit tests.
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 7
diff changeset
59
fd4c8bfa62d6 FeedUpdater throws an exception if the URL could not be retrieved successfully. Includes unit tests.
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 7
diff changeset
60 class FeedUpdateException(Exception):
10
01a86b178e60 catch the FeedUpdateException that might be raised when updating a feed, print it and continue with next feed
Dirk Olmes <dirk@xanthippe.ping.de>
parents: 9
diff changeset
61 pass