comparison backend/sqlalchemy/FeedUpdater.py @ 121:510a5d00e98a backend

re-enabled AddFeed - does not work yet
author Dirk Olmes <dirk@xanthippe.ping.de>
date Sun, 21 Aug 2011 04:17:13 +0200
parents FeedUpdater.py@e4038dd8cc0e
children 862760b161b4
comparison
equal deleted inserted replaced
120:e830fa1cc7a2 121:510a5d00e98a
1
2 from datetime import datetime
3 from Feed import Feed
4 from FeedEntry import FeedEntry
5 import feedparser
6 import logging
7
8 STATUS_ERROR = 400  # HTTP status codes at or above this value make getFeed() raise FeedUpdateException
9 log = logging.getLogger("FeedUpdater")  # logger shared by the functions and classes of this module
10
def updateAllFeeds(session):
    """Update every feed that is due for an update.

    The feeds are selected with findFeedsToUpdate(). A feed whose update
    raises FeedUpdateException is logged and skipped so that the remaining
    feeds are still processed.

    session -- the database session used for querying, adding and committing.
    """
    for feed in findFeedsToUpdate(session):
        try:
            FeedUpdater(session, feed).update()
        except FeedUpdateException as fue:
            # "except ... as ..." is accepted by Python 2.6+ and Python 3,
            # unlike the comma form which is Python 2 only.
            log.warning("problems while updating feed %s: %s", feed.rss_url, fue)
        # NOTE(review): the indentation of this statement was lost in the
        # listing this code was recovered from. It is placed inside the loop
        # so each feed's changes are persisted on their own -- confirm.
        session.commit()
19
def findFeedsToUpdate(session):
    """Select the feeds whose next_update lies before the current local time.

    session -- the database session to query.

    Returns what session.query(Feed).filter(...) yields for that condition.
    """
    now = datetime.now()
    isDue = Feed.next_update < now
    return session.query(Feed).filter(isDue)
22
def normalize(entry):
    """Fill in the attributes of a parsed feed entry that later code relies on.

    The entry is modified in place, nothing is returned:

    id             -- falls back to the entry's link when it is missing.
    updated_parsed -- a time tuple is converted into a datetime. When the
                      attribute is missing or None, the current local time is
                      used instead. A value that already is a datetime is
                      left alone, so normalizing an entry twice is harmless.
    summary        -- falls back to the value of the first content element,
                      or to the empty string if there is no content.
    """
    if not hasattr(entry, "id"):
        entry.id = entry.link

    # The attribute may exist but hold None (e.g. an unparseable date), so
    # checking for its presence alone is not enough.
    updated = getattr(entry, "updated_parsed", None)
    if updated is None:
        entry.updated_parsed = datetime.today()
    elif not isinstance(updated, datetime):
        # only the first six fields (year .. second) fit datetime()
        entry.updated_parsed = datetime(*updated[:6])

    if not hasattr(entry, "summary"):
        # an empty content list is treated like missing content
        content = getattr(entry, "content", None)
        entry.summary = content[0].value if content else ""
35
class FeedUpdater(object):
    """Fetches one feed with feedparser and stores its entries that are new.

    session -- the database session used to look up and add objects.
    feed    -- the Feed object to update.
    """

    def __init__(self, session, feed):
        self.session = session
        self.feed = feed

    @staticmethod
    def _titleFromResult(result, url):
        """Return the feed title of a parse result, or url if it has none.

        The title is looked up below the "feed" key of the result; a missing
        "feed" key and a missing or empty title all fall back to url.
        """
        feedInfo = result.get("feed") or {}
        return feedInfo.get("title") or url

    # TODO this is a HACK! creating new instances from itself is bad but required due to the storage of the session.
    def createNewFeed(self, url):
        """Create a Feed for url, add it to the session and update it once.

        The feed's title is taken from the parsed feed; url is used as the
        title when the feed does not provide one.
        """
        # when updating to python3 see http://code.google.com/p/feedparser/issues/detail?id=260
        result = feedparser.parse(url)
        newFeed = Feed(self._titleFromResult(result, url), url)
        self.session.add(newFeed)

        FeedUpdater(self.session, newFeed).update()

    def update(self):
        """Fetch the feed, process all of its entries and schedule the next update.

        Raises FeedUpdateException if the feed could not be fetched.
        """
        log.info("updating %s", self.feed.rss_url)
        result = self.getFeed()
        for entry in result.entries:
            self.processEntry(entry)
        self.feed.incrementNextUpdateDate()

    def getFeed(self):
        """Parse the feed's rss_url and return the feedparser result.

        Raises FeedUpdateException if the HTTP status is STATUS_ERROR or
        above, or if no HTTP status is available and the parse produced
        an error without any entries.
        """
        result = feedparser.parse(self.feed.rss_url)
        # bozo flags a feed that is not well-formed. A set bozo flag alone
        # is not treated as an error here.
        status = result.get("status")
        if status is None:
            # The result carries no status when no HTTP response was received
            # (e.g. a local file, or a request that failed). Only give up if
            # nothing usable came out of the parse.
            if result.get("bozo") and not result.get("entries"):
                reason = str(result.get("bozo_exception"))
                raise FeedUpdateException("could not retrieve feed: " + reason)
        elif status >= STATUS_ERROR:
            raise FeedUpdateException("HTTP status " + str(status))
        return result

    def processEntry(self, entry):
        """Normalize entry and store it unless an entry with its id exists."""
        normalize(entry)
        feedEntry = FeedEntry.findById(entry.id, self.session)
        if feedEntry is None:
            self.createFeedEntry(entry)

    def createFeedEntry(self, entry):
        """Create a FeedEntry from entry, link it to the feed and add it to the session."""
        feedEntry = FeedEntry.create(entry)
        feedEntry.feed = self.feed
        self.session.add(feedEntry)
        # fall back to the id so a missing title cannot break the log call
        log.info("new feed entry: %s", getattr(entry, "title", entry.id))
82
class FeedUpdateException(Exception):
    """Signals that a feed could not be updated, e.g. because of an HTTP error status."""