Mercurial > hg > Feedworm
comparison backend/sqlalchemy/FeedUpdater.py @ 121:510a5d00e98a backend
re-enabled AddFeed - does not work yet
author | Dirk Olmes <dirk@xanthippe.ping.de> |
---|---|
date | Sun, 21 Aug 2011 04:17:13 +0200 |
parents | FeedUpdater.py@e4038dd8cc0e |
children | 862760b161b4 |
comparison
equal
deleted
inserted
replaced
120:e830fa1cc7a2 | 121:510a5d00e98a |
---|---|
1 | |
2 from datetime import datetime | |
3 from Feed import Feed | |
4 from FeedEntry import FeedEntry | |
5 import feedparser | |
6 import logging | |
7 | |
# HTTP status codes at or above this value are treated as a failed feed fetch.
STATUS_ERROR = 400
# Module-wide logger used by the update functions and FeedUpdater.
log = logging.getLogger("FeedUpdater")
10 | |
def updateAllFeeds(session):
    """Update every feed that is due and commit the results.

    Feeds are looked up via findFeedsToUpdate(). A feed that fails with a
    FeedUpdateException is logged and skipped so that one broken feed does
    not prevent the remaining feeds from being updated.

    session -- the database session used for querying and committing.
    """
    for feed in findFeedsToUpdate(session):
        try:
            FeedUpdater(session, feed).update()
        except FeedUpdateException as fue:
            # Lazy %-style arguments: building the message can no longer fail
            # inside the handler (e.g. when rss_url is None).
            log.warning("problems while updating feed %s: %s", feed.rss_url, fue)
        # Persist what has been collected for this feed before moving on.
        session.commit()
19 | |
def findFeedsToUpdate(session):
    """Select the feeds whose next_update lies before the current local time.

    session -- the database session to query.
    Returns whatever session.query(Feed).filter(...) yields, unevaluated.
    """
    feeds = session.query(Feed)
    isDue = Feed.next_update < datetime.now()
    return feeds.filter(isDue)
22 | |
def normalize(entry):
    """Fill in the attributes of a parsed feed entry that later code relies on.

    The entry is modified in place:
    - id defaults to the entry's link.
    - updated_parsed becomes a datetime: converted from the first six fields
      of the parsed time tuple, or the current local time when the entry has
      no usable update date.
    - summary defaults to the value of the first content element, or to an
      empty string when there is no content.

    Calling this more than once on the same entry is safe.
    """
    if not hasattr(entry, "id"):
        entry.id = entry.link

    updated = getattr(entry, "updated_parsed", None)
    if updated is None:
        # The attribute may exist but be None when the date could not be parsed.
        entry.updated_parsed = datetime.today()
    elif not isinstance(updated, datetime):
        # Time tuple: year, month, day, hour, minute, second come first.
        entry.updated_parsed = datetime(*updated[:6])

    if not hasattr(entry, "summary"):
        content = getattr(entry, "content", None)
        # Guard against a missing as well as an empty content list.
        entry.summary = content[0].value if content else ""
35 | |
class FeedUpdater(object):
    """Fetches a single feed and stores its new entries through a session."""

    def __init__(self, session, feed):
        """Remember the database session and the feed to operate on."""
        self.session = session
        self.feed = feed

    # TODO this is a HACK! creating new instances from itself is bad but required due to the storage of the session.
    def createNewFeed(self, url):
        """Create a Feed for url, add it to the session and update it once.

        The feed's title is taken from the parsed feed; the URL is used as
        the title when the feed does not provide one.
        """
        # when updating to python3 see http://code.google.com/p/feedparser/issues/detail?id=260
        result = feedparser.parse(url)
        # The title is part of the "feed" element of the parse result, not of
        # the top-level result itself.
        feedInfo = result.get("feed") or {}
        title = feedInfo.get("title") or url
        newFeed = Feed(title, url)
        self.session.add(newFeed)

        FeedUpdater(self.session, newFeed).update()

    def update(self):
        """Fetch the feed, process every entry and schedule the next update.

        Raises FeedUpdateException when the feed cannot be fetched.
        """
        log.info("updating %s", self.feed.rss_url)
        result = self.getFeed()
        for entry in result.entries:
            self.processEntry(entry)
        self.feed.incrementNextUpdateDate()

    def getFeed(self):
        """Parse the feed's URL and return the feedparser result.

        Raises FeedUpdateException when the server answers with an HTTP
        status of STATUS_ERROR or above, or when nothing could be retrieved
        at all.
        """
        result = feedparser.parse(self.feed.rss_url)
        # A set bozo flag (feed is not well-formed) alone is not treated as an
        # error here.
        status = result.get("status")
        if status is None:
            # No HTTP status is available, e.g. for local files or when the
            # request itself failed. Only give up if nothing usable came back.
            if result.get("bozo") and not result.get("entries"):
                raise FeedUpdateException(
                    "could not retrieve feed: " + str(result.get("bozo_exception")))
            return result
        if status >= STATUS_ERROR:
            raise FeedUpdateException("HTTP status " + str(status))
        return result

    def processEntry(self, entry):
        """Normalize entry and store it unless an entry with its id exists."""
        normalize(entry)
        feedEntry = FeedEntry.findById(entry.id, self.session)
        if feedEntry is None:
            self.createFeedEntry(entry)

    def createFeedEntry(self, entry):
        """Create a FeedEntry from entry, attach it to the feed and add it to the session."""
        new = FeedEntry.create(entry)
        new.feed = self.feed
        self.session.add(new)
        log.info("new feed entry: %s", entry.title)
82 | |
class FeedUpdateException(Exception):
    """Signals that a feed could not be updated."""