Mercurial > hg > Feedworm
changeset 4:e0199f383442
retrieve a feed for the given URL, store entries as feed_entry rows into the database
author | Dirk Olmes <dirk@xanthippe.ping.de> |
---|---|
date | Mon, 26 Apr 2010 19:33:07 +0200 |
parents | 2ac144a98370 |
children | bfd47f55d85b |
files | Feed.py FeedEntry.py FeedUpdater.py Mapping.py feedupdate-main.py |
diffstat | 5 files changed, 75 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/Feed.py Mon Apr 26 17:43:48 2010 +0200
+++ b/Feed.py Mon Apr 26 19:33:07 2010 +0200
@@ -5,4 +5,4 @@
         self.url = url
 
     def __repr__(self):
-        return "<Feed (%d) %s>" % (self.id, self.name)
+        return "<Feed (%d) %s>" % (self.pk, self.name)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/FeedEntry.py Mon Apr 26 19:33:07 2010 +0200
@@ -0,0 +1,15 @@
+
+class FeedEntry(object):
+    @staticmethod
+    def findById(id, session):
+        result = session.query(FeedEntry).filter(FeedEntry.id == id)
+        return result.first()
+
+    def __init__(self, id, url, title, summary):
+        self.id = id
+        self.url = url
+        self.title = title
+        self.summary = summary
+
+    def __repr__(self):
+        return "<FeedEntry (%d) %s>" % (self.pk, self.link)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/FeedUpdater.py Mon Apr 26 19:33:07 2010 +0200
@@ -0,0 +1,30 @@
+
+from Feed import Feed
+from FeedEntry import FeedEntry
+import feedparser
+
+def updateAllFeeds(session):
+    allFeeds = session.query(Feed)
+    for feed in allFeeds:
+        FeedUpdater(session, feed).update()
+    session.commit()
+
+class FeedUpdater(object):
+    def __init__(self, session, feed):
+        self.session = session
+        self.feed = feed
+
+    def update(self):
+        result = feedparser.parse(self.feed.url)
+        for entry in result.entries:
+            self.processEntry(entry)
+
+    def processEntry(self, entry):
+        feedEntry = FeedEntry.findById(entry.id, self.session)
+        if feedEntry is None:
+            self.createFeedEntry(entry)
+
+    def createFeedEntry(self, entry):
+        newEntry = FeedEntry(id=entry.id, url=entry.link, title=entry.title, summary=entry.summary)
+        newEntry.feed = self.feed
+        self.session.add(newEntry)
--- a/Mapping.py Mon Apr 26 17:43:48 2010 +0200
+++ b/Mapping.py Mon Apr 26 19:33:07 2010 +0200
@@ -1,11 +1,14 @@
 
 from Feed import Feed
+from FeedEntry import FeedEntry
 from sqlalchemy import Column
+from sqlalchemy import ForeignKey
 from sqlalchemy import Integer
 from sqlalchemy import MetaData
 from sqlalchemy import String
 from sqlalchemy import Table
 from sqlalchemy.orm import mapper
+from sqlalchemy.orm import relation
 
 def createMapping(engine):
     metadata = MetaData(engine)
@@ -15,7 +18,21 @@
         Column("name", String(255), nullable=False),
         Column("url", String(255), nullable=False)
     )
+
+    feedEntryTable = Table("feed_entry", metadata,
+        Column("pk", Integer, primary_key=True),
+        Column("id", String(255), nullable=False),
+        Column("url", String(255), nullable=False),
+        Column("title", String, nullable=False),
+        Column("summary", String, nullable=False),
+        Column("feed_id", Integer, ForeignKey("feed.pk"))
+    )
 
     metadata.create_all()
 
-    mapper(Feed, feedTable)
+    mapper(FeedEntry, feedEntryTable)
+    mapper(Feed, feedTable,
+        properties = {
+            "entries" : relation(FeedEntry, backref = "feed")
+        }
+    )
--- a/feedupdate-main.py Mon Apr 26 17:43:48 2010 +0200
+++ b/feedupdate-main.py Mon Apr 26 19:33:07 2010 +0200
@@ -2,6 +2,7 @@
 
 from ConfigParser import ConfigParser
 from Feed import Feed
+from FeedUpdater import updateAllFeeds
 import Mapping
 import socket
 from sqlalchemy import create_engine
@@ -20,6 +21,13 @@
     session.add(newFeed)
     session.commit()
 
+def listFeeds(session):
+    allFeeds = session.query(Feed)
+    for feed in allFeeds:
+        print "\n\nfeed: " + feed.name
+        for entry in feed.entries:
+            print entry.title
+
 if __name__ == "__main__":
     databaseUrl = loadDatabaseProperties()
     engine = create_engine(databaseUrl,echo=True)
@@ -27,4 +35,6 @@
     SessionMaker = sessionmaker(bind = engine)
     session = SessionMaker()
 
-    createFeed(session)
+    #createFeed(session)
+    updateAllFeeds(session)
+    #listFeeds(session)