-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrssclass.py
More file actions
executable file
·118 lines (87 loc) · 2.71 KB
/
rssclass.py
File metadata and controls
executable file
·118 lines (87 loc) · 2.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/python
# Module usage string, printed when the file is run as a script (see __main__ guard).
Usage = """RSS feed class to select papers of a specific pattern from e.g., arXiv."""
# feedparser: third-party RSS/Atom parser; re/datetime/pickle: stdlib.
import feedparser, re, datetime, pickle
# Module metadata.
__author__ = "Jonny Elliott"
__copyright__ = "Copyright 2011"
__credits__ = ""
__license__ = "GPL"
__version__ = "0.0"
__maintainer__ = "Jonny Elliott"
__email__ = "jonnyelliott@mpe.mpg.de"
__status__ = "Prototype"
class rssfeed(object):
    """RSS feed reader that selects entries whose title, summary, or
    keywords match any of a set of regex patterns (e.g. arXiv astro-ph).

    Matched entries are kept on the instance and can be pickled to /
    loaded from disk.
    """

    def __init__(self, weburl="http://arxiv.org/rss/astro-ph", patterns=None):
        """Create a feed reader.

        weburl   -- URL of the RSS feed to parse.
        patterns -- list of regex patterns; defaults to the gamma-ray-burst
                    set. Built per-instance to avoid the shared
                    mutable-default-argument pitfall.
        """
        if patterns is None:
            patterns = ["gamma-ray", "Gamma-ray", "Gamma-Ray", "burst"]
        self._weburl = weburl            # feed URL handed to feedparser
        self._patterns = patterns        # regex patterns to match against
        self._title_papers = []          # entries matched by title
        self._summary_papers = []        # entries matched by summary
        self._keyword_papers = []        # entries matched by keywords
        self._date = datetime.date.today()
        self._feed = []                  # entries loaded back from a pickle file

    def setPatterns(self, patterns):
        """Replace the pattern list; warns and ignores non-list input."""
        if not isinstance(patterns, list):
            print("Please set as list")
        else:
            self._patterns = patterns

    def writeFeedToFile(self, outname):
        """Pickle the matched summary and keyword entries to *outname*.

        Returns 2 unconditionally (preserved for backward compatibility
        with the original interface), printing a message on failure.
        """
        papers = list(self._summary_papers) + list(self._keyword_papers)
        try:
            # pickle requires a binary-mode file handle.
            with open(outname, "wb") as outputfile:
                pickle.dump(papers, outputfile)
        except (OSError, pickle.PickleError):
            print("Error saving feed.")
        return 2

    def loadFeedFromFile(self, inname):
        """Unpickle entries from *inname* into self._feed.

        Returns 1 on error (message printed), None on success.
        WARNING: pickle.load on an untrusted file can execute arbitrary
        code — only load files this class wrote itself.
        """
        try:
            with open(inname, "rb") as inputfile:
                feed = pickle.load(inputfile)
        except (OSError, pickle.PickleError):
            print("Error loading feed.")
            return 1
        self._feed = feed

    def _matchField(self, fieldname):
        """Fetch the feed and return items whose *fieldname* value matches
        any configured pattern.

        Note: like the original code, an item is appended once per
        matching pattern, so it may appear more than once.
        """
        feed = feedparser.parse(self._weburl)
        matched = []
        for item in feed["items"]:
            text = item[fieldname]
            for pattern in self._patterns:
                if re.search(pattern, text):
                    matched.append(item)
        return matched

    def getPapersTitle(self):
        """Collect feed items whose title matches a pattern."""
        self._title_papers = self._matchField("title")

    def getPapersSummary(self):
        """Collect feed items whose summary matches a pattern."""
        self._summary_papers = self._matchField("summary")

    # Redundant for arXiv (its RSS entries carry no usable keywords field).
    def getPapersKeyword(self):
        """Collect feed items whose keywords match a pattern."""
        self._keyword_papers = self._matchField("keywords")
if __name__ == "__main__":
    # Script entry point: this module is a library, so just print the
    # usage string. print() call replaces the Python 2 print statement.
    print(Usage)