-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
38 lines (31 loc) · 1012 Bytes
/
main.py
File metadata and controls
38 lines (31 loc) · 1012 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#! /usr/bin/python
# -*- coding:utf-8 -*-
import src.getarticle
import src.findlinks
import src.namearticle
import src.collection
import src.nbpages
import src.xml
import unicodedata
#url = "http://bmjopen.bmj.com/content/5/6/e007470.full"
#url = "http://bmjopen.bmj.com/content/5/5/e007898.full"
#url = "http://bmjopen.bmj.com/content/3/3/e002489.full"
#url = "http://bmjopen.bmj.com/content/5/4/e006740.full"
#url = "http://bmjopen.bmj.com/content/1/2/bmjopen-2011-000240.full"
# Common prefix of the collection listing URLs; the collection name and a
# "?page=N" query string are appended to it below.
base = "http://bmjopen.bmj.com/collections/bmj_open_"
# set the collection type
type_collection = "epidemiology"
url = base + type_collection + "?page=" + str(1)
# The helper modules are imported as "import src.<module>", which binds only
# the top-level name "src"; they must therefore be reached through the
# package-qualified path (a bare "nbpages" / "findlinks" is a NameError).
# NOTE(review): no_pages presumably returns the number of listing pages as an
# int -- confirm against src/nbpages.py.
max_pages = src.nbpages.no_pages(url)
# Disabled debugging aids, kept for reference:
#print("this collection contains "+str(max_pages)+" pages")
#src.getarticle.get_article(url)
#i = max_pages - 2
i = 1
# Walk the listing pages from 1 to max_pages (inclusive), printing each page
# URL followed by every link that find_links yields for that page.
while i <= max_pages:
    url = base + type_collection + "?page=" + str(i)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(url)
    for link in src.findlinks.find_links(url):
        print(link)
    i += 1
# Disabled debugging aids, kept for reference:
#src.collection.get_collection(url)
#print(src.namearticle.name_article(url))