
Commit 12c8549

committed 3/11/2019
1 parent 6d69efa commit 12c8549

File tree

5 files changed: +154 -6 lines changed
(binary file, 660 Bytes, not shown)

RDF Files/select rdf.py

+12
@@ -0,0 +1,12 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import pandas as pd
import settings
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL

# Build the engine from the DATABASE dict in settings.py; force utf8 for entity text
engine = create_engine(URL(**settings.DATABASE), connect_args={'charset': 'utf8'})

# Claim IDs for claims linked to an entity whose text starts with "Hillary Clinton"
data = pd.read_sql_query('SELECT cl.claimID FROM centralifact.claim cl INNER JOIN claim_entity ce ON cl.claimID = ce.claimID INNER JOIN entity e ON e.entityID = ce.entityID WHERE e.entity_text LIKE %s', con=engine, params=("Hillary Clinton%",))
print(data['claimID'])

# Same query for "Donald Trump"
data = pd.read_sql_query('SELECT cl.claimID FROM centralifact.claim cl INNER JOIN claim_entity ce ON cl.claimID = ce.claimID INNER JOIN entity e ON e.entityID = ce.entityID WHERE e.entity_text LIKE %s', con=engine, params=("Donald Trump%",))
print(data['claimID'])

RDF Files/settings.py

+104
@@ -0,0 +1,104 @@
# -*- coding: utf-8 -*-

# Scrapy settings for centralifact project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://doc.scrapy.org/en/latest/topics/settings.html
#     https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://doc.scrapy.org/en/latest/topics/spider-middleware.html

BOT_NAME = 'centralifact'
SPIDER_MODULES = ['centralifact.spiders']
NEWSPIDER_MODULE = 'centralifact.spiders'

DATABASE = {
    'drivername': 'mysql',
    'host': 'burns.cs.indiana.edu',
    'port': '3306',
    'username': 'centralifact',
    'password': 'Z-J-K+C-Fact=My+Sql',
    'database': 'centralifact'
}

USER_AGENT = 'Indiana-University-Researcher-Zoher-Kachwala([email protected])'

SPIDER_MIDDLEWARES = {
    'scrapy_deltafetch.DeltaFetch': 100,
}
DELTAFETCH_ENABLED = True
DELTAFETCH_RESET = False
AUTOTHROTTLE_ENABLED = True
ROBOTSTXT_OBEY = True

# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'centralifact (+http://www.yourdomain.com)'

# Obey robots.txt rules
#ROBOTSTXT_OBEY = True

# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
#COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#   'Accept-Language': 'en',
#}

# Enable or disable spider middlewares
# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
#    'centralifact.middlewares.CentralifactSpiderMiddleware': 543,
#}

# Enable or disable downloader middlewares
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
#    'centralifact.middlewares.CentralifactDownloaderMiddleware': 543,
#}

# Enable or disable extensions
# See https://doc.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Configure item pipelines
# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
#ITEM_PIPELINES = {
#    'centralifact.pipelines.CentralifactPipeline': 300,
#}

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

neo4j_commands.cypher

+1-1
@@ -15,7 +15,7 @@ RETURN arg1,rel,arg2

//Loading RDF
CREATE INDEX ON :Resource(uri) //prestep
-CALL semantics.importRDF("file:///claimreviews_db_2_12_10_52AM.rdf","RDF/XML", { shortenUrls: false, typesToLabels: true, commitSize: 9000 })
+CALL semantics.importRDF("file:///claimreviews_db_2_12_10_52AM.rdf","RDF/XML", { shortenUrls: false, typesToLabels: false, commitSize: 9000 })

CALL semantics.importRDF("file:///C:/Users/zoya/Desktop/Zoher/factcheckgraph/claimreviews_db_2_12_10_52AM.rdf","RDF/XML", { shortenUrls: false, typesToLabels: true, commitSize: 9000 })
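The one-line change here flips typesToLabels to false, so rdf:type statements stay as relationships instead of being folded into node labels. A quick way to inspect the resulting types after such an import (a sketch, assuming shortenUrls:false keeps full IRIs as relationship types, as the old semantics.* neosemantics procedures do):

//With typesToLabels:false, rdf:type arrives as a relationship keyed by its full IRI
MATCH (n)-[:`http://www.w3.org/1999/02/22-rdf-syntax-ns#type`]->(t)
RETURN t.uri AS type, count(n) AS instances ORDER BY instances DESC LIMIT 10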

neo4j_sequence.cypher

+37-5
@@ -1,18 +1,45 @@
+///Setting Degree
+MATCH (n)-[r]->() WITH n, r, size((n)-->()) AS degree
+RETURN degree
+
+MATCH (n) WITH collect(n) AS nodes
+UNWIND nodes AS node
+MATCH (node)-[r]-() WITH r, size((node)-->()) AS degree
+SET r.degree = degree
+RETURN r.degree
+
+MATCH (n)-[r]->(m)
+WITH n, count(m) AS c, collect(r) AS rs
+UNWIND rs AS r
+SET n.outdegree = c
+SET r.degree = c
+RETURN r.degree
+
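The added statements above are successive attempts at storing degree counts on nodes and relationships; a compact sketch of the same idea in plain Cypher (assuming Neo4j 3.x pattern-expression size()):

//Store each node's out-degree, then stamp it on that node's outgoing relationships
MATCH (n)
SET n.outdegree = size((n)-->())
WITH n
MATCH (n)-[r]->()
SET r.degree = n.outdegree
RETURN count(r)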
////Get Donald_Trump rels
MATCH (n{uri:'http://dbpedia.org/resource/Donald_Trump'})-[r]-(m) Return n,r,m LIMIT 20
MATCH (n{uri:'http://dbpedia.org/resource/Barack_Obama'})-[r]-(m) Return n,r,m LIMIT 20
//http://dbpedia.org/resource/Barack_Obama
MATCH (n{uri:'http://dbpedia.org/resource/Donald_Trump'})-[r*1..2]-(m) Return n,r,m LIMIT 50
MATCH a=(n:`schema.org/Person`)-[r*1..3]->(m:`schema.org/Person`) Return a LIMIT 20
MATCH a=(n:`schema.org/Person`)-[r*1..3]->(m:`schema.org/Person`) Return a LIMIT 20
-MATCH a=(n{uri:'http://dbpedia.org/resource/Donald_Trump'})-[*0..3]-(m) where m.uri starts with 'http://dbpedia.org/resource/' Return a LIMIT 100
+MATCH a=(n{uri:'http://dbpedia.org/resource/Donald_Trump'})-[*1..5]-(m) where m.uri starts with 'http://dbpedia.org/resource/' Return a LIMIT 100
//
MATCH a=(n{uri:'http://dbpedia.org/resource/Donald_Trump'})-[*1..10]->(m{uri:'http://dbpedia.org/resource/Hillary_Clinton'}) Return a LIMIT 10
MATCH (from{uri:'http://dbpedia.org/resource/Donald_Trump'}),(to{uri:'http://dbpedia.org/resource/Hillary_Clinton'}) CALL algo.shortestPath.stream(from, to, "cost")
YIELD nodeId, cost
RETURN algo.getNodeById(nodeId) AS node, cost
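The algo.shortestPath.stream call above weights paths by a "cost" relationship property, which only helps if relationships actually carry one; a minimal prestep sketch that seeds unit costs (an assumption, not part of the commit), making the weighted search equivalent to hop counting:

//Assumed prestep: give every relationship unit cost before calling algo.shortestPath.stream
MATCH ()-[r]->()
SET r.cost = 1
RETURN count(r)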
-
-MATCH (from{uri:'http://dbpedia.org/resource/Donald_Trump'}),(to{uri:'http://dbpedia.org/resource/United_States'}),path = shortestpath((from)-[*]-(to))
+//
+MATCH (n) WHERE n.uri = 'http://dbpedia.org/resource/Hillary_Clinton'
+CALL apoc.path.subgraphNodes(n, {maxLevel:1, labelFilter:'+http://schema.org/Person'}) YIELD node
+RETURN node
+//
+MATCH (n{uri:'http://dbpedia.org/resource/Hillary_Clinton'}),(m) where m.uri starts with 'http://dbpedia.org/resource/'
+WITH n, collect(m) AS targets
+CALL apoc.path.subgraphNodes(n, {maxLevel:10, whitelistNodes:targets}) YIELD node
+RETURN node
+//
+MATCH (from{uri:'http://dbpedia.org/resource/Vladimir_Putin'}),(to{uri:'http://dbpedia.org/resource/United_States'}),path = shortestpath((from)-[*]-(to))
with path
WHERE length(path)>2
return path
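Note that shortestpath() only ever returns the single shortest path, so the length(path)>2 filter discards short answers rather than surfacing longer routes; enumerating longer paths needs a bounded variable-length match instead, as in this sketch (the 3..6 bound is an illustrative assumption and can be expensive on dense graphs):

//Hypothetical bounded search for paths longer than 2 hops
MATCH path=(from{uri:'http://dbpedia.org/resource/Vladimir_Putin'})-[*3..6]-(to{uri:'http://dbpedia.org/resource/United_States'})
RETURN path LIMIT 5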
@@ -34,7 +61,7 @@ MATCH (n)-[r]-(n) delete r

//1 Import
CREATE INDEX ON :Resource(uri) //prestep
-CALL semantics.importRDF("file:///C:/Users/zoya/Downloads/claimreviews_db.rdf","RDF/XML", { shortenUrls: false, typesToLabels: true, commitSize: 25000 })
+CALL semantics.importRDF("file:///C:/Users/zoya/Downloads/claimreviews_db.rdf","RDF/XML", { shortenUrls: false, typesToLabels: false, commitSize: 25000 })
//2 labeling the nodes
MATCH (n) with n, SPLIT(n.uri,"/")[-1] as name SET n.label=name return n.label
MATCH (n) with n, SPLIT(n.label,"#")[-1] as name SET n.label=name return n.label
@@ -90,4 +117,9 @@ delete r1,o,r,m
//
MATCH ()-[r:`http://www.ontologydesignpatterns.org/ont/fred/quantifiers.owl#hasDeterminer`]->(m)
OPTIONAL MATCH (m)-[r1]->(o)
delete r1,o,r,m
+
+MATCH (n) WITH collect([n, n.uri]) AS events
+UNWIND events AS event
+WITH event[0] AS node, event[1] AS uri
+SET node.label = semantics.getIRILocalName(uri)
+RETURN node
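The collect/UNWIND round-trip in the last statement is not needed for a per-node update; the same relabeling can be written in one pass (a sketch, assuming the semantics.getIRILocalName function already used above):

//Equivalent one-pass relabel: derive each node's label from the local name of its IRI
MATCH (n) WHERE exists(n.uri)
SET n.label = semantics.getIRILocalName(n.uri)
RETURN count(n)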
