Skip to content

Commit c492327

Browse files
author
virgesmith
committed
work on geog refactor #17 and removed nomis table name from queries, closing #23
1 parent bbcab44 commit c492327

File tree

9 files changed

+65
-63
lines changed

9 files changed

+65
-63
lines changed

R/Nomisweb.R

+4-4
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ queryMetadata = function() {
2828
#' geoCodeLookup(censusapi, "MSOA11")
2929
#' @export
3030
geoCodeLookup = function(api, geoCodeString) {
31-
return(as.integer(api$GeoCodeLookup[geoCodeString]))
31+
return(as.character(api$GeoCodeLookup[geoCodeString]))
3232
}
3333

3434
#' getMetadata()
@@ -76,9 +76,9 @@ getMetadata = function(api, tableName) {
7676
#' getData(censusapi, table, meta$nomis_table, queryParams)
7777
#' }
7878
#' @export
79-
getData = function(api, tableName, internalName, query) {
79+
getData = function(api, tableName, query) {
8080
# returned value is filename (or error) to avoid data frame compatibility issues
81-
filename = api$get_data(tableName, internalName, query, TRUE)
81+
filename = api$get_data(tableName, query, TRUE)
8282
# TODO check that string isnt an error!
8383
return(read.csv(filename, sep="\t", stringsAsFactors = FALSE))
8484
}
@@ -113,7 +113,7 @@ getLADCodes = function(api, laNames) {
113113
#' @export
114114
geoCodes = function(api, coverage, resolution) {
115115
# force correct types
116-
return(api$get_geo_codes(as.integer(coverage), as.integer(resolution)))
116+
return(api$get_geo_codes(as.integer(coverage), resolution))
117117
}
118118

119119
#' contextify

inst/examples/contextify.R

+1-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ cacheDir = "/tmp/UKCensusAPI"
1111
# Here's a predefined query, to which we add contextual data
1212

1313
table = "KS401EW"
14-
table_internal = "NM_618_1"
1514
queryParams = list(
1615
date = "latest",
1716
RURAL_URBAN = "0",
@@ -24,7 +23,7 @@ queryParams = list(
2423
api = instance(cacheDir)
2524

2625
# Fetch the data
27-
KS401EW = getData(api, table, table_internal, queryParams)
26+
KS401EW = getData(api, table, queryParams)
2827

2928
# Add the context...
3029
KS401EW = contextify(api, table, "CELL", KS401EW)

inst/examples/contextify.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ def main():
1616

1717
# Heres predefined query on a small geographical area
1818
table = "KS401EW"
19-
table_internal = "NM_618_1"
2019
query_params = {}
2120
query_params["CELL"] = "7...13"
2221
query_params["date"] = "latest"
@@ -25,7 +24,7 @@ def main():
2524
query_params["geography"] = "1245710558...1245710560"
2625
query_params["MEASURES"] = "20100"
2726

28-
ks401 = api.get_data(table, table_internal, query_params)
27+
ks401 = api.get_data(table, query_params)
2928
# display the first ten rows
3029
print(ks401.head(10))
3130

inst/examples/geoquery.R

+2-3
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ cacheDir = "/tmp/UKCensusAPI"
1111
# Here's a predefined query using Leeds at MSOA resolution,
1212
# but we want to change the geographical area and refine the resolution
1313
table = "KS401EW"
14-
table_internal = "NM_618_1"
1514
queryParams = list(
1615
date = "latest",
1716
RURAL_URBAN = "0",
@@ -24,15 +23,15 @@ queryParams = list(
2423
api = instance(cacheDir)
2524

2625
# Define the new region and resolution
27-
coverage = c("City of London")
26+
coverage = c("City of London", "Westminster")
2827
resolution = geoCodeLookup(api, "OA11") # OA 2011 - see NomiswebApi.py
2928

3029
# Modify the query
3130
coverageCodes = getLADCodes(api, coverage)
3231
queryParams["geography"] = geoCodes(api, coverageCodes, resolution)
3332

3433
# Fetch the new data
35-
KS401EW = getData(api, table, table_internal, queryParams)
34+
KS401EW = getData(api, table, queryParams)
3635

3736
# End of example
3837

inst/examples/geoquery.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ def main():
1111
# In the previous example we had a predefined query using Leeds at MSOA resolution,
1212
# but we want to expand the geographical area and refine the resolution
1313
table = "KS401EW"
14-
table_internal = "NM_618_1"
1514
query_params = {}
1615
query_params["CELL"] = "7...13"
1716
query_params["date"] = "latest"
@@ -29,15 +28,15 @@ def main():
2928
# replace the geography value in the query
3029
query_params["geography"] = api.get_geo_codes(coverage_codes, resolution)
3130
# get the data
32-
ks401fine = api.get_data(table, table_internal, query_params)
31+
ks401fine = api.get_data(table, query_params)
3332
print(ks401fine.head(5))
3433

3534
# Now widen the coverage to England & Wales and coarsen the resolution to LA
3635
coverage_codes = [Api.Nomisweb.GeoCodeLookup["EnglandWales"]]
3736
resolution = Api.Nomisweb.GeoCodeLookup["LAD"]
3837
query_params["geography"] = api.get_geo_codes(coverage_codes, resolution)
3938
# get the data
40-
ks401broad = api.get_data(table, table_internal, query_params)
39+
ks401broad = api.get_data(table, query_params)
4140
print(ks401broad.head(5))
4241

4342
if __name__ == "__main__":

tests/test_all.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -52,22 +52,21 @@ def test_get_url(self):
5252

5353
def test_get_data(self):
5454
table = "KS401EW"
55-
table_internal = "NM_618_1"
55+
# table_internal = "NM_618_1"
5656
query_params = {}
5757
query_params["CELL"] = "7...13"
5858
query_params["date"] = "latest"
5959
query_params["RURAL_URBAN"] = "0"
6060
query_params["select"] = "GEOGRAPHY_CODE,CELL,OBS_VALUE"
6161
query_params["geography"] = "1245710558...1245710560"
6262
query_params["MEASURES"] = "20100"
63-
table = self.api.get_data(table, table_internal, query_params)
63+
table = self.api.get_data(table, query_params)
6464
self.assertEqual(table.shape, (21, 3))
6565
self.assertEqual(sum(table.OBS_VALUE), 8214)
6666

6767
def test_get_and_add_descriptive_column(self):
6868

6969
table_name = "KS401EW"
70-
meta = self.api.load_metadata(table_name)
7170

7271
query_params = {}
7372
query_params["CELL"] = "7...13"
@@ -76,7 +75,7 @@ def test_get_and_add_descriptive_column(self):
7675
query_params["select"] = "GEOGRAPHY_CODE,CELL,OBS_VALUE"
7776
query_params["geography"] = "1245710558...1245710560"
7877
query_params["MEASURES"] = "20100"
79-
table = self.api.get_data(table_name, meta["nomis_table"], query_params)
78+
table = self.api.get_data(table_name, query_params)
8079
self.assertEqual(table.shape, (21, 3))
8180
self.assertEqual(sum(table.OBS_VALUE), 8214)
8281

tests/testthat/test-all.R

+12-14
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@ skip_if_no_python_api = function() {
1414

1515
# simply checks we can get nomis geo codes back
1616
test_that("geoCodeLookup", {
17-
expect_true(UKCensusAPI::geoCodeLookup(api, "MSOA11") == 297)
18-
expect_true(UKCensusAPI::geoCodeLookup(api, "LSOA01") == 304)
19-
expect_true(UKCensusAPI::geoCodeLookup(api, "LAD") == 464)
20-
expect_true(UKCensusAPI::geoCodeLookup(api, "EnglandWales") == 2092957703)
17+
expect_true(UKCensusAPI::geoCodeLookup(api, "MSOA11") == "TYPE297")
18+
expect_true(UKCensusAPI::geoCodeLookup(api, "LSOA01") == "TYPE304")
19+
expect_true(UKCensusAPI::geoCodeLookup(api, "LAD") == "TYPE464")
20+
expect_true(UKCensusAPI::geoCodeLookup(api, "EnglandWales") == "2092957703")
2121
})
2222

2323
# simply checks we get data back
@@ -31,14 +31,13 @@ test_that("getMetadata", {
3131
test_that("getData", {
3232
skip_if_no_python_api()
3333
table = "KS401EW"
34-
table_internal = "NM_618_1"
3534
query = list(date = "latest",
3635
geography = "1245714681...1245714688",
3736
CELL = "7...13",
3837
RURAL_URBAN="0",
3938
measures = "20100",
4039
select = "GEOGRAPHY_CODE,CELL,OBS_VALUE")
41-
expect_true(class(UKCensusAPI::getData(api, table, table_internal, query)) == "data.frame")
40+
expect_true(class(UKCensusAPI::getData(api, table, query)) == "data.frame")
4241

4342
})
4443

@@ -65,45 +64,44 @@ test_that("getLADCodes", {
6564

6665
test_that("geoCodes empty", {
6766
skip_if_no_python_api()
68-
expect_true(geoCodes(api, c(), 999) == "")
67+
expect_true(geoCodes(api, c(), "TYPE999") == "")
6968
})
7069

7170
test_that("geoCodes invalid", {
7271
skip_if_no_python_api()
73-
expect_true(geoCodes(api, c(999), 999) == "")
72+
expect_true(geoCodes(api, c(999), "TYPE999") == "")
7473
})
7574

7675
test_that("geoCodes single LA", {
7776
skip_if_no_python_api()
78-
expect_true(geoCodes(api, 1946157124, 464) == "1946157124")
77+
expect_true(geoCodes(api, 1946157124, "TYPE464") == "1946157124")
7978
})
8079

8180
test_that("geoCodes multi MSOA", {
8281
skip_if_no_python_api()
83-
expect_true(geoCodes(api, c(1946157124, 1946157128), 297) == "1245710411...1245710471,1245710661...1245710705")
82+
expect_true(geoCodes(api, c(1946157124, 1946157128), "TYPE297") == "1245710411...1245710471,1245710661...1245710705")
8483
})
8584

8685
test_that("geoCodes multi LSOA", {
8786
skip_if_no_python_api()
88-
expect_true(geoCodes(api, c(1946157124, 1946157128), 298) == "1249912854...1249913154,1249913980...1249914188,1249935357...1249935365")
87+
expect_true(geoCodes(api, c(1946157124, 1946157128), "TYPE298") == "1249912854...1249913154,1249913980...1249914188,1249935357...1249935365")
8988
})
9089

9190
test_that("geoCodes single OA", {
9291
skip_if_no_python_api()
93-
expect_true(geoCodes(api, 1946157124, 299) == "1254148629...1254150034,1254267588...1254267709")
92+
expect_true(geoCodes(api, 1946157124, "TYPE299") == "1254148629...1254150034,1254267588...1254267709")
9493
})
9594

9695
test_that("contextify", {
9796
skip_if_no_python_api()
9897
table = "KS401EW"
99-
table_internal = "NM_618_1"
10098
query = list(date = "latest",
10199
geography = "1245714681...1245714688",
102100
CELL = "7...13",
103101
RURAL_URBAN="0",
104102
measures = "20100",
105103
select = "GEOGRAPHY_CODE,CELL,OBS_VALUE")
106-
data = UKCensusAPI::getData(api, table, table_internal, query)
104+
data = UKCensusAPI::getData(api, table, query)
107105
column = "CELL"
108106

109107
data = contextify(api, table, column, data)

ukcensusapi/Nomisweb.py

+23-18
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,18 @@ class Nomisweb:
3232
# https://www.nomisweb.co.uk/api/v01/dataset/NM_144_1/geography/2092957703TYPE464.def.sdmx.json
3333
# https://www.nomisweb.co.uk/api/v01/dataset/NM_1_1/geography/2092957703TYPE464.def.sdmx.json
3434
GeoCodeLookup = {
35-
"LAD": 464,
36-
"MSOA11": 297,
37-
"LSOA11": 298,
38-
"OA11": 299,
39-
"MSOA01": 305,
40-
"LSOA01": 304,
41-
"OA01": 310,
42-
"England": 2092957699,
43-
"EnglandWales": 2092957703,
44-
"GB": 2092957698,
45-
"UK": 2092957697
35+
# give meaning to some common nomis geography types/codes
36+
"LAD": "TYPE464",
37+
"MSOA11": "TYPE297",
38+
"LSOA11": "TYPE298",
39+
"OA11": "TYPE299",
40+
"MSOA01": "TYPE305",
41+
"LSOA01": "TYPE304",
42+
"OA01": "TYPE310",
43+
"England": "2092957699",
44+
"EnglandWales": "2092957703",
45+
"GB": "2092957698",
46+
"UK": "2092957697"
4647
}
4748

4849
# initialise, supplying a location to cache downloads
@@ -61,7 +62,8 @@ def __init__(self, cache_dir):
6162
os.mkdir(self.cache_dir)
6263
# TODO check dir created
6364
if Nomisweb.KEY is None:
64-
raise RuntimeError("no API key found. Giving up since downloads may be truncated.\n" \
65+
raise RuntimeError("No API key found. Whilst downloads still work, they may be truncated,\n" \
66+
"causing potentially unforseen problems in any modelling/analysis.\n" \
6567
"Set the key value in the environment variable NOMIS_API_KEY.\n" \
6668
"Register at www.nomisweb.co.uk to obtain a key")
6769

@@ -91,8 +93,7 @@ def get_geo_codes(self, la_codes, code_type):
9193

9294
geo_codes = []
9395
for i in range(0, len(la_codes)):
94-
path = "api/v01/dataset/NM_144_1/geography/" + str(la_codes[i]) + "TYPE" \
95-
+ str(code_type) + ".def.sdmx.json?"
96+
path = "api/v01/dataset/NM_144_1/geography/" + str(la_codes[i]) + code_type + ".def.sdmx.json?"
9697
rawdata = self.__fetch_json(path, {})
9798

9899
# use try-catch block to deal with any issues arising from the returned json
@@ -144,7 +145,7 @@ def get_url(self, table_internal, query_params):
144145
# Two reasons for this:
145146
# - pandas/R dataframes conversion is done via matrix (which drops col names)
146147
# - reporting errors to R is useful (print statements aren't displayed in R(Studio))
147-
def get_data(self, table, table_internal, query_params, r_compat=False):
148+
def get_data(self, table, query_params, r_compat=False):
148149
"""Downloads or retrieves data given a table and query parameters.
149150
Args:
150151
table: ONS table name
@@ -154,14 +155,15 @@ def get_data(self, table, table_internal, query_params, r_compat=False):
154155
a dataframe containing the data. If downloaded, the data is also cached to a file
155156
"""
156157
query_params["uid"] = Nomisweb.KEY
157-
query_string = self.get_url(table_internal, query_params)
158+
metadata = self.load_metadata(table)
159+
query_string = self.get_url(metadata["nomis_table"], query_params)
158160

159161
filename = self.cache_dir + table + "_" + hashlib.md5(query_string.encode()).hexdigest()+".tsv"
160162

161163
# retrieve if not in cache
162164
if not os.path.isfile(filename):
163-
meta = self.get_metadata(table)
164-
self.write_metadata(table, meta)
165+
meta = self.load_metadata(table)
166+
#self.write_metadata(table, meta)
165167
print("Downloading and cacheing data: " + filename)
166168
request.urlretrieve(query_string, filename) #, timeout = Nomisweb.Timeout)
167169

@@ -247,6 +249,9 @@ def get_metadata(self, table_name):
247249
"fields": fields,
248250
"geographies": geogs }
249251

252+
# save a copy
253+
self.write_metadata(table_name, result)
254+
250255
return result
251256

252257
# loads metadata from cached json if available, otherwise downloads from nomisweb.

ukcensusapi/Query.py

+17-13
Original file line numberDiff line numberDiff line change
@@ -49,22 +49,20 @@ def table(self):
4949

5050
add_geog = input("Add geography? (y/N): ") == "y"
5151
if add_geog:
52-
query_params["geography"] = self.__add_geog()
52+
query_params["geography"] = self.__add_geog(meta)
5353
#print(query_params)
5454

5555
get_data = input("Get data now? (y/N): ") == "y"
5656
if get_data:
5757
print("\n\nGetting data...")
5858

5959
# Fetch (and cache) data
60-
self.api.get_data(table, meta["nomis_table"], query_params)
60+
self.api.get_data(table, query_params)
6161

6262
# Remove API key in example code (lest it be accidentally committed)
6363
if "uid" in query_params:
6464
del query_params["uid"]
6565

66-
self.api.write_metadata(table, meta)
67-
6866
self.write_code_snippets(table, meta, query_params)
6967

7068
# returns a geography string that can be inserted into an existing query
@@ -77,16 +75,10 @@ def get_geog_from_names(self, coverage, resolution):
7775
coverage_codes = self.api.get_lad_codes(coverage)
7876
return self.api.get_geo_codes(coverage_codes, resolution)
7977

80-
def __add_geog(self):
78+
def __add_geog(self, metadata):
8179

8280
coverage = input("\nGeographical coverage\nE/EW/GB/UK or LAD codes(s)/name(s), comma separated: ")
8381

84-
resolution = input("Resolution (LAD/MSOA11/LSOA11/OA11/MSOA01/LSOA01/OA01): ")
85-
while not resolution in Api.Nomisweb.GeoCodeLookup.keys():
86-
print(resolution + " is not valid")
87-
resolution = input("Resolution (LAD/MSOA11/LSOA11/OA11/MSOA01/LSOA01/OA01): ")
88-
resolution = Api.Nomisweb.GeoCodeLookup[resolution]
89-
9082
if coverage == "E":
9183
coverage_codes = [Api.Nomisweb.GeoCodeLookup["England"]]
9284
elif coverage == "EW":
@@ -98,6 +90,18 @@ def __add_geog(self):
9890
else:
9991
coverage_codes = self.api.get_lad_codes(coverage.split(","))
10092

93+
#print(metadata)
94+
for key in metadata["geographies"]:
95+
print(key, metadata["geographies"][key])
96+
97+
resolution_valid = False
98+
while not resolution_valid:
99+
resolution = input("Select Resolution: ")
100+
if resolution in metadata["geographies"].keys():
101+
resolution_valid = True
102+
else:
103+
print(resolution + " is not valid")
104+
101105
area_codes = self.api.get_geo_codes(coverage_codes, resolution)
102106
return area_codes
103107

@@ -121,7 +125,7 @@ def write_code_snippets(self, table, meta, query_params):
121125
py_file.write("\nquery_params[\""+key+"\"] = \""+query_params[key]+"\"")
122126
if not "geography" in query_params:
123127
py_file.write("\n# TODO query_params[\"geography\"] = ...")
124-
py_file.write("\n" + table + " = api.get_data(table, table_internal, query_params)\n")
128+
py_file.write("\n" + table + " = api.get_data(table, query_params)\n")
125129

126130
print("\nWriting R code snippet to " + self.api.cache_dir + table + ".R")
127131
with open(self.api.cache_dir + table + ".R", "w") as r_file:
@@ -146,4 +150,4 @@ def write_code_snippets(self, table, meta, query_params):
146150
if not "geography" in query_params:
147151
r_file.write("\n # TODO add geography parameter to this query...")
148152
r_file.write("\n)")
149-
r_file.write("\n" + table + " = UKCensusAPI::getData(api, table, table_internal, queryParams)\n")
153+
r_file.write("\n" + table + " = UKCensusAPI::getData(api, table, queryParams)\n")

0 commit comments

Comments
 (0)