Skip to content

Commit

Permalink
Merge pull request #75 from googleinterns/dual-infer
Browse files Browse the repository at this point in the history
Dual-encoder with inference on RVS data
  • Loading branch information
tzufgoogle authored Oct 27, 2020
2 parents f03456b + c598857 commit 0f98788
Show file tree
Hide file tree
Showing 16 changed files with 466 additions and 128 deletions.
3 changes: 2 additions & 1 deletion cabby/data/extract_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ def testQueryWithOSM(self):
foundFigleaf = True
self.assertEqual(sample.sample_type, 'OSM')
self.assertEqual(
sample.text, 'Figleaf and building and East Carson Street.')
sample.text,
'Figleaf and building and East Carson Street.')
self.assertTrue(foundFigleaf)


Expand Down
2 changes: 1 addition & 1 deletion cabby/data/wikigeo.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from cabby.data.wikidata import item as wdi
from cabby.data.wikipedia import item as wpi

VERSION = 0.15
VERSION = 0.17

@attr.s
class WikigeoEntity:
Expand Down
25 changes: 16 additions & 9 deletions cabby/geo/map_processing/map_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from shapely.geometry.point import Point
from shapely.geometry.polygon import Polygon
from shapely.geometry import box, mapping, LineString
from shapely.geometry.multipolygon import MultiPolygon
from shapely import wkt
from shapely.ops import split

Expand Down Expand Up @@ -96,7 +97,7 @@ def get_poi(self) -> Tuple[GeoSeries, GeoSeries]:
'brand': True,
'tourism': True}

osm_poi = ox.pois.pois_from_polygon(self.polygon_area, tags=tags)
osm_poi = ox.geometries_from_polygon(self.polygon_area, tags=tags)

osm_highway = osm_poi['highway']
osm_poi_no_streets = osm_poi[osm_highway.isnull()]
Expand Down Expand Up @@ -132,20 +133,26 @@ def add_single_poi_to_graph(
# Project POI on to the closest edge in graph.
geometry = single_poi['geometry']
if isinstance(geometry, Point):
points = [single_poi['geometry']]
elif isinstance(geometry, Polygon):
coords = single_poi['geometry'].exterior.coords
points = [geometry]
else: # Polygon or LineString
if isinstance(geometry, Polygon):
coords = geometry.exterior.coords
elif isinstance(geometry, LineString):
coords = geometry.coords
elif isinstance(geometry, MultiPolygon):
coords = [poly.exterior.coords[0] for poly in geometry]
else:
return None
n_points = len(coords)

# Sample maximum 4 points.
sample_1 = Point(coords[0])
sample_2 = Point(coords[round(n_points/4)])
sample_3 = Point(coords[round(n_points/2)])
sample_4 = Point(coords[round(3*n_points/4)])
sample_2 = Point(coords[int(n_points/4)])
sample_3 = Point(coords[int(n_points/2)])
sample_4 = Point(coords[int(3*n_points/4)])
points = [sample_1, sample_2, sample_3, sample_4]
points = points[0:4]
else:
return single_poi['osmid']


poi_osmid = single_poi['osmid']

Expand Down
71 changes: 59 additions & 12 deletions cabby/geo/map_processing/plot.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,25 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": 3
"version": "3.7.6-final"
},
"orig_nbformat": 2
"orig_nbformat": 2,
"kernelspec": {
"name": "Python 3.7.6 64-bit ('cabby': conda)",
"display_name": "Python 3.7.6 64-bit ('cabby': conda)",
"metadata": {
"interpreter": {
"hash": "ce358741096736da10e99ee280471ecb431e6109b306ee40a97e643e65256963"
}
}
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -40,28 +49,42 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"DIRECTORY = \"./poiTestData/\"\n",
"sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd() ))))\n",
"sys.path\n",
"from cabby.geo.map_processing import map_structure\n",
"from cabby.geo import regions\n",
"from cabby.geo import util\n",
"import folium\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"DIRECTORY = \"/mnt/hackney/data/cabby/map/v17\"\n",
"LEVEL = 18\n",
"REGION = \"Pittsburgh\""
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Load from disk.\n",
"map_new = map_structure.Map(REGION, LEVEL, DIRECTORY)"
"map_new = map_structure.Map(regions.get_region(REGION), LEVEL, DIRECTORY)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -72,7 +95,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -90,7 +113,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -100,16 +123,40 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 35,
"metadata": {},
"outputs": [],
"outputs": [
{
"output_type": "error",
"ename": "NameError",
"evalue": "name 'util' is not defined",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-35-cdefa5bd3720>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmap_new\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnodes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0madd_nodes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mmap_new\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0medges\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'geometry'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0madd_edges\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mmap_osm\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/envs/cabby/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, axis, raw, result_type, args, **kwds)\u001b[0m\n\u001b[1;32m 7539\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7540\u001b[0m )\n\u001b[0;32m-> 7541\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7542\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7543\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapplymap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;34m\"DataFrame\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/envs/cabby/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mget_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 178\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_raw\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 179\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 180\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 181\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 182\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapply_empty_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/envs/cabby/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 253\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 254\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 255\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mres_index\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_series_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 256\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 257\u001b[0m \u001b[0;31m# wrap results\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/envs/cabby/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_series_generator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 282\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[0;31m# ignore SettingWithCopy here in case the user mutates\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 284\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 285\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mABCSeries\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 286\u001b[0m \u001b[0;31m# If we have a view on v, we need to make a copy because\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-33-69d421f50056>\u001b[0m in \u001b[0;36madd_nodes\u001b[0;34m(row)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0madd_nodes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;31m# draw the points\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mpoint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mutil\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlist_yx_from_point\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'geometry'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m folium.Circle(location = [row['geometry'].y,\n",
"\u001b[0;31mNameError\u001b[0m: name 'util' is not defined"
]
}
],
"source": [
"\n",
"map_new.nodes.apply(add_nodes, axis=1)\n",
"map_new.edges['geometry'].apply(add_edges)\n",
"\n",
"map_osm"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
]
}
28 changes: 25 additions & 3 deletions cabby/geo/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from s2geometry import pywraps2 as s2
from shapely.geometry.point import Point
from shapely.geometry.polygon import Polygon
from shapely.geometry.multipolygon import MultiPolygon
from shapely.geometry import box, mapping, LineString
import sys
from typing import Optional, Tuple, Sequence, Any, Text
Expand Down Expand Up @@ -450,9 +451,10 @@ def get_distance_between_geometries(geometry: Any, point: Point) -> float:
if isinstance(geometry, Point):
return get_distance_between_points(geometry, point)
else:
return get_polygon_distance_from_point(geometry, point)
return get_distance_between_point_to_geometry(geometry, point)

def get_polygon_distance_from_point(poly: Polygon, point: Point) -> float:
def get_distance_between_point_to_geometry(
geometry: Any, point: Point) -> float:
'''Calculate the distance between point and polygon in meters.
Arguments:
route: The line that length calculation will be performed on.
Expand All @@ -461,7 +463,13 @@ def get_polygon_distance_from_point(poly: Polygon, point: Point) -> float:
The distance between point and polygon in meters.
'''
dist_min = float("Inf")
for coord in poly.exterior.coords:
if isinstance(geometry, MultiPolygon):
coords = [coord for poly in geometry for coord in poly.exterior.coords]
elif isinstance(geometry, Polygon):
coords = geometry.exterior.coords
else:
coords = geometry.coords
for coord in coords:
point_current = Point(coord)
dist = get_distance_between_points(point, point_current)
if dist_min > dist:
Expand All @@ -486,6 +494,20 @@ def get_line_length(line: LineString) -> float:
return dist


def point_from_list_coord(coord: Sequence) -> Point:
  '''Builds a Point from a [latitude, longitude] sequence.

  Example input: [40.715865, -74.037258].

  Arguments:
    coord: A sequence whose first element is the latitude and whose second
      element is the longitude.
  Returns:
    A Point constructed as Point(longitude, latitude).
  '''
  latitude, longitude = coord[0], coord[1]
  return Point(longitude, latitude)


def point_from_str_coord(coord_str: Text) -> Point:
'''Converts coordinates in string format (latitude and longitude) to Point.
E.g., the string '(40.715865, -74.037258)'.
Expand Down
16 changes: 13 additions & 3 deletions cabby/model/text/dual_encoder/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,25 @@ package(


py_library(
name = 'dataset',
srcs = ['dataset.py'],
name = 'dataset_wikigeo',
srcs = ['dataset_wikigeo.py'],
deps = [
":dataset_item",
"//cabby/geo:util",
"//cabby/geo:regions",

],
)
# Library target for dataset_rvs.py; depends on the local dataset_item target
# and on the geo util and regions targets.
py_library(
name = 'dataset_rvs',
srcs = ['dataset_rvs.py'],
deps = [
":dataset_item",
"//cabby/geo:util",
"//cabby/geo:regions",

],
)

py_library(
name = 'dataset_item',
Expand Down Expand Up @@ -51,7 +60,8 @@ py_binary(
srcs = ['model_trainer.py'],
deps = [
':train',
':dataset',
':dataset_wikigeo',
':dataset_rvs',
':dataset_item',
':model',
"//cabby/model/text:util",
Expand Down
8 changes: 4 additions & 4 deletions cabby/model/text/dual_encoder/dataset_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from shapely.geometry.point import Point
from shapely.geometry import box, mapping, LineString
import sys
from typing import Text, Dict, Any
from typing import Any, Dict, Text
import torch

import attr
Expand All @@ -45,7 +45,6 @@ class TextGeoDataset:
unique_cellids_binary: torch.tensor = attr.ib()
label_to_cellid: Dict[int, int] = attr.ib()


@classmethod
def from_TextGeoSplit(cls, train, valid, test, unique_cellids,
unique_cellids_binary, label_to_cellid):
Expand All @@ -64,7 +63,6 @@ def load(cls, dataset_path: Text, train_path_dataset: Text,
valid_path_dataset: Text, test_path_dataset: Text,
unique_cellid_path: Text, tensor_cellid_path: Text,
label_to_cellid_path: Text):


logging.info("Loading dataset from <== {}.".format(dataset_path))
train_dataset = torch.load(train_path_dataset)
Expand All @@ -76,7 +74,9 @@ def load(cls, dataset_path: Text, train_path_dataset: Text,
label_to_cellid_path, allow_pickle='TRUE').item()
tens_cells = torch.load(tensor_cellid_path)
n_cells = len(unique_cellid)
dataset_text = TextGeoDataset(train_dataset, valid_dataset, test_dataset, unique_cellid, tens_cells, label_to_cellid)
dataset_text = TextGeoDataset(
train_dataset, valid_dataset, test_dataset,
unique_cellid, tens_cells, label_to_cellid)

return dataset_text

Expand Down
Loading

0 comments on commit 0f98788

Please sign in to comment.