Skip to content

Commit

Permalink
my.location: let takeout provider be in a separate my.location.google…
Browse files Browse the repository at this point in the history
…; add CI test & enable mypy
  • Loading branch information
karlicoss committed Oct 8, 2020
1 parent 90ada92 commit ba9acc3
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 35 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[submodule "testdata/hpi-testdata"]
path = testdata/hpi-testdata
url = https://github.com/karlicoss/hpi-testdata
[submodule "testdata/track"]
path = testdata/track
url = https://github.com/tajtiattila/track
3 changes: 3 additions & 0 deletions doc/example_config/my/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,6 @@ class exercise:

class bluemaestro:
export_path: Paths = ''

class google:
    # Location of Google Takeout archive(s), consumed by my.location.google.
    # Empty string here is just a stub default for the example config.
    takeout_path: Paths = ''
1 change: 0 additions & 1 deletion my/location/__init__.py

This file was deleted.

64 changes: 31 additions & 33 deletions my/location/takeout.py → my/location/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@

import json
from collections import deque
from datetime import datetime
from datetime import datetime, timezone
from itertools import islice
from pathlib import Path
from subprocess import Popen, PIPE
from typing import Any, Collection, Deque, Iterable, Iterator, List, NamedTuple, Optional, Sequence, IO, Tuple
import re

import pytz

# pip3 install geopy
import geopy # type: ignore
import geopy.distance # type: ignore
Expand All @@ -22,6 +21,11 @@
from ..kython import kompress


# otherwise uses ijson
# todo move to config??
USE_GREP = False


logger = LazyLogger(__name__)


Expand Down Expand Up @@ -57,18 +61,20 @@ def _iter_via_ijson(fo) -> Iterator[TsLatLon]:
)


# todo ugh. fragile, not sure, maybe should do some assert in advance?
def _iter_via_grep(fo) -> Iterator[TsLatLon]:
# grep version takes 5 seconds for 1M items (without processing)
x = [None, None, None]
x = [-1, -1, -1]
for i, line in enumerate(fo):
if i > 0 and i % 3 == 0:
yield tuple(x)
yield tuple(x) # type: ignore[misc]
n = re.search(b': "?(-?\\d+)"?,?$', line) # meh. somewhat fragile...
assert n is not None
j = i % 3
x[j] = int(n.group(1).decode('ascii'))
# make sure it's read what we expected
assert (i + 1) % 3 == 0
yield tuple(x)
yield tuple(x) # type: ignore[misc]


# todo could also use pool? not sure if that would really be faster...
Expand Down Expand Up @@ -102,7 +108,7 @@ def tagger(dt: datetime, point: geopy.Point) -> Tag:
return None

for tsMs, latE7, lonE7 in fit:
dt = datetime.fromtimestamp(tsMs / 1000, tz=pytz.utc)
dt = datetime.fromtimestamp(tsMs / 1000, tz=timezone.utc)
total += 1
if total % 10000 == 0:
logger.info('processing item %d %s', total, dt)
Expand Down Expand Up @@ -150,31 +156,33 @@ def _iter_locations(path: Path, start=0, stop=None) -> Iterator[Location]:
# todo CPath? although not sure if it can be iterative?
ctx = kompress.open(path, _LOCATION_JSON)

# with ctx as fo:
# fit = _iter_via_ijson(fo)
# fit = islice(fit, start, stop)
# yield from _iter_locations_fo(fit)

unzip = f'unzip -p "{path}" "{_LOCATION_JSON}"'
extract = "grep -E '^ .(timestampMs|latitudeE7|longitudeE7)'"
from subprocess import Popen, PIPE
with Popen(f'{unzip} | {extract}', shell=True, stdout=PIPE) as p:
out = p.stdout; assert out is not None
fit = _iter_via_grep(out)
fit = islice(fit, start, stop)
yield from _iter_locations_fo(fit)
if USE_GREP:
unzip = f'unzip -p "{path}" "{_LOCATION_JSON}"'
extract = "grep -E '^ .(timestampMs|latitudeE7|longitudeE7)'"
with Popen(f'{unzip} | {extract}', shell=True, stdout=PIPE) as p:
out = p.stdout; assert out is not None
fit = _iter_via_grep(out)
fit = islice(fit, start, stop)
yield from _iter_locations_fo(fit)
else:
with ctx as fo:
# todo need to open as bytes
fit = _iter_via_ijson(fo)
fit = islice(fit, start, stop)
yield from _iter_locations_fo(fit)
# todo wonder if old takeouts could contribute as well??


def iter_locations(**kwargs) -> Iterator[Location]:
def locations(**kwargs) -> Iterator[Location]:
    """Lazily yield locations from the most recent takeout archive."""
    # TODO need to include older data
    return _iter_locations(path=get_last_takeout(path=_LOCATION_JSON), **kwargs)


# todo deprecate?
def get_locations(*args, **kwargs) -> Sequence[Location]:
    """Eagerly materialize all locations; prefer ``locations`` for lazy iteration."""
    return [loc for loc in locations(*args, **kwargs)]


class LocInterval(NamedTuple):
Expand Down Expand Up @@ -222,7 +230,7 @@ def __getitem__(self, i):
# todo cachew as well?
# TODO maybe if tag is none, we just don't care?
def get_groups(*args, **kwargs) -> List[LocInterval]:
all_locations = iter(iter_locations(*args, **kwargs))
all_locations = iter(locations(*args, **kwargs))
locsi = Window(all_locations)
i = 0
groups: List[LocInterval] = []
Expand Down Expand Up @@ -269,13 +277,3 @@ def dump_group():
pass
dump_group()
return groups


# TODO not sure if necessary anymore...
def update_cache():
    """
    Drop the on-disk cache and repopulate it by iterating over all locations.
    """
    # TODO perhaps set hash to null instead, that's a bit less intrusive
    cp = cache_path()
    if cp.exists():
        cp.unlink()
    # iterating repopulates the cache as a side effect.
    # NOTE: iter_locations was renamed to locations in this module; keep the call consistent.
    for _ in locations():
        pass
1 change: 1 addition & 0 deletions testdata/track
Submodule track added at 2e8a33
37 changes: 37 additions & 0 deletions tests/location.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from pathlib import Path

from more_itertools import one

import pytest # type: ignore


def test() -> None:
    """Smoke-test the google takeout location provider against the bundled track."""
    from my.location.google import locations
    all_locs = list(locations())
    assert len(all_locs) == 3810

    newest = all_locs[-1]
    # should be utc
    assert newest.dt.strftime('%Y%m%d %H:%M:%S') == '20170802 13:01:56'
    # todo approx
    assert newest.lat == 46.5515350
    assert newest.lon == 16.4742742
    # todo check altitude


@pytest.fixture(autouse=True)
def prepare(tmp_path: Path):
    """Zip the test GPS track into a fake takeout and point config.google at it."""
    data_dir = Path(__file__).absolute().parent.parent / 'testdata'
    assert data_dir.exists(), data_dir

    track_json = one(data_dir.rglob('italy-slovenia-2017-07-29.json'))

    # todo ugh. unnecessary zipping, but at the moment takeout provider doesn't support plain dirs
    import zipfile
    archive = tmp_path / 'takeout.zip'
    with zipfile.ZipFile(archive, 'w') as zf:
        zf.writestr('Takeout/Location History/Location History.json', track_json.read_bytes())

    from my.cfg import config
    class user_config:
        takeout_path = tmp_path
    config.google = user_config # type: ignore
    yield
8 changes: 7 additions & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,19 @@ commands =
# todo these are probably not necessary anymore?
python3 -c 'from my.config import stub as config; print(config.key)'
python3 -c 'import my.config; import my.config.repos' # shouldn't fail at least

# my.location.google deps
pip install geopy ijson

python3 -m pytest \
tests/core.py \
tests/misc.py \
tests/get_files.py \
tests/config.py::test_set_repo \
tests/config.py::test_environment_variable \
tests/demo.py \
tests/bluemaestro.py
tests/bluemaestro.py \
tests/location.py
# TODO add; once I figure out porg dependency?? tests/config.py
# TODO run demo.py? just make sure with_my is a bit cleverer?
# TODO e.g. under CI, rely on installing
Expand Down Expand Up @@ -57,6 +62,7 @@ commands =
-p my.body.exercise.cardio \
-p my.body.exercise.cross_trainer \
-p my.bluemaestro \
-p my.location.google \
--txt-report .mypy-coverage \
--html-report .mypy-coverage \
{posargs}
Expand Down

0 comments on commit ba9acc3

Please sign in to comment.