-
Notifications
You must be signed in to change notification settings - Fork 229
/
Copy pathconversion.py
327 lines (279 loc) · 9.57 KB
/
conversion.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
"""
Functions to convert data types into ctypes friendly formats.
"""
import warnings
import numpy as np
from pygmt.exceptions import GMTInvalidInput
def dataarray_to_matrix(grid):
"""
Transform an xarray.DataArray into a data 2-D array and metadata.
Use this to extract the underlying numpy array of data and the region and
increment for the grid.
Only allows grids with two dimensions and constant grid spacing (GMT
doesn't allow variable grid spacing). If the latitude and/or longitude
increments of the input grid are negative, the output matrix will be
sorted by the DataArray coordinates to yield positive increments.
If the underlying data array is not C contiguous, for example if it's a
slice of a larger grid, a copy will need to be generated.
Parameters
----------
grid : xarray.DataArray
The input grid as a DataArray instance. Information is retrieved from
the coordinate arrays, not from headers.
Returns
-------
matrix : 2-D array
The 2-D array of data from the grid.
region : list
The West, East, South, North boundaries of the grid.
inc : list
The grid spacing in East-West and North-South, respectively.
Raises
------
GMTInvalidInput
If the grid has more than two dimensions or variable grid spacing.
Examples
--------
>>> from pygmt.datasets import load_earth_relief
>>> # Use the global Earth relief grid with 1 degree spacing
>>> grid = load_earth_relief(resolution="01d", registration="pixel")
>>> matrix, region, inc = dataarray_to_matrix(grid)
>>> print(region)
[-180.0, 180.0, -90.0, 90.0]
>>> print(inc)
[1.0, 1.0]
>>> type(matrix)
<class 'numpy.ndarray'>
>>> print(matrix.shape)
(180, 360)
>>> matrix.flags.c_contiguous
True
>>> # Using a slice of the grid, the matrix will be copied to guarantee
>>> # that it's C-contiguous in memory. The increment should be unchanged.
>>> matrix, region, inc = dataarray_to_matrix(grid[10:41, 30:101])
>>> matrix.flags.c_contiguous
True
>>> print(matrix.shape)
(31, 71)
>>> print(region)
[-150.0, -79.0, -80.0, -49.0]
>>> print(inc)
[1.0, 1.0]
>>> # but not if only taking every other grid point.
>>> matrix, region, inc = dataarray_to_matrix(grid[10:41:2, 30:101:2])
>>> matrix.flags.c_contiguous
True
>>> print(matrix.shape)
(16, 36)
>>> print(region)
[-150.5, -78.5, -80.5, -48.5]
>>> print(inc)
[2.0, 2.0]
"""
if len(grid.dims) != 2:
raise GMTInvalidInput(
f"Invalid number of grid dimensions '{len(grid.dims)}'. Must be 2."
)
# Extract region and inc from the grid
region = []
inc = []
# Reverse the dims because it is rows, columns ordered. In geographic
# grids, this would be North-South, East-West. GMT's region and inc are
# East-West, North-South.
for dim in grid.dims[::-1]:
coord = grid.coords[dim].values
coord_incs = coord[1:] - coord[0:-1]
coord_inc = coord_incs[0]
if not np.allclose(coord_incs, coord_inc):
# calculate the increment if irregular spacing is found
coord_inc = (coord[-1] - coord[0]) / (coord.size - 1)
msg = (
f"Grid may have irregular spacing in the '{dim}' dimension, "
"but GMT only supports regular spacing. Calculated regular spacing "
f"{coord_inc} is assumed in the '{dim}' dimension."
)
warnings.warn(msg, category=RuntimeWarning)
if coord_inc == 0:
raise GMTInvalidInput(
f"Grid has a zero increment in the '{dim}' dimension."
)
region.extend(
[
coord.min() - coord_inc / 2 * grid.gmt.registration,
coord.max() + coord_inc / 2 * grid.gmt.registration,
]
)
inc.append(coord_inc)
if any(i < 0 for i in inc): # Sort grid when there are negative increments
inc = [abs(i) for i in inc]
grid = grid.sortby(variables=list(grid.dims), ascending=True)
matrix = as_c_contiguous(grid[::-1].values)
return matrix, region, inc
def vectors_to_arrays(vectors):
"""
Convert 1-D vectors (lists, arrays, or pandas.Series) to C contiguous 1-D
arrays.
Arrays must be in C contiguous order for us to pass their memory pointers
to GMT. If any are not, convert them to C order (which requires copying the
memory). This usually happens when vectors are columns of a 2-D array or
have been sliced.
If a vector is a list or pandas.Series, get the underlying numpy array.
Parameters
----------
vectors : list of lists, 1-D arrays, or pandas.Series
The vectors that must be converted.
Returns
-------
arrays : list of 1-D arrays
The converted numpy arrays
Examples
--------
>>> import numpy as np
>>> import pandas as pd
>>> data = np.array([[1, 2], [3, 4], [5, 6]])
>>> vectors = [data[:, 0], data[:, 1], pd.Series(data=[-1, -2, -3])]
>>> all(i.flags.c_contiguous for i in vectors)
False
>>> all(isinstance(i, np.ndarray) for i in vectors)
False
>>> arrays = vectors_to_arrays(vectors)
>>> all(i.flags.c_contiguous for i in arrays)
True
>>> all(isinstance(i, np.ndarray) for i in arrays)
True
>>> data = [[1, 2], (3, 4), range(5, 7)]
>>> all(isinstance(i, np.ndarray) for i in vectors_to_arrays(data))
True
"""
arrays = [as_c_contiguous(np.asarray(i)) for i in vectors]
return arrays
def as_c_contiguous(array):
"""
Ensure a numpy array is C contiguous in memory.
If the array is not C contiguous, a copy will be necessary.
Parameters
----------
array : 1-D array
The numpy array
Returns
-------
array : 1-D array
Array is C contiguous order.
Examples
--------
>>> import numpy as np
>>> data = np.array([[1, 2], [3, 4], [5, 6]])
>>> x = data[:, 0]
>>> x
array([1, 3, 5])
>>> x.flags.c_contiguous
False
>>> new_x = as_c_contiguous(x)
>>> new_x
array([1, 3, 5])
>>> new_x.flags.c_contiguous
True
>>> x = np.array([8, 9, 10])
>>> x.flags.c_contiguous
True
>>> as_c_contiguous(x).flags.c_contiguous
True
"""
if not array.flags.c_contiguous:
return array.copy(order="C")
return array
def kwargs_to_ctypes_array(argument, kwargs, dtype):
"""
Convert an iterable argument from kwargs into a ctypes array variable.
If the argument is not present in kwargs, returns ``None``.
Parameters
----------
argument : str
The name of the argument.
kwargs : dict
Dictionary of keyword arguments.
dtype : ctypes type
The ctypes array type (e.g., ``ctypes.c_double*4``)
Returns
-------
ctypes_value : ctypes array or None
Examples
--------
>>> import ctypes as ct
>>> value = kwargs_to_ctypes_array("bla", {"bla": [10, 10]}, ct.c_long * 2)
>>> type(value)
<class 'pygmt.clib.conversion.c_long_Array_2'>
>>> should_be_none = kwargs_to_ctypes_array(
... "swallow", {"bla": 1, "foo": [20, 30]}, ct.c_int * 2
... )
>>> print(should_be_none)
None
"""
if argument in kwargs:
return dtype(*kwargs[argument])
return None
def array_to_datetime(array):
"""
Convert a 1-D datetime array from various types into numpy.datetime64.
If the input array is not in legal datetime formats, raise a ValueError
exception.
Parameters
----------
array : list or 1-D array
The input datetime array in various formats.
Supported types:
- str
- numpy.datetime64
- pandas.DateTimeIndex
- datetime.datetime and datetime.date
Returns
-------
array : 1-D datetime array in numpy.datetime64
Raises
------
ValueError
If the datetime string is invalid.
Examples
--------
>>> import datetime
>>> # numpy.datetime64 array
>>> x = np.array(
... ["2010-06-01", "2011-06-01T12", "2012-01-01T12:34:56"],
... dtype="datetime64[ns]",
... )
>>> array_to_datetime(x)
array(['2010-06-01T00:00:00.000000000', '2011-06-01T12:00:00.000000000',
'2012-01-01T12:34:56.000000000'], dtype='datetime64[ns]')
>>> # pandas.DateTimeIndex array
>>> import pandas as pd
>>> x = pd.date_range("2013", freq="YS", periods=3)
>>> array_to_datetime(x)
array(['2013-01-01T00:00:00.000000000', '2014-01-01T00:00:00.000000000',
'2015-01-01T00:00:00.000000000'], dtype='datetime64[ns]')
>>> # Python's built-in date and datetime
>>> x = [datetime.date(2018, 1, 1), datetime.datetime(2019, 1, 1)]
>>> array_to_datetime(x)
array(['2018-01-01T00:00:00.000000', '2019-01-01T00:00:00.000000'],
dtype='datetime64[us]')
>>> # Raw datetime strings in various format
>>> x = [
... "2018",
... "2018-02",
... "2018-03-01",
... "2018-04-01T01:02:03",
... ]
>>> array_to_datetime(x)
array(['2018-01-01T00:00:00', '2018-02-01T00:00:00',
'2018-03-01T00:00:00', '2018-04-01T01:02:03'],
dtype='datetime64[s]')
>>> # Mixed datetime types
>>> x = [
... "2018-01-01",
... np.datetime64("2018-01-01"),
... datetime.datetime(2018, 1, 1),
... ]
>>> array_to_datetime(x)
array(['2018-01-01T00:00:00.000000', '2018-01-01T00:00:00.000000',
'2018-01-01T00:00:00.000000'], dtype='datetime64[us]')
"""
return np.asarray(array, dtype=np.datetime64)