-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_dummies_test.py
More file actions
195 lines (187 loc) · 9.55 KB
/
Copy pathget_dummies_test.py
File metadata and controls
195 lines (187 loc) · 9.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
Python 3.6.1 |Continuum Analytics, Inc.| (default, May 11 2017, 13:04:09)
[GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600.0.57)] on darwin
Type "copyright", "credits" or "license()" for more information.
WARNING: The version of Tcl/Tk (8.5.9) in use may be unstable.
Visit http://www.python.org/download/mac/tcltk/ for current information.
>>> from pandas import DataFrame,Series
>>> X = pd.DataFrame([[["apples","pears","oranges"],["guava","strawberry","passion"]]])
Traceback (most recent call last):
  File "<pyshell#1>", line 1, in <module>
    X = pd.DataFrame([[["apples","pears","oranges"],["guava","strawberry","passion"]]])
NameError: name 'pd' is not defined
>>>  X = DataFrame([[["apples","pears","oranges"],["guava","strawberry","passion"]]])
SyntaxError: unexpected indent
>>> X = DataFrame([[["apples","pears","oranges"],["guava","strawberry","passion"]]])
>>> df[0]
Traceback (most recent call last):
  File "<pyshell#4>", line 1, in <module>
    df[0]
NameError: name 'df' is not defined
>>> X
                          0                             1
0  [apples, pears, oranges]  [guava, strawberry, passion]
>>> tags = X[0].apply(pd.Series)
Traceback (most recent call last):
  File "<pyshell#6>", line 1, in <module>
    tags = X[0].apply(pd.Series)
NameError: name 'pd' is not defined
>>> tags = X[0].apply(Series)
>>> tags
        0      1        2
0  apples  pears  oranges
>>> tags = tags.rename(columns = lambda x: 'tag_'+str(x))
>>> tags
    tag_0  tag_1    tag_2
0  apples  pears  oranges
>>> tags.get_dummies()
Traceback (most recent call last):
  File "<pyshell#11>", line 1, in <module>
    tags.get_dummies()
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/pandas/core/generic.py", line 3614, in __getattr__
    return object.__getattribute__(self, name)
AttributeError: 'DataFrame' object has no attribute 'get_dummies'
>>> from pandas import get_dummies
>>> get_dummies(tags)
   tag_0_apples  tag_1_pears  tag_2_oranges
0             1            1              1
>>> from sklearn.preprocessing import MultiLabelBinarizer()
SyntaxError: invalid syntax
>>> from sklearn.preprocessing import MultiLabelBinarizer
>>> binarizer = MultiLabelBinarizer().fit(tags)
>>> binarizer.classes_
array(['0', '1', '2', '_', 'a', 'g', 't'], dtype=object)
>>> binarizer = MultiLabelBinarizer().fit(tags.values)
>>> binarizer.classes_
array(['apples', 'oranges', 'pears'], dtype=object)
>>> binarizer.transform(tags)
Traceback (most recent call last):
  File "<pyshell#20>", line 1, in <module>
    binarizer.transform(tags)
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/sklearn/preprocessing/label.py", line 765, in transform
    yt = self._transform(y, class_to_index)
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/sklearn/preprocessing/label.py", line 789, in _transform
    indices.extend(set(class_mapping[label] for label in labels))
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/sklearn/preprocessing/label.py", line 789, in <genexpr>
    indices.extend(set(class_mapping[label] for label in labels))
KeyError: 't'
>>> binarizer.transform(tags.values)
array([[1, 1, 1]])
>>> X = DataFrame([[["apples","pears","oranges"],["guava","strawberry","passion"]],[["bananas","grapes","grapefruit"],["agave","acai","blueberry"]]])
>>> X
                               0                             1
0       [apples, pears, oranges]  [guava, strawberry, passion]
1  [bananas, grapes, grapefruit]      [agave, acai, blueberry]
>>> df.applymap(pd.Series)
Traceback (most recent call last):
  File "<pyshell#24>", line 1, in <module>
    df.applymap(pd.Series)
NameError: name 'df' is not defined
>>> X.applymap(pd.Series)
Traceback (most recent call last):
  File "<pyshell#25>", line 1, in <module>
    X.applymap(pd.Series)
NameError: name 'pd' is not defined
>>> X.applymap(Series)
0 \
0 0 apples
1 pears
2 oranges
dtype: ...
1 0 bananas
1 grapes
2 grapefrui...
1
0 0 guava
1 strawberry
2 passio...
1 0 agave
1 acai
2 blueberry
d...
>>> X[0].apply(Series)
         0       1           2
0   apples   pears     oranges
1  bananas  grapes  grapefruit
>>> get_dummies(X[0].apply(Series))
   0_apples  0_bananas  1_grapes  1_pears  2_grapefruit  2_oranges
0         1          0         0        1             0          1
1         0          1         1        0             1          0
>>> get_dummies(Series([["apples","bananas","oranges"],["grapes","pears","grapefruit"]]))
Traceback (most recent call last):
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/pandas/core/categorical.py", line 330, in __init__
    codes, categories = factorize(values, sort=True)
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/pandas/core/algorithms.py", line 471, in factorize
    labels = table.get_labels(values, uniques, 0, na_sentinel, check_nulls)
  File "pandas/_libs/hashtable_class_helper.pxi", line 1367, in pandas._libs.hashtable.PyObjectHashTable.get_labels
TypeError: unhashable type: 'list'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<pyshell#29>", line 1, in <module>
    get_dummies(Series([["apples","bananas","oranges"],["grapes","pears","grapefruit"]]))
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/pandas/core/reshape/reshape.py", line 1215, in get_dummies
    sparse=sparse, drop_first=drop_first)
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/pandas/core/reshape/reshape.py", line 1222, in _get_dummies_1d
    codes, levels = _factorize_from_iterable(Series(data))
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/pandas/core/categorical.py", line 2324, in _factorize_from_iterable
    cat = Categorical(values, ordered=True)
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/pandas/core/categorical.py", line 332, in __init__
    codes, categories = factorize(values, sort=False)
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/pandas/core/algorithms.py", line 471, in factorize
    labels = table.get_labels(values, uniques, 0, na_sentinel, check_nulls)
  File "pandas/_libs/hashtable_class_helper.pxi", line 1367, in pandas._libs.hashtable.PyObjectHashTable.get_labels
TypeError: unhashable type: 'list'
>>> get_dummies(Series(DataFrame([["apples","bananas","oranges"],["grapes","pears","grapefruit"]])))
Traceback (most recent call last):
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/pandas/core/common.py", line 399, in _asarray_tuplesafe
    result[:] = values
ValueError: could not broadcast input array from shape (2,3) into shape (2)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<pyshell#30>", line 1, in <module>
    get_dummies(Series(DataFrame([["apples","bananas","oranges"],["grapes","pears","grapefruit"]])))
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/pandas/core/series.py", line 264, in __init__
    raise_cast_failure=True)
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/pandas/core/series.py", line 3277, in _sanitize_array
    subarr = _asarray_tuplesafe(data, dtype=dtype)
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/pandas/core/common.py", line 402, in _asarray_tuplesafe
    result[:] = [tuple(x) for x in values]
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/pandas/core/common.py", line 402, in <listcomp>
    result[:] = [tuple(x) for x in values]
TypeError: 'int' object is not iterable
>>> Y = Series(DataFrame([["apples","bananas","oranges"],["grapes","pears","grapefruit"]]))
Traceback (most recent call last):
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/pandas/core/common.py", line 399, in _asarray_tuplesafe
    result[:] = values
ValueError: could not broadcast input array from shape (2,3) into shape (2)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<pyshell#31>", line 1, in <module>
    Y = Series(DataFrame([["apples","bananas","oranges"],["grapes","pears","grapefruit"]]))
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/pandas/core/series.py", line 264, in __init__
    raise_cast_failure=True)
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/pandas/core/series.py", line 3277, in _sanitize_array
    subarr = _asarray_tuplesafe(data, dtype=dtype)
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/pandas/core/common.py", line 402, in _asarray_tuplesafe
    result[:] = [tuple(x) for x in values]
  File "/Users/montana/miniconda3/lib/python3.6/site-packages/pandas/core/common.py", line 402, in <listcomp>
    result[:] = [tuple(x) for x in values]
TypeError: 'int' object is not iterable
>>> X[0] = pd.Series([["apples","oranges","pears"],["bananas","grapes","grapefruit"]])
Traceback (most recent call last):
  File "<pyshell#32>", line 1, in <module>
    X[0] = pd.Series([["apples","oranges","pears"],["bananas","grapes","grapefruit"]])
NameError: name 'pd' is not defined
>>> X[0] = Series([["apples","oranges","pears"],["bananas","grapes","grapefruit"]])
>>> X[0]
0         [apples, oranges, pears]
1    [bananas, grapes, grapefruit]
Name: 0, dtype: object
>>> X[0].apply(Series)
         0        1           2
0   apples  oranges       pears
1  bananas   grapes  grapefruit
>>> get_dummies(X[0].apply(Series))
   0_apples  0_bananas  1_grapes  1_oranges  2_grapefruit  2_pears
0         1          0         0          1             0        1
1         0          1         1          0             1        0
>>>