@@ -92,3 +92,281 @@ cal_house:
9292 - Sold Price
9393 metric : rmse
9494 problem_type : regression
95+ base : &base
96+ url : s3://automl-mm-bench/{name}/{split}.csv
97+ test_split_name : test
98+ splits :
99+ - train
100+ - test
101+ feature_columns :
102+ - ImageID
103+ label_columns :
104+ - LabelName
105+ image_columns :
106+ text_columns :
107+ columns_to_drop :
108+ metric : acc
109+ problem_type : multiclass
110+
111+
112+ prod :
113+ << : *base
114+ url : s3://automl-mm-bench/machine_hack_product_sentiment/{split}.csv
115+ test_split_name : dev
116+ feature_columns :
117+ - Product_Description
118+ - Product_Type
119+ label_columns :
120+ - Sentiment
121+
122+ airbnb :
123+ << : *base
124+ url : s3://automl-mm-bench/airbnb_melbourne/{split}.pq
125+ feature_columns :
126+ null
127+ label_columns :
128+ - price_label
129+ ignore_columns :
130+ - id
131+ - listing_url
132+ - scrape_id
133+ - last_scraped
134+ - picture_url
135+ - host_id
136+ - host_url
137+ - host_name
138+ - host_thumbnail_url
139+ - host_picture_url
140+ - monthly_price
141+ - weekly_price
142+ - price
143+ - calendar_last_scraped
144+
145+ channel :
146+ << : *base
147+ url : s3://automl-mm-bench/news_channel/{split}.csv
148+ feature_columns :
149+ null
150+ label_columns :
151+ - channel
152+ ignore_columns :
153+ null
154+
155+ wine :
156+ << : *base
157+ url : s3://automl-mm-bench/wine_reviews/{split}.csv
158+ feature_columns :
159+ null
160+ label_columns :
161+ - variety
162+ ignore_columns :
163+ null
164+
165+ imdb :
166+ << : *base
167+ url : s3://automl-mm-bench/imdb_genre_prediction/{split}.csv
168+ feature_columns :
169+ null
170+ label_columns :
171+ - Genre_is_Drama
172+ ignore_columns :
173+ null
174+ metric : roc_auc
175+ problem_type : binary
176+
177+ jigsaw :
178+ << : *base
179+ url : s3://automl-mm-bench/jigsaw_unintended_bias100K/{split}.pq
180+ feature_columns :
181+ - comment_text
182+ - asian
183+ - atheist
184+ - bisexual
185+ - black
186+ - buddhist
187+ - christian
188+ - female
189+ - heterosexual
190+ - hindu
191+ - homosexual_gay_or_lesbian
192+ - intellectual_or_learning_disability
193+ - jewish
194+ - latino
195+ - male
196+ - muslim
197+ - other_disability
198+ - other_gender
199+ - other_race_or_ethnicity
200+ - other_religion
201+ - other_sexual_orientation
202+ - physical_disability
203+ - psychiatric_or_mental_illness
204+ - transgender
205+ - white
206+ - funny
207+ - wow
208+ - sad
209+ - likes
210+ - disagree
211+ label_columns :
212+ - target
213+ metric : roc_auc
214+ problem_type : binary
215+
216+ fake :
217+ << : *base
218+ url : s3://automl-mm-bench/fake_job_postings2/{split}.csv
219+ feature_columns :
220+ null
221+ label_columns :
222+ - fraudulent
223+ ignore_columns :
224+ null
225+ metric : roc_auc
226+ problem_type : binary
227+
228+ kick :
229+ << : *base
230+ url : s3://automl-mm-bench/kick_starter_funding/{split}.csv
231+ feature_columns :
232+ null
233+ label_columns :
234+ - final_status
235+ ignore_columns :
236+ null
237+ metric : roc_auc
238+ problem_type : binary
239+
240+ ae :
241+ << : *base
242+ url : s3://automl-mm-bench/ae_price_prediction/{split}.pq
243+ feature_columns :
244+ null
245+ label_columns :
246+ - price
247+ ignore_columns :
248+ - mrp
249+ - pdp_url
250+ metric : r2
251+ problem_type : regression
252+
253+ qaa :
254+ << : *base
255+ url : s3://automl-mm-bench/google_quest_qa/{split}.pq
256+ test_split_name : dev
257+ feature_columns :
258+ - question_title
259+ - question_body
260+ - answer
261+ - category
262+ label_columns :
263+ - answer_type_reason_explanation
264+ metric : r2
265+ problem_type : regression
266+
267+ qaq :
268+ << : *base
269+ url : s3://automl-mm-bench/google_quest_qa/{split}.pq
270+ test_split_name : dev
271+ feature_columns :
272+ - question_title
273+ - question_body
274+ - answer
275+ - category
276+ label_columns :
277+ - question_type_reason_explanation
278+ metric : r2
279+ problem_type : regression
280+
281+ cloth :
282+ << : *base
283+ url : s3://automl-mm-bench/women_clothing_review/{split}.pq
284+ feature_columns :
285+ - Title
286+ - Review Text
287+ - Age
288+ - Division Name
289+ - Department Name
290+ - Class Name
291+ label_columns :
292+ - Rating
293+ metric : r2
294+ problem_type : regression
295+
296+ mercari :
297+ << : *base
298+ url : s3://automl-mm-bench/mercari_price_suggestion100K/{split}.pq
299+ feature_columns :
300+ null
301+ label_columns :
302+ - log_price
303+ ignore_columns :
304+ - train_id
305+ - price
306+ metric : r2
307+ problem_type : regression
308+
309+ jc :
310+ << : *base
311+ url : s3://automl-mm-bench/jc_penney_products/{split}.csv
312+ feature_columns :
313+ null
314+ label_columns :
315+ - sale_price
316+ ignore_columns :
317+ null
318+ metric : r2
319+ problem_type : regression
320+
321+ pop :
322+ << : *base
323+ url : s3://automl-mm-bench/news_popularity2/{split}.csv
324+ feature_columns :
325+ null
326+ label_columns :
327+ - log_shares
328+ ignore_columns :
329+ null
330+ metric : r2
331+ problem_type : regression
332+
333+ book :
334+ << : *base
335+ url : s3://automl-mm-bench/machine_hack_competitions/predict_the_price_of_books/{split}.csv
336+ feature_columns :
337+ - Title
338+ - Author
339+ - Edition
340+ - Reviews
341+ - Ratings
342+ - Synopsis
343+ - Genre
344+ - BookCategory
345+ label_columns :
346+ - Price
347+ ignore_columns :
348+ null
349+ metric : r2
350+ problem_type : regression
351+
352+ salary :
353+ << : *base
354+ url : s3://automl-mm-bench/machine_hack_competitions/predict_the_data_scientists_salary_in_india_hackathon/{split}.csv
355+ feature_columns :
356+ null
357+ label_columns :
358+ - salary
359+ ignore_columns :
360+ null
361+ metric : acc
362+ problem_type : multiclass
363+
364+ house :
365+ << : *base
366+ url : s3://automl-mm-bench/kaggle-california-house-prices/{split}.csv
367+ feature_columns :
368+ null
369+ label_columns :
370+ - Sold Price
371+ metric : r2
372+ problem_type : regression
0 commit comments