-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjoin-anly.r
42 lines (39 loc) · 1.18 KB
/
join-anly.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
### Data wrangling
# All relative paths below ('columns', 'training.csv', 'test.csv') are
# resolved against the project folder, so move there first: jump to the
# home directory, then descend into the project path relative to it.
setwd("~/")
setwd("Dropbox/my_code/kaggle_listings")
# After running the python script, all of the feature csv's were put
# into a folder 'columns'.
x <- list.files('columns')
# Split the file names by origin: a name containing "train" is a
# train.json feature file; otherwise a name containing "test" is a
# test.json feature file ("train" wins if a name contains both).
# The result vectors are sized by what is actually in the folder rather
# than a fixed count, so no NA placeholders remain when the number of
# feature files differs from what was expected, and an empty folder
# yields two empty vectors instead of crashing inside the loop.
is.train <- grepl("train", x)
is.test <- !is.train & grepl("test", x)
train.names <- x[is.train]
test.names <- x[is.test]
# One message per file that matches neither pattern.
for (unmatched in x[!(is.train | is.test)]) {
  print("grepl error!")
}
print(train.names)
print(test.names)
# Build the training table: read every training feature csv from the
# 'columns' folder and join them one by one on the shared 'id' column,
# then save the result as 'training.csv' in the working directory.
dir.intro <- paste0(getwd(), "/columns/")
# Ignore NA placeholders left in the name vector so only real files
# are read.
train.files <- train.names[!is.na(train.names)]
if (length(train.files) == 0) {
  stop("No training feature files found in 'columns'", call. = FALSE)
}
train.df <- read.csv(paste0(dir.intro, train.files[1]))
# Iterate over the remaining names directly; unlike 2:length(...), this
# is simply an empty loop when there is only one file.
for (next.file in train.files[-1]) {
  df.next <- read.csv(paste0(dir.intro, next.file))
  # merge() defaults to an inner join: an id missing from any one file
  # is dropped from the result.
  train.df <- merge(train.df, df.next, by = "id")
}
# write.csv() keeps its default row.names = TRUE, so the row index is
# written as the first (unnamed) column of the output file.
write.csv(train.df, file = "training.csv")
# Build the test table the same way as the training table: read every
# test feature csv from the 'columns' folder (dir.intro is defined
# earlier in this script), join them on 'id', and save as 'test.csv'.
# Ignore NA placeholders left in the name vector so only real files
# are read.
test.files <- test.names[!is.na(test.names)]
if (length(test.files) == 0) {
  stop("No test feature files found in 'columns'", call. = FALSE)
}
test.df <- read.csv(paste0(dir.intro, test.files[1]))
# Iterate over the remaining names directly; unlike 2:length(...), this
# is simply an empty loop when there is only one file.
for (next.file in test.files[-1]) {
  df.next <- read.csv(paste0(dir.intro, next.file))
  # merge() defaults to an inner join: an id missing from any one file
  # is dropped from the result.
  test.df <- merge(test.df, df.next, by = "id")
}
# write.csv() keeps its default row.names = TRUE, so the row index is
# written as the first (unnamed) column of the output file.
write.csv(test.df, file = "test.csv")
######