Skip to content

Commit

Permalink
some cleaning
Browse files Browse the repository at this point in the history
  • Loading branch information
agricolamz committed Apr 17, 2024
1 parent fcf2247 commit 30c53ab
Show file tree
Hide file tree
Showing 10 changed files with 4,734 additions and 14,387 deletions.
2,151 changes: 0 additions & 2,151 deletions data/asya_features.csv

This file was deleted.

9,789 changes: 4,733 additions & 5,056 deletions data/database.csv

Large diffs are not rendered by default.

Binary file modified data/database.xlsx
Binary file not shown.
1,468 changes: 0 additions & 1,468 deletions data/kostya_features.csv

This file was deleted.

2,284 changes: 0 additions & 2,284 deletions data/nikita_phonology_3.csv

This file was deleted.

1,123 changes: 0 additions & 1,123 deletions data/rutul_dialectology_ilya.csv

This file was deleted.

2,283 changes: 0 additions & 2,283 deletions data/rutul_dialects_200.csv

This file was deleted.

Binary file removed data/verb_2024-02-04.xlsx
Binary file not shown.
19 changes: 0 additions & 19 deletions scripts/cleaning_data_for_database.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,25 +119,6 @@ df |>
arrange(feature_id) |>
write_csv("data/database.csv", na = "", append = TRUE)

# add Kostya's and Asya's nouns -------------------------------------------
# Appends the noun features to data/database.csv, numbering them after the
# highest feature_id already stored there.

# Highest feature_id currently in the database; new ids continue from it.
max_id_in_db <- read_csv("data/database.csv", col_select = "feature_id") |>
  pull(feature_id) |>
  max(na.rm = TRUE)

# max() of an empty or all-NA column is -Inf; stop here instead of appending
# rows with a broken feature_id.
stopifnot(is.finite(max_id_in_db))

# Rows without a value are dropped before ids are assigned.
df <- read_csv("data/noun_features_2023-05-25.csv") |>
  filter(!is.na(value))

df |>
  # one id per feature_title: factor codes offset by the current maximum id
  mutate(feature_id = as.double(factor(feature_title)) + max_id_in_db) |>
  # column order is kept exactly as before because the rows are appended
  select(
    feature_id, feature_title, feature_lexeme, feature_description,
    collected, compiled, updated_day, updated_month, updated_year,
    domain, settlement, value, stimuli, answer
  ) |>
  arrange(feature_id) |>
  write_csv("data/database.csv", na = "", append = TRUE)


# add Maxim's demonstratives ----------------------------------------------
read_csv("data/database.csv", col_select = "feature_id") |>
distinct() |>
Expand Down
4 changes: 1 addition & 3 deletions scripts/visualize.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ library(tidyverse)
library(widyr)

# all
# Load the database CSV from the GitHub raw URL into `df`.
# NOTE(review): this listing carries the same read twice, once with
# right-assignment (`->`) and once with left-assignment (`<-`); presumably
# these are the before/after sides of the commit diff. Both store the result
# in `df`.
read_csv("https://raw.githubusercontent.com/LingConLab/rutul_dialectology/master/data/database.csv") ->
df
df <- read_csv("https://raw.githubusercontent.com/LingConLab/rutul_dialectology/master/data/database.csv")

# Suffix appended to plot titles, e.g. str_c("neighborNet ", for_plot_title).
for_plot_title <- "with all stimuli"

Expand Down Expand Up @@ -102,7 +101,6 @@ dist_gold_standard |>
plot()
title(main = str_c("neighborNet ", for_plot_title))


dist_gold_standard |>
cmdscale(k = 3) |>
as.data.frame() |>
Expand Down

0 comments on commit 30c53ab

Please sign in to comment.