forked from eilishmcmaster/BaseSoSAnalyses
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathploidy.R
More file actions
87 lines (68 loc) · 3.2 KB
/
ploidy.R
File metadata and controls
87 lines (68 loc) · 3.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
library(openxlsx) #reading and writing xlsx
library(stringr) #replacing strings while data wrangling
library(dplyr) #data wrangling
library(data.table) #data wrangling
library(ggfortify) # calculating glm and pca
library(ggpubr) # ggarrange
library(pracma) # maths
library(RRtools) #Jason's package for dart data
library(ggthemes) #themes for ggplots
library(RColorBrewer) #used for making colour scemes for plots
library(ozmaps) #draws australia coastlines and state boundaries
library(adegenet) #essential for processing dart data
library(ggrepel) #used for plotting labels on ggplote
topskip <- 6
nmetavar <- 18
RandRbase <- "" #main directory
species <- "LantCama" #species name
dataset <- "DLan22-7500" #dart order
missingness <- 0.3
# source my custom functions
devtools::source_url("https://github.com/eilishmcmaster/SoS_functions/blob/main/sos_functions.R?raw=TRUE")
setwd("/Users/eilishmcmaster/Documents/LantCama")
devtools::source_url("https://github.com/eilishmcmaster/SoS_functions/blob/ea46cc026bb56cafd339f5af383c94f46e0de2dd/read_dart_counts_csv_faster_new.r?raw=TRUE")
counts2 <- read_dart_counts_csv_faster('LantCama/dart_raw/Report_DLan22-7500_3_moreOrders_SNPcount_3.csv', # import readcount data
minAlleleCount=1,
minGenotypeCount=0)
m2 <- custom.read(species, dataset) #read custom metadata csv
read_histogram_function <- function(meta, sp, counts, filter_reads){
species <- unique(meta$sp[!is.na(meta$sp)])
# filter the reads
combined_reads <- counts$c1 + counts$c2
counts$c1[combined_reads < filter_reads] <- NA
counts$c2[combined_reads < filter_reads] <- NA
# get the proportions for all (rows are samples)
c3_min <- pmin(t(counts$c1), t(counts$c2), na.rm = TRUE) / t(combined_reads)
c3_max <- pmax(t(counts$c1), t(counts$c2), na.rm = TRUE) / t(combined_reads)
c3 <- cbind(c3_min, c3_max)
for (i in seq_along(species)) {
print(paste("Running", species[i], "now"))
samples <- meta$sample[meta$sp == species[i]]
c3_species <- c3[row.names(c3) %in% samples, ]
par(mfrow = c(4, 5), mai = c(0.5, 0.2, 0.2, 0.2)) # Set up a 2 x 2 plotting space
hist(c3_species, main = species[i], xlab = "", ylab = "", breaks = 50, col = "red", xaxt = 'n')
axis(side = 1, at = c(0, 0.25, 0.5, 0.75, 1), labels = c(0, 0.25, 0.5, 0.75, 1))
if(class(c3_species) %in% c("array", "matrix")){
loop.vector <- 1:nrow(c3_species)
for (i in loop.vector) { # Loop over loop.vector
# store data in row.i as x
x <- c3_species[i,]
if(sum(x, na.rm=TRUE)>0){ # skip empties
# Plot histogram of x
hist(x,breaks=50,
main = paste(rownames(c3_species)[i]),
xlab = "",#"MAF reads/ total reads",
ylab="",
xlim = c(0, 1),
xaxt='n')
axis(side = 1, at = c(0, 0.25, 0.5, 0.75, 1), labels=c(0, 0.25, 0.5, 0.75, 1))
}
}
}
}
}
start <- Sys.time()
pdf(file="lantana_all_10read_nomaf_pminmethod.pdf")
read_histogram_function(m2, sp2, counts2, 10) #needs meta, analysis column, counts data, and minimum number of reads per cell
dev.off()
fin <- Sys.time()