weecology · gmyenni · Feb 6, 2026 · Feb 5, 2026
diff --git a/DataCleaningScripts/clean_pit_tags.R b/DataCleaningScripts/clean_pit_tags.R
@@ -1,3 +1,70 @@
+library(dplyr, warn.conflicts=FALSE, quietly = TRUE)
+
+# Assigns unique individual IDs to new rodent data
+# modified from clean_tags below to work on monthly data
+#
+#
+add_id <- function(raw_data, rodent_data, new_period){
+
+  rodent_data <- rodent_data %>%
+    filter(period<new_period,period>=new_period-36)
+
+  # get all ids
+  all_ids <- rodent_data %>%
+    select(species,tag,pit_tag,id) %>%
+    distinct()
+
+  # get existing tags
+  current_tags <- rodent_data %>%
+    filter(pit_tag==TRUE) %>%
+    select(tag,id) %>%
+    distinct()
+
+  # append the tag type
+  raw_data$pit_tag <- PIT_tag(raw_data$tag, raw_data$species)
+
+  # add PIT tag-based ids, one day at a time (for pesky weekend recaptures)
+  new_data1 <- raw_data %>%
+               filter(day == min(day,na.rm = TRUE)) %>%
+    left_join(current_tags, by="tag") %>%
+    mutate(id = case_when(pit_tag==TRUE & is.na(id) ~ paste0(tag, "_", species, "_1") ,
+                          TRUE ~ id))
+  current_tags <- rbind(current_tags,new_data1[!is.na(new_data1$tag),c(20,31)]) %>%
+    distinct()
+
+  new_data2 <- raw_data %>%
+    filter(!(day %in% new_data1$day)) %>%
+    left_join(current_tags, by="tag") %>%
+    mutate(id = case_when(pit_tag==TRUE & is.na(id) ~ paste0(tag, "_", species, "_1") ,
+                          TRUE ~ id)) 
+
+  new_data <- rbind(new_data1,new_data2)
+
+  # get latest untagged id number
+  max_id <- all_ids %>% filter(pit_tag==FALSE) %>%
+    mutate(untag_id = sub("_.*", "", id)) %>%
+    mutate(untag_id=as.numeric(untag_id)) %>%
+    summarise(max(untag_id,na.rm=TRUE))
+
+  #assign numbers to untagged individuals
+  unks <- which(is.na(new_data$species) == FALSE & new_data$pit_tag==FALSE)
+  nunks <- length(unks)
+
+  start_id <- max_id[1,1] + 1
+  end_id <- start_id + nunks - 1
+  new_data$id[unks] <- start_id:end_id
+
+  # create new PIT tag based ids
+  new_data <- new_data %>%
+    mutate(id = case_when(pit_tag==FALSE & is.na(species)==FALSE ~ paste0(id, "_", species, "_1") ,
+                          TRUE ~ id))
+
+  new_data  %>%
+    dplyr::mutate(id = ifelse(is.na(species), NA, id),
+                  pit_tag = ifelse(is.na(species), NA, pit_tag))
+
+  }
+
 #
 # determines if a tag is a PIT tag by its structure
 #  requires species input for comparison, returns a logical vector

diff --git a/DataCleaningScripts/new_rodent_data.r b/DataCleaningScripts/new_rodent_data.r
@@ -365,23 +365,40 @@ if (length(tags) > 0) {
 }
 
 # Add unique tag ID column
+ws_tags <- add_id(ws, olddat, new_period) 
 
-#ws_tags <- clean_tags(ws, clean = TRUE, quiet = FALSE) 
+old_ids <- olddat %>%
+  filter(period<new_period-36) %>%
+  select(species,tag,pit_tag,id) %>%
+  distinct()
+old_tags <- old_ids %>%
+  filter(pit_tag==TRUE) %>%
+  select(species,tag,pit_tag,id) %>%
+  distinct()
 
-#if(is.na(ws_tags$id[which(!is.na(ws_tags$species))])) { 
-#  print("Duplicate individuals in tag data, recheck tags") }
-#if(dim(ws_tags)!=dim(ws)) { 
-#  print("Records dropped in tag data, recheck tags") }
+if(any(ws_tags$id[!is.na(ws_tags$id)] %in% old_ids$id)) { 
+  print("Duplicate ids created, check ids") }
+
+if(any(ws_tags$tag %in% old_tags$tag)) { 
+  print("Duplicate PIT tags, recheck tags") }
+
+if(any(is.na(ws_tags$id[which(!is.na(ws_tags$species))]))) { 
+  print("Duplicate individuals in tag data, recheck tags") }
+
+if(dim(ws_tags)[1]>dim(ws)[1]) { 
+  print("Records duplicated, this tag has 2 IDs") 
+  print(ws_tags[duplicated(ws_tags[,-c(30:31)]),]) }
+
+if(dim(ws_tags)[1]<dim(ws)[1]) { 
+  print("Records dropped in tag data, check tags") }
 
-#ws <- dplyr::left_join(ws, ws_tags)
-ws<-dplyr::mutate(ws, pit_tag=NA, id=NA)
 
 ##############################################################################
 # 4. Append new data
 ##############################################################################
 
 # make column of record IDs for new data
-newdat = cbind(recordID = seq(max(olddat$recordID) + 1, max(olddat$recordID) + length(ws$month)), ws)
+newdat = cbind(recordID = seq(max(olddat$recordID) + 1, max(olddat$recordID) + length(ws$month)), ws_tags)
 
 # append to existing data file
 #write.table(newdat, "./Rodents/Portal_rodent.csv", row.names = F, na = "", append=T, sep=",", col.names = F, quote = c(9,10,11,12,13,14,15,16,17,20,21,22,23,24,25,26,27,28,29))