diff --git a/InfoAndInputs/.gitignore b/InfoAndInputs/.gitignore
new file mode 100644
index 00000000..e43b0f98
--- /dev/null
+++ b/InfoAndInputs/.gitignore
@@ -0,0 +1 @@
+.DS_Store
diff --git a/phyloscannerR/.gitignore b/phyloscannerR/.gitignore
index 807ea251..728e389b 100644
--- a/phyloscannerR/.gitignore
+++ b/phyloscannerR/.gitignore
@@ -1,3 +1,4 @@
 .Rproj.user
 .Rhistory
 .RData
+.DS_Store
diff --git a/phyloscannerR/DESCRIPTION b/phyloscannerR/DESCRIPTION
index 05d5a78c..003b530d 100644
--- a/phyloscannerR/DESCRIPTION
+++ b/phyloscannerR/DESCRIPTION
@@ -4,7 +4,7 @@ Version: 1.8.0
 Authors@R: c(person("Matthew", "Hall", email = "matthew.hall@bdi.ox.ac.uk", role = c("aut", "cre")), person("Chris", "Wymant", email = "matthew.hall@bdi.ox.ac.uk", role = c("aut")), person("Oliver", "Ratmann", email = "matthew.hall@bdi.ox.ac.uk", role = c("aut")))
 Description: An R package for the second half of phyloscanner (tree analysis).
 Depends: R (>= 3.4.0)
-Imports: ape, argparse, extraDistr, ff, GGally, ggtree, grid, gtable, kimisc, network, pegas, phangorn, phytools, prodlim, RColorBrewer, reshape2, scales, sna, tidyverse, treeio (>= 1.6.2), viridis
+Imports: ape, argparse, extraDistr, ff, GGally, ggtree, grid, gtable, kimisc, network, pegas, phangorn, phytools, prodlim, RColorBrewer, reshape2, RBGL, scales, sna, tidyverse, treeio (>= 1.6.2), viridis
 License: GPL
 Encoding: UTF-8
 LazyData: true
diff --git a/phyloscannerR/NAMESPACE b/phyloscannerR/NAMESPACE
index 8e1da200..ce40e8e3 100644
--- a/phyloscannerR/NAMESPACE
+++ b/phyloscannerR/NAMESPACE
@@ -54,11 +54,14 @@ export(downsample.host)
 export(downsample.tree)
 export(draw.summary.statistics)
 export(drop.tip.get.map)
+export(extract.subgraph)
 export(extract.subtrees.for.hosts)
 export(extract.tt.subgraph)
 export(find.bam.and.references)
 export(find.duplicate.tips)
 export(find.gaps)
+export(find.networks)
+export(find.pairs.in.networks)
 export(form.rectangles)
 export(gather.summary.statistics)
 export(get.ancestral.sequence)
@@ -110,6 +113,9 @@ export(phyloscanner.analyse.tree)
 export(phyloscanner.analyse.trees)
 export(phyloscanner.generate.blacklist)
 export(phyloscanner.reconstruct)
+export(phyloscanner.to.simmap)
+export(plot.chain)
+export(plot.network)
 export(prepare.tree)
 export(process.tree)
 export(produce.host.graphs)
@@ -127,6 +133,7 @@ export(reconstruct.host.ancestral.sequences)
 export(rename.blacklisted.tips)
 export(resolve.tree.into.host.clades)
 export(select.windows.by.read.and.tip.count)
+export(simmap.to.phyloscanner)
 export(simplified.transmission.summary)
 export(simplify.summary)
 export(simplify.summary.multinomial)
@@ -141,9 +148,12 @@ export(transmission.summary)
 export(write.ann.nexus)
 export(write.ann.tree)
 export(write.annotated.tree)
+import(GGally)
 import(RColorBrewer)
 import(dplyr)
 import(ggplot2)
+import(ggtree)
+import(glue)
 import(grid)
 import(gtable)
 import(purrr)
@@ -152,6 +162,7 @@ import(tidyr)
 import(tidyverse)
 import(viridis)
 importFrom(GGally,ggnet2)
+importFrom(RBGL,edmondsOptimumBranching)
 importFrom(ape,.compressTipLabel)
 importFrom(ape,.uncompressTipLabel)
 importFrom(ape,as.DNAbin)
@@ -175,7 +186,11 @@ importFrom(ggtree,geom_point2)
 importFrom(ggtree,geom_tiplab)
 importFrom(ggtree,geom_treescale)
 importFrom(ggtree,ggtree)
+importFrom(igraph,clusters)
+importFrom(igraph,graph.data.frame)
+importFrom(igraph,igraph.to.graphNEL)
 importFrom(network,as.network.matrix)
+importFrom(network,network)
 importFrom(phangorn,Ancestors)
 importFrom(phangorn,Children)
 importFrom(phangorn,Descendants)
@@ -188,6 +203,7 @@ importFrom(phangorn,phyDat2alignment)
 importFrom(phangorn,pml)
 importFrom(phytools,nodeheight)
 importFrom(readr,read_csv)
+importFrom(reshape2,dcast)
 importFrom(reshape2,melt)
 importFrom(scales,pretty_breaks)
 importFrom(tibble,as.tibble)
diff --git a/phyloscannerR/R/cmd.R b/phyloscannerR/R/command_line_utilities.R
similarity index 100%
rename from phyloscannerR/R/cmd.R
rename to phyloscannerR/R/command_line_utilities.R
diff --git a/phyloscannerR/R/main_analysis.R b/phyloscannerR/R/main_analysis.R
index 29bc9506..2a84b029 100644
--- a/phyloscannerR/R/main_analysis.R
+++ b/phyloscannerR/R/main_analysis.R
@@ -628,6 +628,7 @@ blacklist <- function(ptrees,
 #' @importFrom tidyr unnest
 #' @importFrom phangorn Ancestors Descendants Children mrca.phylo getRoot
 #' @export phyloscanner.analyse.trees 
+#' @seealso \code{\link{find.pairs.in.networks}}, \code{\link{find.networks}}
 #' @example inst/example/ex.phyloscanner.analyse.trees.R
 #' 
 phyloscanner.analyse.trees <- function(
@@ -1253,7 +1254,7 @@ multipage.summary.statistics <- function(ptrees, sum.stats, hosts = all.hosts.fr
 #' @import ggplot2
 #' @export write.annotated.tree
 
-write.annotated.tree <- function(ptree, file.name, format = c("pdf", "nex"), pdf.scale.bar.width = 0.01, pdf.w = 50, pdf.hm = 0.15, verbose = F){
+write.annotated.tree <- function(ptree, file.name, format = c("pdf", "nex", "ggplot"), tree.colours=NULL, pdf.scale.bar.width = 0.01, pdf.w = 50, pdf.hm = 0.15, verbose = F){
   tree <- ptree$tree
   read.counts <- ptree$read.counts
   
@@ -1266,12 +1267,26 @@ write.annotated.tree <- function(ptree, file.name, format = c("pdf", "nex"), pdf
   
   if(verbose) cat(paste0("Writing .",format," tree to file ",file.name,"\n"))
   
-  if(format == "pdf"){
+  if(!format %in% c("pdf","nex","ggplot")){
+	  stop("Unknown tree output format")
+  }
+  
+  if(format == "nex"){
+	  write.ann.nexus(tree, file=file.name, annotations = c("INDIVIDUAL", "SPLIT", "READ_COUNT"))
+  }
+  
+  if(is.null(tree.colours)){
+	hue.pal <- scales:::hue_pal(h= c(0, 360) + 15, c = 100, l = 65, h.start = 0, direction = 1)	  
+	tree.colours <- hue.pal(length(levels(attr(tree,'INDIVIDUAL'))))
+	names(tree.colours) <- levels(attr(tree,'INDIVIDUAL'))	  
+  }
+  
+  if(format %in% c("pdf","ggplot")){
     
-    tree.display <- ggtree(tree, aes(color=BRANCH_COLOURS)) +
+	tree.display <- ggtree(tree, aes(color=BRANCH_COLOURS)) +
       geom_point2(aes(subset=SUBGRAPH_MRCA, color=INDIVIDUAL), shape = 23, size = 3, fill="white") +
       geom_point2(aes(color=INDIVIDUAL, shape=BLACKLISTED), size=1) +
-      scale_color_hue(na.value = "black", drop=F) +
+      scale_color_manual(values=tree.colours, na.value = "black", drop=F) +
       scale_shape_manual(values=c(16, 4)) +
       theme(legend.position="none") +
       geom_tiplab(aes(col=INDIVIDUAL)) +
@@ -1296,12 +1311,14 @@ write.annotated.tree <- function(ptree, file.name, format = c("pdf", "nex"), pdf
     
     tree.display <- tree.display + ggplot2::xlim(0, 1.1*x.max)
     tree.display
-    ggsave(file.name, device="pdf", height = pdf.hm*length(tree$tip.label), width = pdf.w, limitsize = F)
-    
-  } else if(format == "nex"){
-    write.ann.nexus(tree, file=file.name, annotations = c("INDIVIDUAL", "SPLIT", "READ_COUNT"))
-  } else {
-    stop("Unknown tree output format")
+  }
+  
+  if(format == "pdf"){
+    ggsave(file.name, device="pdf", height = pdf.hm*length(tree$tip.label), width = pdf.w, limitsize = F)    
+  } 
+  
+  if(format == "ggplot"){
+	return(tree.display)
   }
 }
 
diff --git a/phyloscannerR/R/parsimony_reconstruction_methods.R b/phyloscannerR/R/parsimony_reconstruction_methods.R
index fca5ce08..5e7c9fff 100644
--- a/phyloscannerR/R/parsimony_reconstruction_methods.R
+++ b/phyloscannerR/R/parsimony_reconstruction_methods.R
@@ -153,7 +153,7 @@ split.hosts.to.subgraphs <- function(tree,
   rs.subgraphs <- rs.subgraphs %>%
     mutate(host = map_chr(subgraph, function(x) unlist(strsplit(x, "-SPLIT"))[1]),
            tip = map(subgraph, function(x) tree$tip.label[results$split.tips[[x]]])) %>%
-    unnest() %>%
+    unnest(c(host, subgraph, tip)) %>%
     select(host, subgraph, tip)
   
   list(tree=tree, rs.subgraphs=rs.subgraphs)
diff --git a/phyloscannerR/R/plotting_functions.R b/phyloscannerR/R/plotting_functions.R
index ba60e030..30ed2999 100644
--- a/phyloscannerR/R/plotting_functions.R
+++ b/phyloscannerR/R/plotting_functions.R
@@ -435,18 +435,31 @@ produce.host.graphs <- function(sum.stats, host, xcoords, x.limits, missing.wind
 #' @param control List of plotting attributes. 
 #' @return A list whose elements are \code{data}, the underlying data frame for the graph, and \code{graph}, the graph itself.
 #' @export produce.pairwise.graphs2
-#' @seealso \link{\code{classify.pairwise.relationships}}
+#' @seealso \code{\link{classify.pairwise.relationships}}
 #' @examples
 #' #
 #' # Example on data from Rakai Community Cohort Study
 #' # remember that you can specify dwin to save computing time, if you have it already computed
 #' #
 #' \dontrun{	
+#' #
+#' #	Example 1
+#' #
 #' file	<- system.file(file.path('extdata','ptyr192_phsc_analyse_trees_output.RData'),package='phyloscannerR')
 #' load(file)	#loads 'phsc', output from 'phyloscanner.analyse.trees'	
 #' hosts	<- c('RkA05868F','RkA05968M','RkA00369F','RkA01344M')
 #' inclusion <- "both" 
-#' tmp	<- produce.pairwise.graphs2(phsc, hosts=hosts, inclusion = "both")
+#' tmp	<- phyloscannerR:::produce.pairwise.graphs2(phsc, hosts=hosts, inclusion = "both")
+#' tmp$graph
+#' 
+#' #
+#' #	Example 2
+#' #
+#' infile <- system.file(file.path('extdata','Rakai_phscnetworks_allpairs_190706.RData'),package='phyloscannerR')
+#' load(infile) 
+#' # loaded "dpl" (pairs of individuals between whom linkage not excluded), "dc" (phyloscanner classification counts), "dw" (phyloscanner classifications per window)
+#' hosts	<- c('RkA05868F','RkA05968M','RkA00369F','RkA01344M') 
+#' tmp	<- phyloscannerR:::produce.pairwise.graphs2(hosts=hosts, dwin=dw, inclusion = "both")
 #' tmp$graph
 #' }
 produce.pairwise.graphs2 <- function(	ptrees, 		
@@ -471,6 +484,13 @@ produce.pairwise.graphs2 <- function(	ptrees,
 					relationship.types=c(), 
 					verbose=TRUE)		
 	}
+	if(!is.null(dwin))
+	{
+		setnames(dwin, colnames(dwin), gsub('^h([1-2])','host\\.\\1',gsub('_','\\.',tolower(colnames(dwin)))) )
+		if(!all(c('host.1','host.2','tree.id','basic.classification','patristic.distance')%in%colnames(dwin)))
+			stop("Expected columns 'host.1','host.2','tree.id','basic.classification','patristic.distance'")		
+	}
+	dwin <- dwin %>% select(host.1, host.2, tree.id, basic.classification, patristic.distance)
 	stopifnot( dwin%>%filter(host.2<host.1)%>%nrow() == 0)
 	#	define basic topology
 	dwin <- dwin %>%
@@ -693,4 +713,273 @@ produce.pairwise.graphs <- function(ptrees,
     ylab("Host")
   
   return(list(graph = pairwise.plot, data = pair.data))
+}
+
+
+#' @export
+#' @author Oliver Ratmann
+#' @import tidyverse grid ggtree GGally
+#' @importFrom network network
+#' @title Plot transmission chain
+#' @description This function plots a transmission chain, showing one edge with two labels: 
+#' L indicates the proportion of deep-sequence phylogenies in whom the two individuals are phylogenetically close and adjacent.
+#' D indicates the proportion of deep-sequence phylogenies that support the indicated direction of transmission, out of all 
+#' deep-sequence phylogenies that support either direction of transmission.
+#' @param df tibble with the following columns 'H1', 'H2', 'CATEGORISATION', 'TYPE', 'SCORE' 
+#' @param di tibble with meta-data to customize the plot with columns 'H', node.shape, node.label, node.fill
+#' @param control list of plotting control variables; see examples. 
+#' @return ggplot object
+#' @seealso \code{\link{find.networks}}, \code{\link{plot.network}}
+#' @example inst/example/ex.transmission.networks.plotting.R
+plot.chain<- function(df, di, control)
+{
+	point.size <- control$point.size
+	edge.gap <- control$edge.gap
+	edge.size <- control$edge.size
+	curvature <- control$curvature
+	arrow <- control$arrow
+	curv.shift <- control$curv.shift
+	label.size <- control$label.size 
+	node.label <- control$node.label 
+	node.fill <- control$node.fill
+	node.shape <- control$node.shape
+	node.shape.values <- control$node.shape.values
+	node.fill.values <- control$node.fill.values
+	threshold.linked <- control$threshold.linked
+	layout <- control$layout
+	if(is.na(node.label))
+	{
+		node.label<- paste0('DUMMY',1+length(which(grepl('DUMMY',colnames(di)))))
+		di <- di%>%mutate(!!node.label:=NA_character_)		
+	}
+	if(is.na(node.shape))
+	{
+		node.shape<- paste0('DUMMY',1+length(which(grepl('DUMMY',colnames(di)))))
+		di <- di%>%mutate(!!node.shape:='NA')		
+	}
+	if(is.na(node.fill))
+	{
+		node.fill<- paste0('DUMMY',1+length(which(grepl('DUMMY',colnames(di)))))
+		di <- di%>%mutate(!!node.fill:='NA')
+	}
+	if(any(is.na(node.fill.values)))
+	{
+		z						<- unique(di[[node.fill]])
+		node.fill.values		<- heat.colors(length(z))
+		names(node.fill.values)	<- z
+	}
+	if(any(is.na(node.shape.values)))
+	{
+		z						<- unique(di[[node.shape]])
+		node.shape.values		<- seq_along(z)
+		names(node.shape.values)<- z
+	}
+	di <- di %>% rename( NODE_LABEL:= !!node.label,
+			NODE_SHAPE:= !!node.shape,
+			NODE_FILL:= !!node.fill)	
+	tmp	<- c('NODE_LABEL','NODE_SHAPE','NODE_FILL')[which(c(node.label, node.shape, node.fill)=='H')]
+	if(length(tmp))
+		di <- di %>% mutate(H:= !!rlang::sym(tmp))		
+	di <- di %>% select(H, NODE_LABEL, NODE_SHAPE, NODE_FILL)
+	#		
+	if(is.null(layout))
+	{
+		layout	<- as_tibble(ggnet2(network(df %>% select(H1,H2) %>% distinct(), directed=FALSE, matrix.type="edgelist"))$data[,c("label", "x", "y")])		
+	}		
+	if(any(grepl('label', colnames(layout))))
+	{		
+		setnames(layout, c('label','x','y'), c('H1','H1_X','H1_Y'))
+	}
+	if(any(colnames(layout)=='H'))
+	{
+		setnames(layout, c('H','X','Y'), c('H1','H1_X','H1_Y'))		
+	}	
+	df		<- df %>% inner_join(layout, by='H1')
+	setnames(layout, c('H1','H1_X','H1_Y'), c('H2','H2_X','H2_Y'))
+	df		<- df %>% inner_join(layout, by='H2')
+	setnames(layout, c('H2','H2_X','H2_Y'),  c('H','X','Y'))	
+	layout	<- layout %>% inner_join(di, by='H')	
+	
+	df <- df %>% mutate(	EDGETEXT_X:= (H1_X+H2_X)/2,
+							EDGETEXT_Y:= (H1_Y+H2_Y)/2,
+							EDGE_LABEL:= paste0('L',round(100*SCORE_LINKED,d=1),'%',' // ','D',round(100*pmax(SCORE_DIR_12,SCORE_DIR_21),d=1),'%')
+							)
+	if(is.na(threshold.linked))
+	{
+		df <- df %>% mutate(EDGE_COL:='edge_col_2')		
+	}
+	if(!is.na(threshold.linked))
+	{
+		df <- df %>% 
+				mutate(	EDGE_COL:= case_when(	SCORE_LINKED>=threshold.linked ~ 'edge_col_2',
+												SCORE_LINKED<threshold.linked ~ 'edge_col_1')
+						)												
+	}	
+	#	for edges, move the start and end points on the line between X and Y
+	#	define unit gradient
+	df <- df %>% mutate( MX:= (H2_X - H1_X), MY:= (H2_Y - H1_Y) )
+	tmp		<- sqrt(df$MX*df$MX+df$MY*df$MY)
+	df <- df %>% 
+			mutate( MX:= MX/tmp, MY:= MY/tmp) %>%
+			mutate( H1_X:= H1_X + MX*edge.gap,
+					H1_Y:= H1_Y + MY*edge.gap,
+					H2_X:= H2_X - MX*edge.gap,
+					H2_Y:= H2_Y - MY*edge.gap
+			)
+	#
+	p		<- ggplot() +			
+			geom_point(data=layout, aes(x=X, y=Y, colour=NODE_FILL, pch=NODE_SHAPE), size=point.size) +
+			geom_segment(data= df%>%filter(LINK_12==1), aes(x=H1_X, xend=H2_X, y=H1_Y, yend=H2_Y, size=edge.size*pmax(SCORE_DIR_21,SCORE_DIR_12), colour=EDGE_COL), arrow=arrow, lineend="butt") +			
+			geom_segment(data= df%>%filter(LINK_21==1), aes(x=H2_X, xend=H1_X, y=H2_Y, yend=H1_Y, size=edge.size*pmax(SCORE_DIR_21,SCORE_DIR_12), colour=EDGE_COL), arrow=arrow, lineend="butt") +						
+			scale_colour_manual(values=c(node.fill.values, 'edge_col_1'='grey80', 'edge_col_2'='grey40','NA'='grey50')) +
+			scale_shape_manual(values=c(node.shape.values, 'NA'=16)) +
+			scale_fill_manual(values=c(node.fill.values, 'NA'='grey50')) +
+			scale_size_identity() +
+			geom_text(data=df, aes(x=EDGETEXT_X, y=EDGETEXT_Y, label=EDGE_LABEL), size=label.size) +
+			geom_text(data=layout, aes(x=X, y=Y, label=NODE_LABEL)) +
+			theme_void() +
+			guides(colour='none', fill='none',size='none', pch='none')
+	layout		<- subset(layout, select=c(H,X,Y))
+	setnames(layout, c('H','X','Y'), c('label','x','y'))
+	p$layout	<- layout
+	p	
+}
+
+#' @export
+#' @author Oliver Ratmann
+#' @import tidyverse grid ggtree GGally 
+#' @importFrom network network
+#' @title Plot transmission network
+#' @description This function plots a phylogenetic transmission network, showing three types of edges: 
+#' two directed edges respectively in the 1->2 and 2->1 direction, and an undirected edge that represents phylogenetic support of close and adjacent individuals without evidence into the direction transmission.  
+#' @param df tibble with the following columns 'H1', 'H2', 'CATEGORISATION', 'TYPE', 'SCORE' 
+#' @param di tibble with meta-data to customize the plot with columns 'H', node.shape, node.label, node.fill
+#' @param control list of plotting control variables; see examples. 
+#' @return ggplot object
+#' @seealso \code{\link{find.networks}}, \code{\link{plot.chain}}
+#' @example inst/example/ex.transmission.networks.plotting.R
+plot.network<- function(df, di, control)
+{		
+	point.size <- control$point.size
+	edge.gap <- control$edge.gap
+	edge.size <- control$edge.size
+	curvature <- control$curvature
+	arrow <- control$arrow
+	curv.shift <- control$curv.shift
+	label.size <- control$label.size 
+	node.label <- control$node.label 
+	node.fill <- control$node.fill
+	node.shape <- control$node.shape
+	node.shape.values <- control$node.shape.values
+	node.fill.values <- control$node.fill.values
+	threshold.linked <- control$threshold.linked
+	if(is.na(node.label))
+	{
+		node.label<- paste0('DUMMY',1+length(which(grepl('DUMMY',colnames(di)))))
+		di <- di%>%mutate(!!node.label:=NA_character_)		
+	}
+	if(is.na(node.shape))
+	{
+		node.shape<- paste0('DUMMY',1+length(which(grepl('DUMMY',colnames(di)))))
+		di <- di%>%mutate(!!node.shape:='NA')		
+	}
+	if(is.na(node.fill))
+	{
+		node.fill<- paste0('DUMMY',1+length(which(grepl('DUMMY',colnames(di)))))
+		di <- di%>%mutate(!!node.fill:='NA')
+	}
+	if(any(is.na(node.fill.values)))
+	{
+		z						<- unique(di[[node.fill]])
+		node.fill.values		<- heat.colors(length(z))
+		names(node.fill.values)	<- z
+	}
+	if(any(is.na(node.shape.values)))
+	{
+		z						<- unique(di[[node.shape]])
+		node.shape.values		<- seq_along(z)
+		names(node.shape.values)<- z
+	}
+	di <- di %>% rename( NODE_LABEL:= !!node.label,
+			NODE_SHAPE:= !!node.shape,
+			NODE_FILL:= !!node.fill)	
+	tmp	<- c('NODE_LABEL','NODE_SHAPE','NODE_FILL')[which(c(node.label, node.shape, node.fill)=='H')]
+	if(length(tmp))
+		di <- di %>% mutate(H:= !!rlang::sym(tmp))		
+	di <- di %>% select(H, NODE_LABEL, NODE_SHAPE, NODE_FILL)
+	
+	layout	<- as_tibble(ggnet2(network(df %>% select(H1,H2) %>% distinct(), directed=FALSE, matrix.type="edgelist"))$data[,c("label", "x", "y")])
+	setnames(layout, c('label','x','y'), c('H1','H1_X','H1_Y'))
+	df		<- df %>% inner_join(layout, by='H1')
+	setnames(layout, c('H1','H1_X','H1_Y'), c('H2','H2_X','H2_Y'))
+	df		<- df %>% inner_join(layout, by='H2')
+	setnames(layout, c('H2','H2_X','H2_Y'),  c('H','X','Y'))	
+	layout	<- layout %>% inner_join(di, by='H')	
+	
+	df <- df %>% mutate(	EDGETEXT_X:= (H1_X+H2_X)/2,
+							EDGETEXT_Y:= (H1_Y+H2_Y)/2)
+	#
+	#	calculate score for linked
+	if(is.na(threshold.linked))
+	{
+		df <- df %>% mutate(EDGE_COL:='edge_col_2')		
+	}
+	if(!is.na(threshold.linked))
+	{
+		tmp <- df %>% 
+				filter(TYPE!='not.close.or.nonadjacent') %>%
+				group_by(H1,H2) %>%
+				summarise(SCORE:= sum(SCORE)) %>%
+				mutate(	EDGE_COL:= case_when(	SCORE>=threshold.linked ~ 'edge_col_2',
+												SCORE<threshold.linked ~ 'edge_col_1')
+						) %>%
+				ungroup() %>%
+				select(-SCORE)
+		df <- df %>% inner_join(tmp, by=c('H1','H2'))								
+	}	
+	#	for edges, move the start and end points on the line between X and Y
+	#	define unit gradient
+	df <- df %>% mutate( MX:= (H2_X - H1_X), MY:= (H2_Y - H1_Y) )
+	tmp		<- sqrt(df$MX*df$MX+df$MY*df$MY)
+	df <- df %>% 
+			mutate( MX:= MX/tmp, MY:= MY/tmp) %>%
+			mutate( H1_X:= H1_X + MX*edge.gap,
+					H1_Y:= H1_Y + MY*edge.gap,
+					H2_X:= H2_X - MX*edge.gap,
+					H2_Y:= H2_Y - MY*edge.gap
+					)
+	#	label could just be move on the tangent vector to the line
+	#	define unit tangent
+	df <- df %>% 
+			mutate(TX:= -MY, TY:=MX) %>%
+			mutate(	EDGETEXT_X:= case_when(	TYPE=='12' ~ EDGETEXT_X + TX*curv.shift,
+											TYPE=='21' ~ EDGETEXT_X - TX*curv.shift,
+											TYPE=='complex.or.no.ancestry' ~ EDGETEXT_X
+											),
+					EDGETEXT_Y:= case_when(	TYPE=='12' ~ EDGETEXT_Y + TY*curv.shift,
+											TYPE=='21' ~ EDGETEXT_Y - TY*curv.shift,
+											TYPE=='complex.or.no.ancestry' ~ EDGETEXT_Y
+											)
+					)
+	#	
+	p <- ggplot() +			
+			geom_point(data=layout, aes(x=X, y=Y, colour=NODE_FILL, pch=NODE_SHAPE), size=point.size) +
+			geom_segment(data= df%>%filter(EDGE_COL=='edge_col_1' & TYPE=='complex.or.no.ancestry' & SCORE>0), aes(x=H1_X, xend=H2_X, y=H1_Y, yend=H2_Y, size=edge.size*SCORE, colour=EDGE_COL), lineend="butt") +
+			geom_curve(data= df%>%filter(EDGE_COL=='edge_col_1' & TYPE=='12' & SCORE>0), aes(x=H1_X, xend=H2_X, y=H1_Y, yend=H2_Y, size=edge.size*SCORE, colour=EDGE_COL), curvature=curvature, arrow=arrow, lineend="butt") +
+			geom_curve(data= df%>%filter(EDGE_COL=='edge_col_1' & TYPE=='21' & SCORE>0), aes(x=H2_X, xend=H1_X, y=H2_Y, yend=H1_Y, size=edge.size*SCORE, colour=EDGE_COL), curvature=curvature, arrow=arrow, lineend="butt") +
+			geom_segment(data= df%>%filter(EDGE_COL=='edge_col_2' & TYPE=='complex.or.no.ancestry' & SCORE>0), aes(x=H1_X, xend=H2_X, y=H1_Y, yend=H2_Y, size=edge.size*SCORE, colour=EDGE_COL), lineend="butt") +
+			geom_curve(data= df%>%filter(EDGE_COL=='edge_col_2' & TYPE=='12' & SCORE>0), aes(x=H1_X, xend=H2_X, y=H1_Y, yend=H2_Y, size=edge.size*SCORE, colour=EDGE_COL), curvature=curvature, arrow=arrow, lineend="butt") +
+			geom_curve(data= df%>%filter(EDGE_COL=='edge_col_2' & TYPE=='21' & SCORE>0), aes(x=H2_X, xend=H1_X, y=H2_Y, yend=H1_Y, size=edge.size*SCORE, colour=EDGE_COL), curvature=curvature, arrow=arrow, lineend="butt") +									
+			scale_colour_manual(values=c(node.fill.values, 'edge_col_1'='grey80', 'edge_col_2'='grey40','NA'='grey50')) +
+			scale_shape_manual(values=c(node.shape.values, 'NA'=21)) +
+			scale_fill_manual(values=c(node.fill.values, 'NA'='grey50')) +
+			scale_size_identity() +
+			geom_text(data= df%>%filter(TYPE!='not.close.or.nonadjacent' & SCORE>0), aes(x=EDGETEXT_X, y=EDGETEXT_Y, label=paste0(round(100*SCORE,d=1),'%')), size=label.size) +
+			geom_text(data=layout, aes(x=X, y=Y, label=NODE_LABEL)) +
+			theme_void() +
+			guides(colour='none', fill='none',size='none', pch='none') 
+	layout <- layout %>% select(H,X,Y)
+	setnames(layout, c('H','X','Y'), c('label','x','y'))	
+	p$layout <- layout
+	p
 }
\ No newline at end of file
diff --git a/phyloscannerR/R/relationship_summary_methods.R b/phyloscannerR/R/relationship_summary_methods.R
index 1e70fc10..a92bd877 100644
--- a/phyloscannerR/R/relationship_summary_methods.R
+++ b/phyloscannerR/R/relationship_summary_methods.R
@@ -13,7 +13,7 @@
 #' #	continue Rakai example,
 #' #	load phyloscanner output from 'phyloscanner.analyse.trees'
 #' #	
-#' file	<- system.file(file.path('extdata','ptyr192_phsc_analyse_trees_output.R'),package='phyloscannerR')
+#' file	<- system.file(file.path('extdata','ptyr192_phsc_analyse_trees_output.Rdata'),package='phyloscannerR')
 #' load(file)	#loads 'phsc', output from 'phyloscanner.analyse.trees'
 #' #	use distance thresholds found in analysis of Rakai couples
 #' close.threshold <- 0.025
diff --git a/phyloscannerR/R/transmission_network_functions.R b/phyloscannerR/R/transmission_network_functions.R
new file mode 100644
index 00000000..7462cfbe
--- /dev/null
+++ b/phyloscannerR/R/transmission_network_functions.R
@@ -0,0 +1,351 @@
+#' @export
+#' @author Oliver Ratmann
+#' @import tidyverse 
+#' @title Find pairs of individuals between whom linkage is not excluded phylogenetically
+#' @param dwin A \code{data.frame} describing pairwise relationships between the hosts in each tree; normally output of \code{classify.pairwise.relationships}
+#' @param dc A \code{data.frame} summarising pairwise relationships between the hosts across all trees; normally output of \code{count.pairwise.relationships}
+#' @param control List of control variables:
+#' \itemize{
+#' 		\item{\code{linked.group}} Phyloscanner classification used to identify pairs in networks. Default 'close.and.adjacent.cat'.
+#' 		\item{\code{linked.no}} Phyloscanner classification type quantifying that pairs are not linked. Default 'not.close.or.nonadjacent'.
+#' 		\item{\code{linked.yes}} Phyloscanner classification type quantifying that pairs are linked. Default 'close.and.adjacent'.
+#' 		\item{\code{conf.cut}} Threshold on the proportion of deep-sequence phylogenies with distant/disconnected subgraphs above which pairs are considered phylogenetically unlinked. Default: 0.6
+#' 		\item{\code{neff.cut}} Threshold on the minimum number of deep-sequence phylogenies with sufficient reads from two individuals to make any phylogenetic inferences. Default: 3.
+#' }
+#' @param verbose Flag to switch on/off verbose mode. Default: TRUE. 
+#' @param dmeta Optional individual-level meta-data that is to be added to output. Can be NULL.
+#' @return 
+#' Three R objects are generated: 
+#' \itemize{
+#' 		\item{\code{network.pairs}} is a tibble that describes pairs of individuals between whom linkage is not excluded phylogenetically.
+#' 		\item{\code{relationship.counts}} is a tibble that summarises the phylogenetic relationship counts for each pair.
+#' 		\item{\code{windows}} is a tibble that describes the basic phyloscanner statistics (distance, adjacency, paths between subgraphs) in each deep-sequence phylogeny for each pair.
+#' }
+#' @description This function identifies pairs of individuals between whom linkage is not excluded phylogenetically in a large number of phyloscanner analyses, and provides detailed information on them.
+#' @seealso \code{\link{phyloscanner.analyse.trees}}, \code{\link{cmd.phyloscanner.analyse.trees}}
+#' @example inst/example/ex.transmission.networks.R
+find.pairs.in.networks <- function(dwin, dc, control= list(linked.group='close.and.adjacent.cat',linked.no='not.close.or.nonadjacent',linked.yes='close.and.adjacent', conf.cut=0.6, neff.cut=3), dmeta=NULL, verbose=TRUE)
+{	
+	#	check if dmeta in right format
+	if(!is.null(dmeta))
+	{		
+		if( !'tbl_df'%in%class(dmeta) )
+			stop('"dmeta" must have class tbl_df (be a tibble)')
+		if( !'ID'%in%colnames(dmeta) )
+			stop('"dmeta" must have a column "ID"')
+		if( class(dmeta$ID)!='character' )
+			stop('"dmeta$ID" must be a character')		
+	}	
+	#	internal variables
+	linked.group <- control$linked.group 
+	linked.no <- control$linked.no 
+	linked.yes <- control$linked.yes 
+	conf.cut <- control$conf.cut
+	neff.cut <- control$neff.cut	
+	
+	#	ensure IDs are characters
+	if(!all(c( 	class( dc$host.1 )=='character',
+					class( dc$host.2 )=='character',	
+					class( dwin$host.1 )=='character',
+					class( dwin$host.2 )=='character'
+			)))
+		stop('host.1 or host.2 not of character. This is unexpected, contact maintainer.')
+	#	ensure host.1 < host.2		
+	if( dc %>% filter( host.1 < host.2) %>% nrow() != dc %>% nrow() )
+		stop('Not all host.1 < host.2 in "dc". This is unexpected, contact maintainer.')
+	if( dwin %>% filter( host.1 < host.2) %>% nrow() != dwin %>% nrow() )
+		stop('Not all host.1 < host.2 in "dwin". This is unexpected, contact maintainer.')
+	
+	#	get list of pairs that are not ph-unlinked
+	dpl	<- dc %>% 
+			filter(categorisation==linked.group & type==linked.no) %>% 
+			filter(n.eff>=neff.cut & k.eff/n.eff < conf.cut) %>%
+			select(host.1, host.2) %>%
+			mutate(PTY_RUN:= infiles$PTY_RUN[i])
+	dpl	<- dc %>% 
+			filter(categorisation==linked.group & type==linked.yes) %>%
+			right_join(dpl, by=c('host.1','host.2'))
+	#	the pairs may just be ambiguous, keep only those with some evidence for linkage	
+	dpl <- dpl %>% filter(k.eff/n.eff > 0)			
+	#	gather all classifications counts for these pairs
+	dwin	<- dpl %>%
+			select(PTY_RUN, host.1, host.2) %>%
+			left_join(dwin, by=c('host.1','host.2'))
+	#	gather all classification counts for these pairs
+	dc	<- dpl %>%
+			select(PTY_RUN, host.1, host.2) %>%
+			left_join(dc, by=c('host.1','host.2'))
+	
+	#
+	#	upper case col names
+	setnames(dpl, colnames(dpl), toupper(gsub('\\.','_',gsub('host\\.','h',colnames(dpl)))))
+	setnames(dc, colnames(dc), toupper(gsub('\\.','_',gsub('host\\.','h',colnames(dc)))))
+	setnames(dwin, colnames(dwin), toupper(gsub('\\.','_',gsub('host\\.','h',colnames(dwin)))))
+	
+	#
+	#	add meta-data if provided
+	if(!is.null(dmeta))
+	{		
+		if(verbose) cat('\nAdd meta-data...')
+		tmp			<- unique(dmeta, by='ID')
+		setnames(tmp, colnames(tmp), gsub('H1_ID','H1',paste0('H1_',colnames(tmp))))
+		dpl <- dpl %>% left_join(tmp, by='H1')
+		dc <- dc %>% left_join(tmp, by='H1')
+		dwin <- dwin %>% left_join(tmp, by='H1')		
+		setnames(tmp, colnames(tmp), gsub('H1','H2',colnames(tmp)))
+		dpl <- dpl %>% left_join(tmp, by='H2')
+		dc <- dc %>% left_join(tmp, by='H2')
+		dwin <- dwin %>% left_join(tmp, by='H2')		
+	}
+	
+	if(verbose) cat('\nDone. Found pairs, n=', nrow(dpl), '. Found relationship counts, n=', nrow(dc), '. Found phyloscanner statistics, n=', nrow(dwin), '.')
+	#	return
+	list(network.pairs=dpl, relationship.counts=dc, windows=dwin)
+}
+
+
+#' @export
+#' @author Oliver Ratmann
+#' @import tidyverse glue
+#' @importFrom igraph graph.data.frame clusters
+#' @title Find phylogenetic transmission networks and most likely transmission chain
+#' @param dc Summary of phylogenetic relationship counts for each pair, stored as tibble.
+#' @param control List of control variables:
+#' \itemize{
+#' 		\item{\code{linked.group}} Phyloscanner classification used to identify pairs in networks. Default 'close.and.adjacent.cat'.
+#' 		\item{\code{linked.no}} Phyloscanner classification type quantifying that pairs are not linked. Default 'not.close.or.nonadjacent'.
+#' 		\item{\code{linked.yes}} Phyloscanner classification type quantifying that pairs are linked. Default 'close.and.adjacent'.
+#' 		\item{\code{neff.cut}} Threshold on the minimum number of deep-sequence phylogenies with sufficient reads from two individuals to make any phylogenetic inferences. Default: 3.
+#' 		\item{\code{weight.complex.or.no.ancestry}} Weight given to score complex.or.no.ancestry. Default: 50%.  
+#' } 	
+#' @param verbose Flag to switch on/off verbose mode. Default: TRUE. 
+#' @return list of two R objects 
+#' \itemize{
+#' 		\item{\code{transmission.networks}} is a tibble that describes the edge list of pairs of individuals in a network, and corresponding phyloscanner scores
+#' 		\item{\code{most.likely.transmission.chains}} is a tibble that describes the edge list of pairs of individuals in the most likely chain, and corresponding phyloscanner scores
+#' }
+#' See description.
+#' @description 
+#' This function computes transmission networks from phyloscanner output of a population-based deep sequence sample. 
+#' A transmission network is defined as a set of individuals between whom phylogenetic linkage is not excluded.
+#' Every individual in the network has at least one partner in the network between whom evidence for being phylogenetically unlinked is below a threshold.
+#' These pairs of individuals are identified with a separate function, \code{\link{find.pairs.in.networks}}.  
+#' Due to the nature of the deep-sequence
+#' data, there are up to three edges between pairs of individuals, giving the strength of evidence
+#' of spread in each direction (two possibilities) and the strength of evidence for phylogenetic linkage with 
+#' the direction remaining unclear. Some of these pairs have limited evidence for phylogenetic linkage. The networks
+#' are best interpreted as partially sampled transmission chain. 
+#' 
+#' This function also finds the most likely transmission 
+#' chain among all chains spanning the nodes in a specified transmission network.
+#' The transmission network consists of at most three edges between a set of
+#' individuals (directed edge in either direction, and undirected edge). Chains 
+#' are defined as spanning graphs through the set of nodes, without loops and with
+#' in-degree equal to one for all nodes, except the start node. Each directed edge 
+#' in a chain has a weight, which corresponds to the phylogenetic evidence of transmission
+#' in this direction. It is set to the phyloscanner score for transmission in this 
+#' direction plus half the phyloscanner score of the undirected edge, for transmission 
+#' with direction unclear. The probability of the entire chain is given by the product
+#' of the phyloscanner scores along each edge in the chain.
+#'    
+#' @seealso \code{\link{find.pairs.in.networks}}, \code{\link{plot.network}}, \code{\link{plot.chain}}
+#' @example inst/example/ex.transmission.networks.R
+find.networks<- function(dc, control= list(linked.group='close.and.adjacent.cat',linked.no='not.close.or.nonadjacent',linked.yes='close.and.adjacent', neff.cut=3, weight.complex.or.no.ancestry=0.5), verbose=TRUE)
+{	
+	#	internal constants
+	linked.group 	<- control$linked.group 
+	linked.no 		<- control$linked.no 
+	linked.yes 		<- control$linked.yes 	
+	neff.cut 		<- control$neff.cut		
+	weight.complex.or.no.ancestry <- control$weight.complex.or.no.ancestry
+	scores.group	<- 'close.and.adjacent.and.ancestry.cat'
+	scores.nolink	<- 'not.close.or.nonadjacent'
+	scores.ambig	<- 'complex.or.no.ancestry' 
+	dir.group		<- "close.and.adjacent.and.directed.cat"
+	
+	#
+	#	construct tri-edge transmission network	
+	dnet <- dc %>% 
+			filter( CATEGORISATION==linked.group &  TYPE==linked.yes & N_EFF>neff.cut) %>%
+			select(-c(CATEGORICAL_DISTANCE, TYPE, K, K_EFF, N, N_EFF, SCORE))
+	#	define potential transmission network membership
+	if(verbose) cat(glue('\nReconstructing {nrow(dnet)} transmission networks among linked pairs'))
+	tmp <- dnet %>% select(H1, H2)			
+	tmp <- igraph:::graph.data.frame(tmp, directed=FALSE, vertices=NULL)
+	rtc <- tibble(ID=V(tmp)$name, CLU=clusters(tmp, mode="weak")$membership)
+	rtc <- rtc %>% 
+			group_by(CLU) %>% 
+			summarise( CLU_SIZE:=length(ID) ) %>% 
+			arrange(desc(CLU_SIZE)) %>%
+			ungroup() %>%
+			mutate( IDCLU:=seq_along(CLU_SIZE) ) %>%
+			inner_join(rtc, by='CLU') %>%
+			select(-CLU)	
+	#	add info on edges: network membership
+	rtc <- rtc %>% rename(H1:=ID)
+	dnet <- dnet %>% inner_join(rtc, by='H1')
+	rtc <- rtc %>% select(-CLU_SIZE)
+	rtc <- rtc %>% rename(H2:=H1)	
+	dnet <- dnet %>% inner_join(rtc, by=c('H2','IDCLU'))	
+	#	add info on edges: direction 12, direction 21, direction ambiguous, unlinked
+	#	add this info in new rows
+	tmp <- dc %>% 
+			filter(CATEGORISATION==scores.group) %>%
+			select(PTY_RUN, H1, H2, TYPE, SCORE)
+	dnet <- dnet %>% inner_join(tmp, by=c('H1','H2','PTY_RUN'))		
+	if(verbose) cat(glue('\nFound {length(unique(dnet$IDCLU))} transmission networks with 
+	                     {dnet %>% filter(TYPE!=scores.nolink) %>% distinct(H1,H2) %>% nrow()} 
+	                     links (in either direction or undirected) and {length(unique(c(dnet$H1, dnet$H2)))}
+	                     individuals'))
+	#	generate most likely transmission chains
+	if(verbose) cat('\nReconstructing most likely transmission chains...')	
+	tmp <- dnet %>% 
+			filter(TYPE!=scores.nolink) %>% 
+			select(PTY_RUN,H1,H2,IDCLU,CLU_SIZE,TYPE,SCORE)
+	dchain <- find.most.likely.chains.RBGLedmonds(tmp, weight.complex.or.no.ancestry=weight.complex.or.no.ancestry)
+	
+	#	merge pw linkage scores to the ML chain representation
+	#	rationale: this describes prob of linkage. here, any 'inconsistent direction' is still considered as prob for linkage
+	tmp <- dc %>% 
+			filter( CATEGORISATION==linked.group & TYPE==linked.yes ) %>%
+			select(H1,H2,PTY_RUN,SCORE) %>%
+			rename(SCORE_LINKED=SCORE)
+	dchain <- dchain %>% inner_join(tmp, by=c('H1','H2','PTY_RUN'))
+	#	merge pw direction scores to the ML chain representation
+	#	this is considering in denominator 12 + 21 before reducing probs to achieve self-consistency
+	#	rationale: decide on evidence for direction based on comparing only the flows in either direction, 12 vs 21
+	tmp <- dc %>% 
+			filter( CATEGORISATION==dir.group ) %>%
+			select(H1,H2,PTY_RUN,TYPE,SCORE) %>%
+			mutate(TYPE:= paste0('SCORE_DIR_',TYPE)) %>%
+			spread(TYPE,SCORE)
+	dchain <- dchain %>% left_join(tmp, by=c('H1','H2','PTY_RUN'))
+	#	merge pw network scores to the ML chain representation
+	#	this is considering in denominator 12 + 21 + unclear reducing probs to achieve self-consistency
+	#	same as MX_PROB_12, MX_PROB_21, after the final step below that sets one of the two probs to zero
+	tmp <- dc %>% 
+			filter( CATEGORISATION==scores.group & TYPE!=scores.nolink) %>%
+			select(H1,H2,PTY_RUN,TYPE,SCORE) %>%
+			mutate(TYPE:= replace(TYPE, TYPE==scores.ambig, 'AMB')) %>%
+			mutate(TYPE:= paste0('SCORE_NW_',TYPE)) %>%
+			spread(TYPE,SCORE)
+	dchain <- dchain %>% inner_join(tmp, by=c('H1','H2','PTY_RUN'))
+	#	fill in NA
+	dchain <- dchain %>% 			
+			mutate_at(c('SCORE_DIR_12','SCORE_DIR_21','SCORE_NW_12','SCORE_NW_21'), function(x) replace(x, is.na(x), 0)) 
+ 	#	ensure scores are compatible with self-consistency in ML chain
+	dchain <- dchain %>%
+			filter(LINK_12==1 | LINK_21==1) %>%
+			mutate(	SCORE_NW_12= replace(SCORE_NW_12, LINK_12==0 & LINK_21==1 & SCORE_NW_12>SCORE_NW_21, 0),
+					SCORE_NW_21= replace(SCORE_NW_21, LINK_12==1 & LINK_21==0 & SCORE_NW_21>SCORE_NW_12, 0),
+					SCORE_DIR_12= replace(SCORE_DIR_12, LINK_12==0 & LINK_21==1 & SCORE_DIR_12>SCORE_DIR_21, 0),
+					SCORE_DIR_21= replace(SCORE_DIR_21, LINK_12==1 & LINK_21==0 & SCORE_DIR_21>SCORE_DIR_12, 0),
+			)
+	#	copy meta data to ML chain
+	tmp <- dnet %>%  
+			select(-c(CLU_SIZE, IDCLU, TYPE, SCORE)) %>% 
+			distinct()
+	dchain <- dchain %>% inner_join(tmp, by=c('H1','H2','PTY_RUN'))
+	#	verbose
+	if(verbose) cat(glue("\nIdentified {dchain %>% select(IDCLU) %>% distinct() %>% nrow()} most likely transmission chains 
+	                      with {nrow(dchain)} links and {length(unique(c(dchain$H1, dchain$H2)))} individuals"))
+	if(verbose) cat('\nDone.')
+	# return
+	list(transmission.networks=dnet, most.likely.transmission.chains=dchain)
+}
+
+#' @keywords internal
+#' @author Oliver Ratmann
+#' @importFrom igraph graph.data.frame igraph.to.graphNEL
+#' @importFrom RBGL edmondsOptimumBranching
+#' @title Find most likely transmission chains
+#' @description This function finds the most likely transmission 
+#' chain among all chains spanning the nodes in a specified transmission network.
+#' The transmission network consists of at most three edges between a set of
+#' individuals (directed edge in either direction, and undirected edge). Chains 
+#' are defined as spanning graphs through the set of nodes, without loops and with
+#' in-degree equal to one for all nodes, except the start node. Each directed edge 
+#' in a chain has a weight, which corresponds to the phylogenetic evidence of transmission
+#' in this direction. It is set to the phyloscanner score for transmission in this 
+#' direction plus half the phyloscanner score of the undirected edge, for transmission 
+#' with direction unclear. The probability of the entire chain is given by the product
+#' of the phyloscanner scores along each edge in the chain.   
+#' Edmonds algorithm is used to solve for the most probable chain.
+#' @param rtnn tibble encoding the three edges of a phyloscanner transmission network. Must have columns 'H1','H2','IDCLU','TYPE','SCORE','K_EFF'.
+#' @param weight.complex.or.no.ancestry weight given to score complex.or.no.ancestry, default: 50%   
+#' @return tibble encoding the most likely chain. Has columns 'H1','H2','IDCLU', 'LINK_12', 'LINK_21' (either 1 or 0 for a link in the corresponding direction), and 'MX_PROB_12', 'MX_PROB_21' (associated posterior probabilities)
+#' @seealso \code{\link{find.networks}} 
+find.most.likely.chains.RBGLedmonds<- function(rtnn, weight.complex.or.no.ancestry=0.5, verbose=0)
+{
+	stopifnot(c('PTY_RUN','H1','H2','IDCLU','CLU_SIZE','TYPE','SCORE')%in%colnames(rtnn))
+	if( length(setdiff(c('complex.or.no.ancestry','21','12'), rtnn %>% distinct(TYPE) %>% pull(TYPE)) ))
+		stop('Unexpected classification types. Contact maintainer.')	
+	#	define weights to convert tri-edges to bi-edges 
+	#	and create bi-edges
+	rtm <- rtnn %>% 
+			mutate(	ID1_IN_WEIGHT= case_when(	TYPE=='12' ~ 0,
+							TYPE=='complex.or.no.ancestry' ~ weight.complex.or.no.ancestry,
+							TYPE=='21' ~ 1),
+					ID2_IN_WEIGHT= case_when(	TYPE=='21' ~ 0,
+							TYPE=='complex.or.no.ancestry' ~ weight.complex.or.no.ancestry,
+							TYPE=='12' ~ 1)															
+			) %>%
+			group_by( PTY_RUN,IDCLU,CLU_SIZE,H1,H2 ) %>%
+			summarise( 	PROB_21= sum(SCORE*ID1_IN_WEIGHT), 					
+						PROB_12= sum(SCORE*ID2_IN_WEIGHT)
+						) %>%
+			ungroup()
+	#	handle networks of size 2 - this is easy	
+	ans <- rtm %>% 
+			filter(CLU_SIZE==2) %>%
+			transform(TIES_RND_BRK= runif(length(CLU_SIZE))) %>%
+			mutate( LINK_12 = ifelse((PROB_12>PROB_21) | (PROB_12==PROB_21 & TIES_RND_BRK>=0.5), 1L, 0L),
+					LINK_21 = ifelse((PROB_21>PROB_12) | (PROB_12==PROB_21 & TIES_RND_BRK<0.5), 1L, 0L) ) %>%
+			select(-TIES_RND_BRK)
+	#	handle networks of size >2 - use Edmonds algorithm 	
+	rtm <- rtm %>% 
+			filter(CLU_SIZE>2) %>%
+			group_by(IDCLU) %>%
+			group_split()
+	for(i in seq_along(rtm))
+	{
+		#	
+		#i	<- 13 
+		if(verbose)
+			cat('\nIDCLU ',i)
+		#	convert to weighted edge list
+		tmp <- rtm[[i]] %>% 
+				mutate(weight:=PROB_12) %>%
+				select(H1,H2,weight) 
+		edgelist <- rtm[[i]] %>% 
+				mutate(weight:=PROB_21) %>% 
+				rename(H1:= H2, H2:= H1) %>%
+				select(H1,H2,weight) %>%
+				bind_rows(tmp) %>%
+				filter(weight>0) 
+		#	maximisation is over sum of weights, so transform to log prob
+		#	since edmonds cannot handle negative values or zeros, add some constant
+		edgelist <- edgelist %>% mutate(weight:= log(weight) - min(log(weight)) + 1 )
+		#	run Edmonds 
+		g <- graph.data.frame(edgelist)	
+		g2 <- igraph.to.graphNEL(g)
+		g3 <- edmondsOptimumBranching(g2)
+		edgelist <- as_tibble(t(g3$edgeList)) %>%
+				rename(H1:=from, H2:= to) %>%
+				mutate(	LINK_12:= 1L,
+						LINK_21:= 0L)
+		edgelist <- edgelist %>% 
+				rename(H1:= H2, H2:=H1, LINK_12:=LINK_21, LINK_21:=LINK_12) %>%
+				bind_rows(edgelist) 
+		rtm[[i]] <- rtm[[i]] %>% inner_join(edgelist, by=c('H1','H2'))						
+	}
+	rtm		<- do.call('rbind',rtm)
+	ans		<- rbind(ans, rtm)
+	ans		<- ans %>% 
+			select(IDCLU, CLU_SIZE, PTY_RUN, H1, H2, LINK_12, LINK_21)
+			#mutate(	PROB_21= replace(PROB_21, LINK_12==1, 0),
+			#		PROB_12= replace(PROB_12, LINK_21==1, 0)
+			#		) %>%
+			#rename(MX_PROB_21=PROB_21, MX_PROB_12=PROB_12, MX_KEFF_21=KEFF_21, MX_KEFF_12=KEFF_12 )
+	ans
+}
\ No newline at end of file
diff --git a/phyloscannerR/R/tree_utility_functions.R b/phyloscannerR/R/tree_utility_functions.R
index e15a03ed..badc621b 100644
--- a/phyloscannerR/R/tree_utility_functions.R
+++ b/phyloscannerR/R/tree_utility_functions.R
@@ -553,3 +553,187 @@ select.windows.by.read.and.tip.count <- function(ptrees, dwin, tip.regex, min.re
   
   dwin
 }
+
+#' @title Cast phyloscanner tree to SIMMAP tree
+#' @export phyloscanner.to.simmap
+#' @author Oliver Ratmann
+#' @importFrom reshape2 dcast
+#' @usage phyloscanner.to.simmap(ph, delete.phyloscanner.structures)
+#' @param ph A \code{phyloscanner.tree} with attributes \code{SPLIT}, \code{INDIVIDUAL}, \code{BRANCH_COLOURS}, \code{SUBGRAPH_MRCA}.
+#' @param delete.phyloscanner.structures Logical value. If true \code{phyloscanner} attributes are removed from the tree.
+#' @return Same tree in \code{SIMMAP} format, with elements \code{maps}, \code{mapped.edge}, \code{node.states}.
+#' @seealso \code{\link{simmap.to.phyloscanner}}, \code{\link{extract.subgraph}}
+#' @example inst/example/ex.cast.to.simmap.R
+phyloscanner.to.simmap<- function(ph, delete.phyloscanner.structures=FALSE)
+{
+	# make maps for each edge
+	# attr(ph, 'SPLIT') specifies state of branch ending in node 
+	edge.state <- as.character( attr(ph, 'SPLIT')[ ph$edge[,2] ] )
+	edge.state[is.na(edge.state)] <- 'Unknown'	
+	edge.maps <- ph$edge.length	
+	# make mapped.edge matrix
+	mapped.edge <- data.frame(STATE= edge.state, LEN=edge.maps, IDX=seq_along(edge.state))
+	mapped.edge <- reshape2:::dcast(mapped.edge, IDX~STATE, value.var='LEN')
+	for(x in colnames(mapped.edge))
+		mapped.edge[[x]][ which(is.na(mapped.edge[[x]])) ] <- 0
+	mapped.edge <- as.matrix(mapped.edge)	
+	rownames(mapped.edge) <- apply(ph$edge,1,paste, collapse=',')	
+	# finalise edge.maps
+	edge.maps <- lapply(seq_along(edge.maps), function(k){ x<- edge.maps[[k]]; names(x)<- edge.state[k]; x})
+	# make node.states	
+	node.states <- as.character( attr(ph, 'SPLIT')[ ph$edge[,1] ] )
+	node.states[is.na(node.states)] <- 'Unknown'
+	node.states <- matrix(node.states, nrow=nrow(ph$edge), ncol=2)	
+	node.states[,2] <- edge.state
+	# set list elements
+	ph[['maps']] <- edge.maps
+	ph[['mapped.edge']] <- mapped.edge
+	ph[['node.states']] <- node.states
+	# set attributes
+	attr(ph,'class') <- c("simmap", "phylo")
+	attr(ph, "map.order") <- "right-to-left"
+	attr(ph, "order") <- "cladewise"
+	# delete attributes
+	if(delete.phyloscanner.structures)
+	{
+		attr(ph, "SPLIT") <- NULL
+		attr(ph, "INDIVIDUAL") <- NULL
+		attr(ph, "BRANCH_COLOURS") <- NULL
+		attr(ph, "SUBGRAPH_MRCA") <- NULL	
+	}	
+	ph
+}
+
+#' @title Cast SIMMAP tree to phyloscanner tree
+#' @export simmap.to.phyloscanner
+#' @importFrom phangorn Ancestors
+#' @author Oliver Ratmann
+#' @usage simmap.to.phyloscanner(ph, delete.simmap.structures)
+#' @param ph A \code{SIMMAP} tree with elements \code{maps}, \code{mapped.edge}, \code{node.states}.
+#' @param delete.simmap.structures Logical value. If true \code{SIMMAP} elements are removed from the tree.
+#' @return Same tree in \code{phyloscanner.tree} format, with attributes \code{SPLIT}, \code{INDIVIDUAL}, \code{BRANCH_COLOURS}, \code{SUBGRAPH_MRCA}.
+#' @seealso \code{\link{phyloscanner.to.simmap}}, \code{\link{extract.subgraph}}
+#' @example inst/example/ex.cast.to.simmap.R
+simmap.to.phyloscanner <- function(ph, delete.simmap.structures=FALSE)
+{
+	stopifnot( any(class(ph)=='simmap') )
+	# state at each node. set root to Unknown by default
+	node.states <- vector('character', Nnode(ph, internal.only=FALSE))
+	node.states[ph$edge[,2]] <- ph$node.states[,2]
+	node.states[Ntip(ph)+1] <- 'Unknown'
+	# find subgraph mrcas
+	subgraphs <- setdiff(sort(unique(node.states)),'Unknown') 
+	# for each subgraph, find one member node
+	subgraph.member <- sapply(subgraphs, function(x) which(node.states==x)[1] )
+	# for each member, descend in tree and find most ancestral member node, which is the mrca of the subgraph
+	subgraph.ancestors <- phangorn:::Ancestors(ph, subgraph.member)
+	subgraph.mrcas <- sapply(seq_along(subgraph.ancestors), function(j){				
+				subgraph.mrca <- subgraph.member[j]
+				subgraph.name <- names(subgraph.member)[j]
+				nodes.in.same.subgraph.idx <- which(node.states[subgraph.ancestors[[j]]]==subgraph.name)
+				if(length(nodes.in.same.subgraph.idx))
+					subgraph.mrca <- subgraph.ancestors[[j]][ tail(nodes.in.same.subgraph.idx,1) ]
+				subgraph.mrca
+			})
+	names(subgraph.mrcas) <- subgraphs
+	#	make phyloscanner attributes
+	attr(ph, 'SPLIT') <- node.states
+	tmp <- rep(FALSE, length(node.states))
+	tmp[subgraph.mrcas] <- TRUE
+	attr(ph, 'SUBGRAPH_MRCA') <- tmp
+	attr(ph, 'INDIVIDUAL') <- gsub('-SPLIT[0-9]+','',node.states)
+	tmp <- t( rbind( node.states[ ph$edge[,1] ], 
+					node.states[ ph$edge[,2] ], 
+					node.states[ ph$edge[,2] ] ) )
+	tmp[tmp[,1]!=tmp[,2], 3] <- 'Unknown'	
+	tmp <- gsub('-SPLIT[0-9]+','',tmp[,3])
+	attr(ph, 'BRANCH_COLOURS') <- rep(NA, Nnode(ph, internal.only=FALSE)) 
+	attr(ph, 'BRANCH_COLOURS')[ ph$edge[,2] ] <- tmp		
+	attr(ph, 'SPLIT')[attr(ph, 'SPLIT')=='Unknown'] <- NA
+	attr(ph, 'INDIVIDUAL')[attr(ph, 'INDIVIDUAL')=='Unknown'] <- NA
+	attr(ph, 'BRANCH_COLOURS')[attr(ph, 'BRANCH_COLOURS')=='Unknown'] <- NA
+	attr(ph, 'SPLIT') <- factor(attr(ph, 'SPLIT'))
+	attr(ph, 'INDIVIDUAL') <- factor(attr(ph, 'INDIVIDUAL'))
+	attr(ph, 'BRANCH_COLOURS') <- factor(attr(ph, 'BRANCH_COLOURS'))
+	if(delete.simmap.structures)
+	{
+		ph[["maps"]] <- NULL
+		ph[["mapped.edge"]] <- NULL
+		ph[["node.states"]] <- NULL
+		attr(ph, 'map.order') <- NULL
+		attr(ph, 'class') <- 'phylo'		
+	}
+	ph
+}
+
+#' @title Extract subgraph from phyloscanner tree
+#' @export extract.subgraph
+#' @importFrom phangorn Ancestors
+#' @author Oliver Ratmann
+#' @usage extract.subgraph(ph, mrca)
+#' @param ph A tree of class \code{SIMMAP} and \code{phyloscanner}, i.e. with elements \code{maps}, \code{mapped.edge}, \code{node.states} and with attributes \code{SPLIT}, \code{INDIVIDUAL}, \code{BRANCH_COLOURS}, \code{SUBGRAPH_MRCA}.
+#' @param mrca Node index of subgraph MRCA
+#' @return Subgraph in ape format, with additional elements \code{subgraph.name} (subgraph name in the phyloscanner SPLIT attribute), \code{subgraph.root.edge} (length of the ancestral edge of the subgraph MRCA), \code{subgraph.parent.state} (ancestral state of the parent of subgraph MRCA).  
+#' @seealso \code{\link{phyloscanner.to.simmap}}
+#' @example inst/example/ex.extract.subgraph.R
+extract.subgraph<- function(ph, mrca)
+{
+	stopifnot( any(class(ph)=='simmap') )
+	stopifnot( c("SPLIT","INDIVIDUAL","BRANCH_COLOURS","SUBGRAPH_MRCA")%in%names(attributes(ph)) )
+	subgraph.name <- as.character(attr(ph, 'SPLIT')[mrca])
+	host <- as.character(attr(ph, "INDIVIDUAL")[mrca])
+	subgraph.root.edge <- ph$edge.length[ which( ph$edge[,2]==mrca ) ]
+	subgraph.parent.state <- as.character(attr(ph, "BRANCH_COLOURS")[ ph$edge[ph$edge[,2]==mrca,1] ])
+	#	it is possible that:
+	#	the original tree had a subgraph rooted in some state X, but dropping all tips of that subgraph
+	#	gives a tree in which the subgraph of mrca is again rooted in a subgraph of state host
+	if(!is.na(subgraph.parent.state) && subgraph.parent.state==host)
+	{
+		cat('\nFound subgraph.parent.state that is of same state. This may not be an error but check. mrca=',mrca)
+		subgraph.parent.state <- NA
+	}		
+	stopifnot( is.na(subgraph.parent.state) || subgraph.parent.state!=host )
+	if(mrca<=Ntip(ph))
+	{
+		subgraph<- list(	edge= matrix(nrow=0, ncol=2), 
+				edge.length=vector('numeric',0),
+				Nnode=0,
+				tip.label=ph$tip.label[mrca],
+				maps= vector('list',0),
+				mapped.edge= matrix(nrow=0, ncol=0),
+				node.states= matrix(nrow=0, ncol=2)
+		)
+		#attr(subgraph,'class') <- c( "phylo")
+		#attr(subgraph, "map.order") <- "right-to-left"
+		#attr(subgraph, "order") <- "cladewise"		
+	}
+	if(mrca>Ntip(ph))
+	{
+		subgraph <- phytools:::extract.clade.simmap(ph, mrca)
+		subgraph.root <- Ntip(subgraph)+1L
+		descendants <- Descendants(subgraph, subgraph.root, type='all')
+		tmp <- sapply(descendants, function(x) which(subgraph$edge[,2]==x))
+		descendants.states <- subgraph[['node.states']][tmp,2]
+		descendants.not.in.subgraph <- descendants[ descendants.states!=subgraph.name ]
+		if(length(descendants.not.in.subgraph))
+		{
+			# find all tips of descendants.not.in.subgraph, and remove the corresponding tips, which will remove the subtree for each descendant
+			tips.not.in.subgraph <- sort(unique(unlist(Descendants(subgraph, descendants.not.in.subgraph, type='tips'))))
+			subgraph <- ape:::drop.tip(subgraph, tips.not.in.subgraph)
+			subgraph[['maps']] <- NULL
+			subgraph[['mapped.edge']] <- NULL
+			subgraph[['node.states']] <- NULL
+			attr(subgraph,'class') <- c( "phylo")
+			attr(subgraph, "map.order") <- NULL
+			attr(subgraph, "order") <- NULL
+			attr(subgraph, "SPLIT") <- NULL
+			attr(subgraph, "INDIVIDUAL") <- NULL
+			attr(subgraph, "BRANCH_COLOURS") <- NULL
+			attr(subgraph, "SUBGRAPH_MRCA") <- NULL			
+		}		
+	}
+	subgraph[['subgraph.name']]<- subgraph.name
+	subgraph[['subgraph.root.edge']]<- subgraph.root.edge
+	subgraph[['subgraph.parent.state']]<- subgraph.parent.state
+	subgraph	
+}
\ No newline at end of file
diff --git a/phyloscannerR/inst/.DS_Store b/phyloscannerR/inst/.DS_Store
index 035a1fc5..3fb4dabd 100644
Binary files a/phyloscannerR/inst/.DS_Store and b/phyloscannerR/inst/.DS_Store differ
diff --git a/phyloscannerR/inst/.gitignore b/phyloscannerR/inst/.gitignore
new file mode 100644
index 00000000..e43b0f98
--- /dev/null
+++ b/phyloscannerR/inst/.gitignore
@@ -0,0 +1 @@
+.DS_Store
diff --git a/phyloscannerR/inst/example/ex.cast.to.simmap.R b/phyloscannerR/inst/example/ex.cast.to.simmap.R
new file mode 100644
index 00000000..7775d63e
--- /dev/null
+++ b/phyloscannerR/inst/example/ex.cast.to.simmap.R
@@ -0,0 +1,40 @@
+#
+# Example on data from Rakai Community Cohort Study
+#
+require(phyloscannerR)
+ph.phyloscanner <- readRDS(file=system.file(file.path('extdata','ptyr292_1600_to_1849.rds'),package='phyloscannerR'))
+
+#	cast from phyloscanner format to SIMMAP format
+#	so that the SIMMAP tree still has the phyloscanner attributes
+ph.simmap <- phyloscanner.to.simmap(ph.phyloscanner, delete.phyloscanner.structures=FALSE)
+#	to delete the phyloscanner attributes, use
+ph.simmap <- phyloscanner.to.simmap(ph.phyloscanner, delete.phyloscanner.structures=TRUE)
+#
+#	cast from SIMMAP format to phyloscanner format
+#	similarly, use delete.simmap.structures to keep/delete SIMMAP elements from the tree in phyloscanner format
+ph.phyloscanner2 <- simmap.to.phyloscanner(ph.simmap, delete.simmap.structures=TRUE)
+#
+#	test that ph.phyloscanner2 is the same as ph.phyloscanner:
+stopifnot(all( ph.phyloscanner$tip.label == ph.phyloscanner2$tip.label ))
+stopifnot(all( ph.phyloscanner$edge == ph.phyloscanner2$edge ))
+stopifnot(all( ph.phyloscanner$edge.length == ph.phyloscanner2$edge.length ))
+stopifnot(all( ph.phyloscanner$Nnode == ph.phyloscanner2$Nnode ))
+tmp <- as.character(attr(ph.phyloscanner,'SPLIT'))
+tmp2 <- as.character(attr(ph.phyloscanner2,'SPLIT'))
+tmp[is.na(tmp)] <- 'Unknown'
+tmp2[is.na(tmp2)] <- 'Unknown'
+stopifnot( all(tmp==tmp2) )
+tmp <- as.character(attr(ph.phyloscanner,'INDIVIDUAL'))
+tmp2 <- as.character(attr(ph.phyloscanner2,'INDIVIDUAL'))
+tmp[is.na(tmp)] <- 'Unknown'
+tmp2[is.na(tmp2)] <- 'Unknown'
+stopifnot( all(tmp==tmp2) )
+tmp <- as.character(attr(ph.phyloscanner,'BRANCH_COLOURS'))
+tmp2 <- as.character(attr(ph.phyloscanner2,'BRANCH_COLOURS'))
+tmp[is.na(tmp)] <- 'Unknown'
+tmp2[is.na(tmp2)] <- 'Unknown'
+stopifnot( all(tmp==tmp2) )
+tmp <- attr(ph.phyloscanner,'SUBGRAPH_MRCA')
+tmp2 <- attr(ph.phyloscanner2,'SUBGRAPH_MRCA')
+stopifnot( all(tmp==tmp2) )
+	
\ No newline at end of file
diff --git a/phyloscannerR/inst/example/ex.extract.subgraph.R b/phyloscannerR/inst/example/ex.extract.subgraph.R
new file mode 100644
index 00000000..2c7c73fa
--- /dev/null
+++ b/phyloscannerR/inst/example/ex.extract.subgraph.R
@@ -0,0 +1,23 @@
+#
+# Example on data from Rakai Community Cohort Study
+#
+require(phyloscannerR)
+ph.phyloscanner <- readRDS(file=system.file(file.path('extdata','ptyr292_1600_to_1849.rds'),package='phyloscannerR'))
+
+# add SIMMAP elements to tree
+# because internally the extract.subgraph function uses functions written for
+# trees in SIMMAP format
+ph.phsc.plus.simmap <- phyloscanner.to.simmap(ph.phyloscanner, delete.phyloscanner.structures=FALSE)
+
+# extract subgraph MRCAS and choose all those corresponding to a particular host
+host <- 'RkA07714M'
+mrcas <- which( attr(ph.phsc.plus.simmap, 'SUBGRAPH_MRCA') )
+mrcas <- mrcas[ attr(ph.phsc.plus.simmap, 'INDIVIDUAL')[mrcas]==host ]	
+
+# extract subgraphs
+subgraphs <- lapply(mrcas, function(mrca) extract.subgraph(ph.phsc.plus.simmap, mrca))
+# the first subgraph consists of just 1 taxon and is therefore hard to represent as an ape object
+# note the dummy structures
+# the second subgraph consists of 9 taxa and is in standard ape format
+# note the extra elements "subgraph.name", "subgraph.root.edge", "subgraph.parent.state"
+str( subgraphs[[2]] )
\ No newline at end of file
diff --git a/phyloscannerR/inst/example/ex.transmission.networks.R b/phyloscannerR/inst/example/ex.transmission.networks.R
new file mode 100644
index 00000000..1866aadf
--- /dev/null
+++ b/phyloscannerR/inst/example/ex.transmission.networks.R
@@ -0,0 +1,200 @@
+\dontrun{
+require(data.table)
+require(tidyverse)
+require(phyloscannerR)
+workdir<- '~/sandbox/DeepSeqProjects'
+setwd(workdir)	
+#	PLEASE NOTE: the output from PART 1+2 below is available through the package
+#	so these steps can be skipped
+	
+	
+#
+#	PART 1: run phyloscanner_analyse_trees on a large number of deepseq trees
+#
+#	prepare analysis: specify path to phyloscanner_analyse_trees
+prog.phyloscanner_analyse_trees <- '/Users/Oliver/git/phyloscanner/phyloscanner_analyse_trees.R'
+#	prepare analysis: download the phyloscanner tree of the Rakai population-based analysis  
+tmp <- "https://datadryad.org/bitstream/handle/10255/dryad.208473/Dataset_S1.tar?sequence=1"
+#	prepare analysis: specify directories to untar public data and to store results
+tree.dir <- "RakaiPopSample_deepseqtrees"	
+outdir <- 'RakaiPopSample_phyloscanner_analysis_190706'
+#	prepare analysis: download and untar
+download.file(tmp, destfile="Dataset_S1.tar", method="curl")
+untar("Dataset_S1.tar", exdir=tree.dir, extras='-xvf')	
+#	prepare analysis: specify valid input arguments to phyloscanner_analyse_trees
+valid.input.args <- cmd.phyloscanner.analyse.trees.valid.args(prog.phyloscanner_analyse_trees)
+#	prepare analysis: set phyloscanner variables, arguments as used for the Rakai population-based analysis
+control	<- list()
+control$allow.mt <- TRUE				
+control$alignment.file.directory = NULL 
+control$alignment.file.regex = NULL
+control$blacklist.underrepresented = FALSE	
+control$count.reads.in.parsimony = TRUE
+control$distance.threshold <- '0.025 0.05'
+control$do.dual.blacklisting = FALSE					
+control$duplicate.file.directory = NULL
+control$duplicate.file.regex = NULL
+control$file.name.regex = "^\\D*([0-9]+)_to_([0-9]+)\\D*$"
+control$guess.multifurcation.threshold = FALSE
+control$max.reads.per.host = 50
+control$min.reads.per.host <- 30
+control$min.tips.per.host <- 1	
+control$multifurcation.threshold = 1e-5
+control$multinomial= TRUE
+control$norm.constants = NULL
+control$norm.ref.file.name = system.file('HIV_DistanceNormalisationOverGenome.csv',package='phyloscannerR')
+control$norm.standardise.gag.pol = TRUE
+control$no.progress.bars = TRUE
+control$outgroup.name = "REF_CPX_AF460972"
+control$output.dir = outdir
+control$parsimony.blacklist.k = 20
+control$prune.blacklist = FALSE
+control$post.hoc.count.blacklisting= TRUE
+control$ratio.blacklist.threshold = 0 
+control$raw.blacklist.threshold = 20					
+control$recombination.file.directory = NULL
+control$recombination.file.regex = NULL
+control$relaxed.ancestry = TRUE
+control$sankoff.k = 20
+control$sankoff.unassigned.switch.threshold = 0
+control$seed = 42
+control$splits.rule = 's'
+control$tip.regex = "^(.*)_fq[0-9]+_read_([0-9]+)_count_([0-9]+)$"
+control$tree.file.regex = "^ptyr[0-9]+_InWindow_([0-9]+_to_[0-9]+)\\.tree$"
+control$use.ff = FALSE
+control$user.blacklist.directory = NULL 
+control$user.blacklist.file.regex = NULL
+control$verbosity = 1
+#	prepare analysis: list zipped tree files. One zip file contains the viral trees of individuals in one putative transmission network. 
+df <- tibble(F=list.files(tree.dir))
+df <- df %>% 
+		mutate(TYPE:= gsub('ptyr([0-9]+)_(.*)','\\2', F),
+				RUN:= as.integer(gsub('ptyr([0-9]+)_(.*)','\\1', F))) %>%
+		mutate(TYPE:= gsub('^([^\\.]+)\\.[a-z]+$','\\1',TYPE)) %>%
+		spread(TYPE, F) %>%
+		set_names(~ str_to_upper(.))
+#	make bash scripts: one each for processing the viral trees of individuals in one putative transmission network
+cmds <- vector('list',nrow(df))
+for(i in seq_len(nrow(df)))
+{
+	control$output.string <- paste0('ptyr',df$RUN[i])	
+	tree.input <- file.path(indir, df$TREES_NEWICK[i])
+	cmd <- phyloscannerR:::cmd.phyloscanner.analyse.trees(prog.phyloscanner_analyse_trees, 
+			tree.input, 
+			control,
+			valid.input.args=valid.input.args)
+	cmds[[i]] <- cmd		
+}	
+#	submit bash scripts: make PBS header
+hpc.load <- "module load anaconda3/personal"	# make third party requirements available	 
+hpc.select <- 1						# number of nodes
+hpc.nproc <- 1						# number of processors on node
+hpc.walltime <- 23						# walltime
+hpc.q <- "pqeelab"				# PBS queue
+hpc.mem <- "6gb" 					# RAM		
+hpc.array <- length(cmds)	# number of runs for job array	
+pbshead <- "#!/bin/sh"
+tmp <- paste("#PBS -l walltime=", hpc.walltime, ":59:00,pcput=", hpc.walltime, ":45:00", sep = "")
+pbshead <- paste(pbshead, tmp, sep = "\n")
+tmp <- paste("#PBS -l select=", hpc.select, ":ncpus=", hpc.nproc,":mem=", hpc.mem, sep = "")
+pbshead <- paste(pbshead, tmp, sep = "\n")
+pbshead <- paste(pbshead, "#PBS -j oe", sep = "\n")	
+if(!is.na(hpc.array))
+	pbshead	<- paste(pbshead, "\n#PBS -J 1-", hpc.array, sep='')	
+if(!is.na(hpc.q)) 
+	pbshead <- paste(pbshead, paste("#PBS -q", hpc.q), sep = "\n")
+pbshead <- paste(pbshead, hpc.load, sep = "\n")	
+cat(pbshead)
+#	submit bash scripts: make array job
+for(i in 1:length(cmds))
+	cmds[[i]]<- paste0(i,')\n',cmds[[i]],';;\n')
+cmd <- paste0('case $PBS_ARRAY_INDEX in\n',paste0(cmds, collapse=''),'esac')	
+cmd <- paste(pbshead,cmd,sep='\n')	
+#	submit bash scripts: submit job
+outfile <- gsub(':','',paste("phsc",paste(strsplit(date(),split=' ')[[1]],collapse='_',sep=''),'sh',sep='.'))
+outfile <- file.path(tmpdir, outfile)
+cat(cmd, file=outfile)
+cmd <- paste("qsub", outfile)
+cat(system(cmd, intern= TRUE))	
+
+
+
+#
+#	PART 2: find pairs in phylogenetic transmission networks 
+#
+
+
+
+#	find network pairs: get meta-data
+meta.file <- file.path(workdir,"RakaiPopSample_data","Dataset_S2.csv")
+tmp <- "https://datadryad.org/bitstream/handle/10255/dryad.208474/Dataset_S2.csv?sequence=2"
+download.file(tmp, destfile=meta.file, method="curl")
+dmeta <- as_tibble(read.csv(meta.file, stringsAsFactors=FALSE))
+#	find network pairs: process analyse_tree output with file names "ptyr[0-9]+_workspace.rda"
+indir <- file.path(workdir,"RakaiPopSample_phyloscanner_analysis_190706")
+control <- list(	linked.group='close.and.adjacent.cat',
+		linked.no='not.close.or.nonadjacent',
+		linked.yes='close.and.adjacent', 
+		conf.cut=0.6, 
+		neff.cut=3,
+		weight.complex.or.no.ancestry=0.5)	
+# process phyloscanner runs
+batch.regex <- '^ptyr([0-9]+)_.*'	
+infiles	<- tibble(F=list.files(indir, pattern='workspace.rda', full.names=TRUE)) %>%
+		mutate( PTY_RUN:= as.integer(gsub(batch.regex,'\\1',basename(F))) ) %>%
+		arrange(PTY_RUN)		
+dpls <- vector('list', nrow(infiles))
+dcs <- vector('list', nrow(infiles))
+dws <- vector('list', nrow(infiles))
+for(i in seq_len(nrow(infiles)))
+{		
+	if(verbose)	cat('\nReading ', infiles$F[i])
+	tmp	<- load( infiles$F[i] )
+	#	ensure we have multinomial output in workspace
+	if(!'dc'%in%tmp)
+		stop('Cannot find object "dc". Check that Analyze.trees was run with multinomial=TRUE.')
+	if(!'dwin'%in%tmp)
+		stop('Cannot find object "dwin". Check that Analyze.trees was run with multinomial=TRUE.')
+	#	find pairs
+	tmp <- phyloscannerR:::find.pairs.in.networks(dwin, dc, control=control, verbose=TRUE)
+	#	store output for each run
+	dpls[[i]] <- copy(tmp$network.pairs)
+	dcs[[i]] <- copy(tmp$relationship.counts)
+	dws[[i]] <- copy(tmp$windows)	
+	gc()
+}
+dpl <- do.call('rbind', dpls)
+dc <- do.call('rbind', dcs)
+dw <- do.call('rbind', dwins)
+dpls <- dcs <- dwins <- NULL
+#	select analysis in which each pair has highest neff
+if(verbose) cat('\nIf pairs are in several batches, select batch with most deep-sequence phylogenies...')
+tmp <- dc %>% 
+		filter( CATEGORISATION==control$linked.group & TYPE==control$linked.yes )	%>%
+		group_by(H1,H2) %>%
+		summarise(PTY_RUN= PTY_RUN[which.max(N_EFF)]) %>%
+		ungroup()
+dc <- tmp %>% left_join(dc, by=c('H1','H2','PTY_RUN'))
+dw <- tmp %>% left_join(dw, by=c('H1','H2','PTY_RUN'))
+dpl <- tmp %>% left_join(dpl, by=c('H1','H2','PTY_RUN'))	
+if(verbose) cat('\nLeft with pairs between whom linkage is not excluded phylogenetically, n=', nrow(dpls))
+#	find network pairs: save
+outfile <- file.path(workdir, "RakaiPopSample_phyloscanner_analysis_190706", "Rakai_phscnetworks_allpairs_190706.RData")
+save(dpl, dc, dw, file=outfile)
+
+
+
+#
+#	PART 3: build transmission networks from constituting pairs
+#
+
+
+infile <- system.file(file.path('extdata','Rakai_phscnetworks_allpairs_190706.RData'),package='phyloscannerR')
+load(infile) 
+# loads phyloscanner relationship counts 'dc'
+control <- list(linked.group='close.and.adjacent.cat',linked.no='not.close.or.nonadjacent',linked.yes='close.and.adjacent', neff.cut=3)
+tmp <- find.networks(dc, control=control, verbose=TRUE)
+dnet <- copy(tmp$transmission.networks)
+dchain <- copy(tmp$most.likely.transmission.chains)
+
+}
diff --git a/phyloscannerR/inst/example/ex.transmission.networks.plotting.R b/phyloscannerR/inst/example/ex.transmission.networks.plotting.R
new file mode 100644
index 00000000..08858f22
--- /dev/null
+++ b/phyloscannerR/inst/example/ex.transmission.networks.plotting.R
@@ -0,0 +1,57 @@
+\dontrun{
+require(data.table)
+require(tidyverse)
+require(phyloscannerR)
+
+#
+# reconstruct transmission networks and most likely transmission chain
+#
+infile <- system.file(file.path('extdata','Rakai_phscnetworks_allpairs_190706.RData'),package='phyloscannerR')
+load(infile) 	# loads phyloscanner relationship counts 'dc'
+tmp <- find.networks(dc, neff.cut=3, verbose=TRUE)
+dnet <- copy(tmp$transmission.networks)
+dchain <- copy(tmp$most.likely.transmission.chains)
+
+# get meta data
+meta.file <- file.path(workdir,"RakaiPopSample_data","Dataset_S2.csv")
+tmp <- "https://datadryad.org/bitstream/handle/10255/dryad.208474/Dataset_S2.csv?sequence=2"
+download.file(tmp, destfile=meta.file, method="curl")
+dmeta <- as_tibble(read.csv(meta.file, stringsAsFactors=FALSE))
+
+#
+# plot network with host names, and male/female in blue/pink nodes, 
+# 	and highlight pairings with >60% support for phylogenetic linkage
+#
+idclus <- sort(unique(dnet$IDCLU))
+di <- copy(dmeta)
+setnames(di, 'ID', 'H')
+df <- dnet %>% 
+		filter(IDCLU == 34) %>%
+		select(-c(H1_SEX,H2_SEX))		
+control<- list()
+control$point.size = 10
+control$edge.gap = 0.04
+control$edge.size = 2
+control$curvature = -0.2
+control$arrow = arrow(length = unit(0.04, "npc"), type = "open")
+control$curv.shift = 0.06
+control$label.size = 3
+control$node.label = "H" 
+control$node.fill = "SEX"
+control$node.shape = NA_character_
+control$node.shape.values = c(`NA` = 16)
+control$node.fill.values = c(F = "hotpink2", M = "steelblue2")
+control$threshold.linked = 0.6
+p <- plot.network(df, di, control)	
+
+#
+# plot most likely chain with host names, and male/female in blue/pink nodes, 
+# 	and highlight pairings with >60% support for phylogenetic linkage
+#
+control$layout <- p$layout 
+di <- copy(dmeta)			
+setnames(di, 'ID', 'H')
+df <- subset(dchain, IDCLU==34)	
+p2 <- plot.chain(df, di, control)
+
+}
\ No newline at end of file
diff --git a/phyloscannerR/inst/extdata/Rakai_phscnetworks_allpairs_190706.RData b/phyloscannerR/inst/extdata/Rakai_phscnetworks_allpairs_190706.RData
new file mode 100644
index 00000000..e7980f77
Binary files /dev/null and b/phyloscannerR/inst/extdata/Rakai_phscnetworks_allpairs_190706.RData differ
diff --git a/phyloscannerR/inst/extdata/ptyr292_1600_to_1849.rds b/phyloscannerR/inst/extdata/ptyr292_1600_to_1849.rds
new file mode 100644
index 00000000..103f37b2
Binary files /dev/null and b/phyloscannerR/inst/extdata/ptyr292_1600_to_1849.rds differ
diff --git a/phyloscannerR/man/cmd.phyloscanner.analyse.trees.Rd b/phyloscannerR/man/cmd.phyloscanner.analyse.trees.Rd
new file mode 100644
index 00000000..e1baa7e3
--- /dev/null
+++ b/phyloscannerR/man/cmd.phyloscanner.analyse.trees.Rd
@@ -0,0 +1,133 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/command_line_utilities.R
+\name{cmd.phyloscanner.analyse.trees}
+\alias{cmd.phyloscanner.analyse.trees}
+\title{Make script file for a phyloscanner analysis on a tree or set of trees}
+\usage{
+cmd.phyloscanner.analyse.trees(prog.phyloscanner_analyse_trees, tree.input,
+  control,
+  valid.input.args = cmd.phyloscanner.analyse.trees.valid.args(prog.phyloscanner_analyse_trees))
+}
+\arguments{
+\item{prog.phyloscanner_analyse_trees}{The full file name of \code{phyloscanner_analyse_trees.R}.}
+
+\item{tree.input}{One of the following: the name of a single tree file (Newick or NEXUS format); the directory containing all input trees; a zip file containing input trees.}
+
+\item{control}{List of input arguments to \link{\code{phyloscanner_analyse_trees}}.}
+
+\item{valid.input.args}{Vector of valid input arguments.}
+}
+\value{
+A character string of UNIX commands.
+}
+\description{
+This function makes a UNIX script file to call \code{phyloscanner_analyse_trees.R}. Usually, this is useful to parallelise computations; see the Examples.
+}
+\examples{
+\dontrun{
+require(data.table)
+require(tidyverse)
+require(phyloscannerR)
+
+#	specify path to phyloscanner_analyse_trees
+prog.phyloscanner_analyse_trees <- '/Users/Oliver/git/phyloscanner/phyloscanner_analyse_trees.R'
+#	specify out directory
+outdir <- '/Users/Oliver/sandbox/DeepSeqProjects/RakaiPopSample_phsc_out190512'
+#	specify valid input arguments to phyloscanner_analyse_trees
+valid.input.args <- cmd.phyloscanner.analyse.trees.valid.args(prog.phyloscanner_analyse_trees)
+
+#	set phyloscanner variables
+#	arguments as used for the Rakai population-based analysis
+control	<- list()
+control$allow.mt <- TRUE				
+control$alignment.file.directory = NULL 
+control$alignment.file.regex = NULL
+control$blacklist.underrepresented = FALSE	
+control$count.reads.in.parsimony = TRUE
+control$distance.threshold <- '0.025 0.05'
+control$do.dual.blacklisting = FALSE					
+control$duplicate.file.directory = NULL
+control$duplicate.file.regex = NULL
+control$file.name.regex = "^\\\\D*([0-9]+)_to_([0-9]+)\\\\D*$"
+control$guess.multifurcation.threshold = FALSE
+control$max.reads.per.host = 50
+control$min.reads.per.host <- 30
+control$min.tips.per.host <- 1	
+control$multifurcation.threshold = 1e-5
+control$multinomial= TRUE
+control$norm.constants = NULL
+control$norm.ref.file.name = system.file('HIV_DistanceNormalisationOverGenome.csv',package='phyloscannerR')
+control$norm.standardise.gag.pol = TRUE
+control$no.progress.bars = TRUE
+control$outgroup.name = "REF_CPX_AF460972"
+control$output.dir = outdir
+control$parsimony.blacklist.k = 20
+control$prune.blacklist = FALSE
+control$post.hoc.count.blacklisting= TRUE
+control$ratio.blacklist.threshold = 0 
+control$raw.blacklist.threshold = 20					
+control$recombination.file.directory = NULL
+control$recombination.file.regex = NULL
+control$relaxed.ancestry = TRUE
+control$sankoff.k = 20
+control$sankoff.unassigned.switch.threshold = 0
+control$seed = 42
+control$splits.rule = 's'
+control$tip.regex = "^(.*)_fq[0-9]+_read_([0-9]+)_count_([0-9]+)$"
+control$tree.file.regex = "^ptyr[0-9]+_InWindow_([0-9]+_to_[0-9]+)\\\\.tree$"
+control$use.ff = FALSE
+control$user.blacklist.directory = NULL 
+control$user.blacklist.file.regex = NULL
+control$verbosity = 1
+
+#
+#	Example 1: make bash for one file
+#
+tree.input <- system.file(file.path('extdata','Rakai_run192_trees.zip'),package='phyloscannerR')
+control$output.string <- 'Rakai_run192'
+cmd <- cmd.phyloscanner.analyse.trees(prog.phyloscanner_analyse_trees, 
+		tree.input, 
+		control,
+		valid.input.args=valid.input.args)
+cat(cmd)
+
+#
+#	Example 2: make bash for many files
+#
+#	download the phyloscanner tree of the Rakai population-based analysis  
+tmp <- "https://datadryad.org/bitstream/handle/10255/dryad.208473/Dataset_S1.tar?sequence=1"
+#	specify directory to untar public data
+tree.dir <- "RakaiPopSample_deepseqtrees"
+#	download and untar
+download.file(tmp, destfile="Dataset_S1.tar", method="curl")
+untar("Dataset_S1.tar", exdir=tree.dir, extras='-xvf')	
+#	list zipped tree files. One zip file contains the viral trees of individuals in one putative transmission network. 
+df <- tibble(F=list.files(tree.dir))
+df <- df \%>\% 
+		mutate(TYPE:= gsub('ptyr([0-9]+)_(.*)','\\\\2', F),
+			RUN:= as.integer(gsub('ptyr([0-9]+)_(.*)','\\\\1', F))) \%>\%
+		mutate(TYPE:= gsub('^([^\\\\.]+)\\\\.[a-z]+$','\\\\1',TYPE)) \%>\%
+		spread(TYPE, F) \%>\%
+		set_names(~ str_to_upper(.))
+#	make one bash script for processing the viral trees of individuals in one putative transmission network.
+cmds <- vector('list',nrow(df))
+for(i in seq_len(nrow(df)))
+{
+	control$output.string <- paste0('ptyr',df$RUN[i])	
+	tree.input <- file.path(indir, df$TREES_NEWICK[i])
+	cmd <- cmd.phyloscanner.analyse.trees(prog.phyloscanner_analyse_trees, 
+			tree.input, 
+			control,
+			valid.input.args=valid.input.args)
+	cmds[[i]] <- cmd		
+}	
+#	output
+cat(cmds[[100]])
+}
+}
+\seealso{
+\link{\code{phyloscanner.analyse.trees}}, \link{\code{cmd.phyloscanner.analyse.trees.valid.args}}
+}
+\author{
+Oliver Ratmann
+}
diff --git a/phyloscannerR/man/cmd.phyloscanner.analyse.trees.valid.args.Rd b/phyloscannerR/man/cmd.phyloscanner.analyse.trees.valid.args.Rd
new file mode 100644
index 00000000..9d4e986e
--- /dev/null
+++ b/phyloscannerR/man/cmd.phyloscanner.analyse.trees.valid.args.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/command_line_utilities.R
+\name{cmd.phyloscanner.analyse.trees.valid.args}
+\alias{cmd.phyloscanner.analyse.trees.valid.args}
+\title{Obtain valid input arguments for a phyloscanner analysis on a tree or set of trees}
+\usage{
+cmd.phyloscanner.analyse.trees.valid.args(prog.phyloscanner_analyse_trees)
+}
+\arguments{
+\item{prog.phyloscanner_analyse_trees}{The full file name of \code{phyloscanner_analyse_trees.R}.}
+}
+\description{
+Obtain valid input arguments for a phyloscanner analysis on a tree or set of trees
+}
+\seealso{
+\link{\code{cmd.phyloscanner.analyse.trees}}
+}
diff --git a/phyloscannerR/man/count.pairwise.relationships.Rd b/phyloscannerR/man/count.pairwise.relationships.Rd
index b72d7415..7e7fb7a8 100644
--- a/phyloscannerR/man/count.pairwise.relationships.Rd
+++ b/phyloscannerR/man/count.pairwise.relationships.Rd
@@ -26,7 +26,7 @@ require(phyloscannerR)
 #	continue Rakai example,
 #	load phyloscanner output from 'phyloscanner.analyse.trees'
 #	
-file	<- system.file(file.path('extdata','ptyr192_phsc_analyse_trees_output.R'),package='phyloscannerR')
+file	<- system.file(file.path('extdata','ptyr192_phsc_analyse_trees_output.Rdata'),package='phyloscannerR')
 load(file)	#loads 'phsc', output from 'phyloscanner.analyse.trees'
 #	use distance thresholds found in analysis of Rakai couples
 close.threshold <- 0.025
diff --git a/phyloscannerR/man/extract.subgraph.Rd b/phyloscannerR/man/extract.subgraph.Rd
new file mode 100644
index 00000000..d099557b
--- /dev/null
+++ b/phyloscannerR/man/extract.subgraph.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tree_utility_functions.R
+\name{extract.subgraph}
+\alias{extract.subgraph}
+\title{Extract subgraph from phyloscanner tree}
+\usage{
+extract.subgraph(ph, mrca)
+}
+\arguments{
+\item{ph}{A tree of class \code{SIMMAP} and \code{phyloscanner}, i.e. with elements \code{maps}, \code{mapped.edge}, \code{node.states} and with attributes \code{SPLIT}, \code{INDIVIDUAL}, \code{BRANCH_COLOURS}, \code{SUBGRAPH_MRCA}.}
+
+\item{mrca}{Node index of subgraph MRCA}
+}
+\value{
+Subgraph in ape format, with additional elements \code{subgraph.name} (subgraph name in the phyloscanner SPLIT attribute), \code{subgraph.root.edge} (length of the ancestral edge of the subgraph MRCA), \code{subgraph.parent.state} (ancestral state of the parent of subgraph MRCA).
+}
+\description{
+Extract subgraph from phyloscanner tree
+}
+\examples{
+#
+# Example on data from Rakai Community Cohort Study
+#
+require(phyloscannerR)
+ph.phyloscanner <- readRDS(file=system.file(file.path('extdata','ptyr292_1600_to_1849.rds'),package='phyloscannerR'))
+
+# add SIMMAP elements to tree
+# because internally the extract.subgraph function uses functions written for
+# trees in SIMMAP format
+ph.phsc.plus.simmap <- phyloscanner.to.simmap(ph.phyloscanner, delete.phyloscanner.structures=FALSE)
+
+# extract subgraph MRCAS and choose all those corresponding to a particular host
+host <- 'RkA07714M'
+mrcas <- which( attr(ph.phsc.plus.simmap, 'SUBGRAPH_MRCA') )
+mrcas <- mrcas[ attr(ph.phsc.plus.simmap, 'INDIVIDUAL')[mrcas]==host ]	
+
+# extract subgraphs
+subgraphs <- lapply(mrcas, function(mrca) extract.subgraph(ph.phsc.plus.simmap, mrca))
+# the first subgraph consists of just 1 taxon and is therefore hard to represent as an ape object
+# note the dummy structures
+# the second subgraph consists of 9 taxa and is in standard ape format
+# note the extra elements "subgraph.name", "subgraph.root.edge", "subgraph.parent.state"
+str( subgraphs[[2]] )
+}
+\seealso{
+\code{\link{phyloscanner.to.simmap}}
+}
+\author{
+Oliver Ratmann
+}
diff --git a/phyloscannerR/man/find.most.likely.chains.RBGLedmonds.Rd b/phyloscannerR/man/find.most.likely.chains.RBGLedmonds.Rd
new file mode 100644
index 00000000..1a2fa6f0
--- /dev/null
+++ b/phyloscannerR/man/find.most.likely.chains.RBGLedmonds.Rd
@@ -0,0 +1,38 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/transmission_network_functions.R
+\name{find.most.likely.chains.RBGLedmonds}
+\alias{find.most.likely.chains.RBGLedmonds}
+\title{Find most likely transmission chains}
+\usage{
+find.most.likely.chains.RBGLedmonds(rtnn,
+  weight.complex.or.no.ancestry = 0.5, verbose = 0)
+}
+\arguments{
+\item{rtnn}{tibble encoding the three edges of a phyloscanner transmission network. Must have columns 'H1','H2','IDCLU','TYPE','SCORE','K_EFF'.}
+
+\item{weight.complex.or.no.ancestry}{weight given to score complex.or.no.ancestry, default: 50%}
+}
+\value{
+tibble encoding the most likely chain. Has columns 'H1','H2','IDCLU', 'LINK_12', 'LINK_21' (either 1 or 0 for a link in the corresponding direction), and 'MX_PROB_12', 'MX_PROB_21' (associated posterior probabilities)
+}
+\description{
+This function finds the most likely transmission 
+chain among all chains spanning the nodes in a specified transmission network.
+The transmission network consists of at most three edges between a set of
+individuals (directed edge in either direction, and undirected edge). Chains 
+are defined as spanning graphs through the set of nodes, without loops and with
+in-degree equal to one for all nodes, except the start node. Each directed edge 
+in a chain has a weight, which corresponds to the phylogenetic evidence of transmission
+in this direction. It is set to the phyloscanner score for transmission in this 
+direction plus half the phyloscanner score of the undirected edge, for transmission 
+with direction unclear. The probability of the entire chain is given by the product
+of the phyloscanner scores along each edge in the chain.   
+Edmonds algorithm is used to solve for the most probable chain.
+}
+\seealso{
+\code{\link{find.networks}}
+}
+\author{
+Oliver Ratmann
+}
+\keyword{internal}
diff --git a/phyloscannerR/man/find.networks.Rd b/phyloscannerR/man/find.networks.Rd
new file mode 100644
index 00000000..221eb4f1
--- /dev/null
+++ b/phyloscannerR/man/find.networks.Rd
@@ -0,0 +1,264 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/transmission_network_functions.R
+\name{find.networks}
+\alias{find.networks}
+\title{Find phylogenetic transmission networks and most likely transmission chain}
+\usage{
+find.networks(dc, control = list(linked.group = "close.and.adjacent.cat",
+  linked.no = "not.close.or.nonadjacent", linked.yes =
+  "close.and.adjacent", neff.cut = 3, weight.complex.or.no.ancestry = 0.5),
+  verbose = TRUE)
+}
+\arguments{
+\item{dc}{Summary of phylogenetic relationship counts for each pair, stored as tibble.}
+
+\item{control}{List of control variables:
+\itemize{
+        \item{\code{linked.group}} Phyloscanner classification used to identify pairs in networks. Default 'close.and.adjacent.cat'.
+        \item{\code{linked.no}} Phyloscanner classification type quantifying that pairs are not linked. Default 'not.close.or.nonadjacent'.
+        \item{\code{linked.yes}} Phyloscanner classification type quantifying that pairs are linked. Default 'close.and.adjacent'.
+        \item{\code{neff.cut}} Threshold on the minimum number of deep-sequence phylogenies with sufficient reads from two individuals to make any phylogenetic inferences. Default: 3.
+        \item{\code{weight.complex.or.no.ancestry}} Weight given to score complex.or.no.ancestry. Default: 50%.  
+}}
+
+\item{verbose}{Flag to switch on/off verbose mode. Default: TRUE.}
+}
+\value{
+list of two R objects 
+\itemize{
+		\item{\code{transmission.networks}} is a tibble that describes the edge list of pairs of individuals in a network, and corresponding phyloscanner scores
+		\item{\code{most.likely.transmission.chains}} is a tibble that describes the edge list of pairs of individuals in the most likely chain, and corresponding phyloscanner scores
+}
+See description.
+}
+\description{
+This function computes transmission networks from phyloscanner output of a population-based deep sequence sample. 
+A transmission network is defined as a set of individuals between whom phylogenetic linkage is not excluded.
+Every individual in the network has at least one partner in the network between whom evidence for being phylogenetically unlinked is below a threshold.
+These pairs of individuals are identified with a separate function, \code{\link{find.pairs.in.networks}}.  
+Due to the nature of the deep-sequence
+data, there are up to three edges between pairs of individuals, giving the strength of evidence
+of spread in each direction (two possibilities) and the strength of evidence for phylogenetic linkage with 
+the direction remaining unclear. Some of these pairs have limited evidence for phylogenetic linkage. The networks
+are best interpreted as partially sampled transmission chain. 
+
+This function also finds the most likely transmission 
+chain among all chains spanning the nodes in a specified transmission network.
+The transmission network consists of at most three edges between a set of
+individuals (directed edge in either direction, and undirected edge). Chains 
+are defined as spanning graphs through the set of nodes, without loops and with
+in-degree equal to one for all nodes, except the start node. Each directed edge 
+in a chain has a weight, which corresponds to the phylogenetic evidence of transmission
+in this direction. It is set to the phyloscanner score for transmission in this 
+direction plus half the phyloscanner score of the undirected edge, for transmission 
+with direction unclear. The probability of the entire chain is given by the product
+of the phyloscanner scores along each edge in the chain.
+}
+\examples{
+\dontrun{
+require(data.table)
+require(tidyverse)
+require(phyloscannerR)
+workdir<- '~/sandbox/DeepSeqProjects'
+setwd(workdir)	
+#	PLEASE NOTE: the output from PART 1+2 below is available through the package
+#	so these steps can be skipped
+	
+	
+#
+#	PART 1: run phyloscanner_analyse_trees on a large number of deepseq trees
+#
+#	prepare analysis: specify path to phyloscanner_analyse_trees
+prog.phyloscanner_analyse_trees <- '/Users/Oliver/git/phyloscanner/phyloscanner_analyse_trees.R'
+#	prepare analysis: download the phyloscanner tree of the Rakai population-based analysis  
+tmp <- "https://datadryad.org/bitstream/handle/10255/dryad.208473/Dataset_S1.tar?sequence=1"
+#	prepare analysis: specify directories to untar public data and to store results
+tree.dir <- "RakaiPopSample_deepseqtrees"	
+outdir <- 'RakaiPopSample_phyloscanner_analysis_190706'
+#	prepare analysis: download and untar
+download.file(tmp, destfile="Dataset_S1.tar", method="curl")
+untar("Dataset_S1.tar", exdir=tree.dir, extras='-xvf')	
+#	prepare analysis: specify valid input arguments to phyloscanner_analyse_trees
+valid.input.args <- cmd.phyloscanner.analyse.trees.valid.args(prog.phyloscanner_analyse_trees)
+#	prepare analysis: set phyloscanner variables, arguments as used for the Rakai population-based analysis
+control	<- list()
+control$allow.mt <- TRUE				
+control$alignment.file.directory = NULL 
+control$alignment.file.regex = NULL
+control$blacklist.underrepresented = FALSE	
+control$count.reads.in.parsimony = TRUE
+control$distance.threshold <- '0.025 0.05'
+control$do.dual.blacklisting = FALSE					
+control$duplicate.file.directory = NULL
+control$duplicate.file.regex = NULL
+control$file.name.regex = "^\\\\D*([0-9]+)_to_([0-9]+)\\\\D*$"
+control$guess.multifurcation.threshold = FALSE
+control$max.reads.per.host = 50
+control$min.reads.per.host <- 30
+control$min.tips.per.host <- 1	
+control$multifurcation.threshold = 1e-5
+control$multinomial= TRUE
+control$norm.constants = NULL
+control$norm.ref.file.name = system.file('HIV_DistanceNormalisationOverGenome.csv',package='phyloscannerR')
+control$norm.standardise.gag.pol = TRUE
+control$no.progress.bars = TRUE
+control$outgroup.name = "REF_CPX_AF460972"
+control$output.dir = outdir
+control$parsimony.blacklist.k = 20
+control$prune.blacklist = FALSE
+control$post.hoc.count.blacklisting= TRUE
+control$ratio.blacklist.threshold = 0 
+control$raw.blacklist.threshold = 20					
+control$recombination.file.directory = NULL
+control$recombination.file.regex = NULL
+control$relaxed.ancestry = TRUE
+control$sankoff.k = 20
+control$sankoff.unassigned.switch.threshold = 0
+control$seed = 42
+control$splits.rule = 's'
+control$tip.regex = "^(.*)_fq[0-9]+_read_([0-9]+)_count_([0-9]+)$"
+control$tree.file.regex = "^ptyr[0-9]+_InWindow_([0-9]+_to_[0-9]+)\\\\.tree$"
+control$use.ff = FALSE
+control$user.blacklist.directory = NULL 
+control$user.blacklist.file.regex = NULL
+control$verbosity = 1
+#	prepare analysis: list zipped tree files. One zip file contains the viral trees of individuals in one putative transmission network. 
+df <- tibble(F=list.files(tree.dir))
+df <- df \%>\% 
+		mutate(TYPE:= gsub('ptyr([0-9]+)_(.*)','\\\\2', F),
+				RUN:= as.integer(gsub('ptyr([0-9]+)_(.*)','\\\\1', F))) \%>\%
+		mutate(TYPE:= gsub('^([^\\\\.]+)\\\\.[a-z]+$','\\\\1',TYPE)) \%>\%
+		spread(TYPE, F) \%>\%
+		set_names(~ str_to_upper(.))
+#	make bash scripts: one each for processing the viral trees of individuals in one putative transmission network
+cmds <- vector('list',nrow(df))
+for(i in seq_len(nrow(df)))
+{
+	control$output.string <- paste0('ptyr',df$RUN[i])	
+	tree.input <- file.path(indir, df$TREES_NEWICK[i])
+	cmd <- phyloscannerR:::cmd.phyloscanner.analyse.trees(prog.phyloscanner_analyse_trees, 
+			tree.input, 
+			control,
+			valid.input.args=valid.input.args)
+	cmds[[i]] <- cmd		
+}	
+#	submit bash scripts: make PBS header
+hpc.load <- "module load anaconda3/personal"	# make third party requirements available	 
+hpc.select <- 1						# number of nodes
+hpc.nproc <- 1						# number of processors on node
+hpc.walltime <- 23						# walltime
+hpc.q <- "pqeelab"				# PBS queue
+hpc.mem <- "6gb" 					# RAM		
+hpc.array <- length(cmds)	# number of runs for job array	
+pbshead <- "#!/bin/sh"
+tmp <- paste("#PBS -l walltime=", hpc.walltime, ":59:00,pcput=", hpc.walltime, ":45:00", sep = "")
+pbshead <- paste(pbshead, tmp, sep = "\\n")
+tmp <- paste("#PBS -l select=", hpc.select, ":ncpus=", hpc.nproc,":mem=", hpc.mem, sep = "")
+pbshead <- paste(pbshead, tmp, sep = "\\n")
+pbshead <- paste(pbshead, "#PBS -j oe", sep = "\\n")	
+if(!is.na(hpc.array))
+	pbshead	<- paste(pbshead, "\\n#PBS -J 1-", hpc.array, sep='')	
+if(!is.na(hpc.q)) 
+	pbshead <- paste(pbshead, paste("#PBS -q", hpc.q), sep = "\\n")
+pbshead <- paste(pbshead, hpc.load, sep = "\\n")	
+cat(pbshead)
+#	submit bash scripts: make array job
+for(i in 1:length(cmds))
+	cmds[[i]]<- paste0(i,')\\n',cmds[[i]],';;\\n')
+cmd <- paste0('case $PBS_ARRAY_INDEX in\\n',paste0(cmds, collapse=''),'esac')	
+cmd <- paste(pbshead,cmd,sep='\\n')	
+#	submit bash scripts: submit job
+outfile <- gsub(':','',paste("phsc",paste(strsplit(date(),split=' ')[[1]],collapse='_',sep=''),'sh',sep='.'))
+outfile <- file.path(tmpdir, outfile)
+cat(cmd, file=outfile)
+cmd <- paste("qsub", outfile)
+cat(system(cmd, intern= TRUE))	
+
+
+
+#
+#	PART 2: find pairs in phylogenetic transmission networks 
+#
+
+
+
+#	find network pairs: get meta-data
+meta.file <- file.path(workdir,"RakaiPopSample_data","Dataset_S2.csv")
+tmp <- "https://datadryad.org/bitstream/handle/10255/dryad.208474/Dataset_S2.csv?sequence=2"
+download.file(tmp, destfile=meta.file, method="curl")
+dmeta <- as_tibble(read.csv(meta.file, stringsAsFactors=FALSE))
+#	find network pairs: process analyse_tree output with file names "ptyr[0-9]+_workspace.rda"
+indir <- file.path(workdir,"RakaiPopSample_phyloscanner_analysis_190706")
+control <- list(	linked.group='close.and.adjacent.cat',
+		linked.no='not.close.or.nonadjacent',
+		linked.yes='close.and.adjacent', 
+		conf.cut=0.6, 
+		neff.cut=3,
+		weight.complex.or.no.ancestry=0.5)	
+# process phyloscanner runs
+batch.regex <- '^ptyr([0-9]+)_.*'	
+infiles	<- tibble(F=list.files(indir, pattern='workspace.rda', full.names=TRUE)) \%>\%
+		mutate( PTY_RUN:= as.integer(gsub(batch.regex,'\\\\1',basename(F))) ) \%>\%
+		arrange(PTY_RUN)		
+dpls <- vector('list', nrow(infiles))
+dcs <- vector('list', nrow(infiles))
+dws <- vector('list', nrow(infiles))
+for(i in seq_len(nrow(infiles)))
+{		
+	if(verbose)	cat('\\nReading ', infiles$F[i])
+	tmp	<- load( infiles$F[i] )
+	#	ensure we have multinomial output in workspace
+	if(!'dc'\%in\%tmp)
+		stop('Cannot find object "dc". Check that Analyze.trees was run with multinomial=TRUE.')
+	if(!'dwin'\%in\%tmp)
+		stop('Cannot find object "dwin". Check that Analyze.trees was run with multinomial=TRUE.')
+	#	find pairs
+	tmp <- phyloscannerR:::find.pairs.in.networks(dwin, dc, control=control, verbose=TRUE)
+	#	store output for each run
+	dpls[[i]] <- copy(tmp$network.pairs)
+	dcs[[i]] <- copy(tmp$relationship.counts)
+	dws[[i]] <- copy(tmp$windows)	
+	gc()
+}
+dpl <- do.call('rbind', dpls)
+dc <- do.call('rbind', dcs)
+dw <- do.call('rbind', dwins)
+dpls <- dcs <- dwins <- NULL
+#	select analysis in which each pair has highest neff
+if(verbose) cat('\\nIf pairs are in several batches, select batch with most deep-sequence phylogenies...')
+tmp <- dc \%>\% 
+		filter( CATEGORISATION==control$linked.group & TYPE==control$linked.yes )	\%>\%
+		group_by(H1,H2) \%>\%
+		summarise(PTY_RUN= PTY_RUN[which.max(N_EFF)]) \%>\%
+		ungroup()
+dc <- tmp \%>\% left_join(dc, by=c('H1','H2','PTY_RUN'))
+dw <- tmp \%>\% left_join(dw, by=c('H1','H2','PTY_RUN'))
+dpl <- tmp \%>\% left_join(dpl, by=c('H1','H2','PTY_RUN'))	
+if(verbose) cat('\\nLeft with pairs between whom linkage is not excluded phylogenetically, n=', nrow(dpls))
+#	find network pairs: save
+outfile <- file.path(workdir, "RakaiPopSample_phyloscanner_analysis_190706", "Rakai_phscnetworks_allpairs_190706.RData")
+save(dpl, dc, dw, file=outfile)
+
+
+
+#
+#	PART 3: build transmission networks from constituting pairs
+#
+
+
+infile <- system.file(file.path('extdata','Rakai_phscnetworks_allpairs_190706.RData'),package='phyloscannerR')
+load(infile) 
+# loads phyloscanner relationship counts 'dc'
+control <- list(linked.group='close.and.adjacent.cat',linked.no='not.close.or.nonadjacent',linked.yes='close.and.adjacent', neff.cut=3)
+tmp <- find.networks(dc, control=control, verbose=TRUE)
+dnet <- copy(tmp$transmission.networks)
+dchain <- copy(tmp$most.likely.transmission.chains)
+
+}
+}
+\seealso{
+\code{\link{find.pairs.in.networks}}, \code{\link{plot.network}}, \code{\link{plot.chain}}
+}
+\author{
+Oliver Ratmann
+}
diff --git a/phyloscannerR/man/find.pairs.in.networks.Rd b/phyloscannerR/man/find.pairs.in.networks.Rd
new file mode 100644
index 00000000..c7e2e227
--- /dev/null
+++ b/phyloscannerR/man/find.pairs.in.networks.Rd
@@ -0,0 +1,248 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/transmission_network_functions.R
+\name{find.pairs.in.networks}
+\alias{find.pairs.in.networks}
+\title{Find pairs of individuals between whom linkage is not excluded phylogenetically}
+\usage{
+find.pairs.in.networks(dwin, dc, control = list(linked.group =
+  "close.and.adjacent.cat", linked.no = "not.close.or.nonadjacent",
+  linked.yes = "close.and.adjacent", conf.cut = 0.6, neff.cut = 3),
+  dmeta = NULL, verbose = TRUE)
+}
+\arguments{
+\item{dwin}{A \code{data.frame} describing pairwise relationships between the hosts in each tree; normally output of \code{classify.pairwise.relationships}}
+
+\item{dc}{A \code{data.frame} summarising pairwise relationships between the hosts across all trees; normally output of \code{count.pairwise.relationships}}
+
+\item{control}{List of control variables:
+\itemize{
+        \item{\code{linked.group}} Phyloscanner classification used to identify pairs in networks. Default 'close.and.adjacent.cat'.
+        \item{\code{linked.no}} Phyloscanner classification type quantifying that pairs are not linked. Default 'not.close.or.nonadjacent'.
+        \item{\code{linked.yes}} Phyloscanner classification type quantifying that pairs are linked. Default 'close.and.adjacent'.
+        \item{\code{conf.cut}} Threshold on the proportion of deep-sequence phylogenies with distant/disconnected subgraphs above which pairs are considered phylogenetically unlinked. Default: 0.6
+        \item{\code{neff.cut}} Threshold on the minimum number of deep-sequence phylogenies with sufficient reads from two individuals to make any phylogenetic inferences. Default: 3.
+}}
+
+\item{dmeta}{Optional individual-level meta-data that is to be added to output. Can be NULL.}
+
+\item{verbose}{Flag to switch on/off verbose mode. Default: TRUE.}
+}
+\value{
+Three R objects are generated: 
+\itemize{
+		\item{\code{network.pairs}} is a tibble that describes pairs of individuals between whom linkage is not excluded phylogenetically.
+		\item{\code{relationship.counts}} is a tibble that summarises the phylogenetic relationship counts for each pair.
+		\item{\code{windows}} is a tibble that describes the basic phyloscanner statistics (distance, adjacency, paths between subgraphs) in each deep-sequence phylogeny for each pair.
+}
+}
+\description{
+This function identifies pairs of individuals between whom linkage is not excluded phylogenetically in a large number of phyloscanner analyses, and provides detailed information on them.
+}
+\examples{
+\dontrun{
+require(data.table)
+require(tidyverse)
+require(phyloscannerR)
+workdir<- '~/sandbox/DeepSeqProjects'
+setwd(workdir)	
+#	PLEASE NOTE: the output from PART 1+2 below is available through the package
+#	so these steps can be skipped
+	
+	
+#
+#	PART 1: run phyloscanner_analyse_trees on a large number of deepseq trees
+#
+#	prepare analysis: specify path to phyloscanner_analyse_trees
+prog.phyloscanner_analyse_trees <- '/Users/Oliver/git/phyloscanner/phyloscanner_analyse_trees.R'
+#	prepare analysis: download the phyloscanner tree of the Rakai population-based analysis  
+tmp <- "https://datadryad.org/bitstream/handle/10255/dryad.208473/Dataset_S1.tar?sequence=1"
+#	prepare analysis: specify directories to untar public data and to store results
+tree.dir <- "RakaiPopSample_deepseqtrees"	
+outdir <- 'RakaiPopSample_phyloscanner_analysis_190706'
+#	prepare analysis: download and untar
+download.file(tmp, destfile="Dataset_S1.tar", method="curl")
+untar("Dataset_S1.tar", exdir=tree.dir, extras='-xvf')	
+#	prepare analysis: specify valid input arguments to phyloscanner_analyse_trees
+valid.input.args <- cmd.phyloscanner.analyse.trees.valid.args(prog.phyloscanner_analyse_trees)
+#	prepare analysis: set phyloscanner variables, arguments as used for the Rakai population-based analysis
+control	<- list()
+control$allow.mt <- TRUE				
+control$alignment.file.directory = NULL 
+control$alignment.file.regex = NULL
+control$blacklist.underrepresented = FALSE	
+control$count.reads.in.parsimony = TRUE
+control$distance.threshold <- '0.025 0.05'
+control$do.dual.blacklisting = FALSE					
+control$duplicate.file.directory = NULL
+control$duplicate.file.regex = NULL
+control$file.name.regex = "^\\\\D*([0-9]+)_to_([0-9]+)\\\\D*$"
+control$guess.multifurcation.threshold = FALSE
+control$max.reads.per.host = 50
+control$min.reads.per.host <- 30
+control$min.tips.per.host <- 1	
+control$multifurcation.threshold = 1e-5
+control$multinomial= TRUE
+control$norm.constants = NULL
+control$norm.ref.file.name = system.file('HIV_DistanceNormalisationOverGenome.csv',package='phyloscannerR')
+control$norm.standardise.gag.pol = TRUE
+control$no.progress.bars = TRUE
+control$outgroup.name = "REF_CPX_AF460972"
+control$output.dir = outdir
+control$parsimony.blacklist.k = 20
+control$prune.blacklist = FALSE
+control$post.hoc.count.blacklisting= TRUE
+control$ratio.blacklist.threshold = 0 
+control$raw.blacklist.threshold = 20					
+control$recombination.file.directory = NULL
+control$recombination.file.regex = NULL
+control$relaxed.ancestry = TRUE
+control$sankoff.k = 20
+control$sankoff.unassigned.switch.threshold = 0
+control$seed = 42
+control$splits.rule = 's'
+control$tip.regex = "^(.*)_fq[0-9]+_read_([0-9]+)_count_([0-9]+)$"
+control$tree.file.regex = "^ptyr[0-9]+_InWindow_([0-9]+_to_[0-9]+)\\\\.tree$"
+control$use.ff = FALSE
+control$user.blacklist.directory = NULL 
+control$user.blacklist.file.regex = NULL
+control$verbosity = 1
+#	prepare analysis: list zipped tree files. One zip file contains the viral trees of individuals in one putative transmission network. 
+df <- tibble(F=list.files(tree.dir))
+df <- df \%>\% 
+		mutate(TYPE:= gsub('ptyr([0-9]+)_(.*)','\\\\2', F),
+				RUN:= as.integer(gsub('ptyr([0-9]+)_(.*)','\\\\1', F))) \%>\%
+		mutate(TYPE:= gsub('^([^\\\\.]+)\\\\.[a-z]+$','\\\\1',TYPE)) \%>\%
+		spread(TYPE, F) \%>\%
+		set_names(~ str_to_upper(.))
+#	make bash scripts: one each for processing the viral trees of individuals in one putative transmission network
+cmds <- vector('list',nrow(df))
+for(i in seq_len(nrow(df)))
+{
+	control$output.string <- paste0('ptyr',df$RUN[i])	
+	tree.input <- file.path(indir, df$TREES_NEWICK[i])
+	cmd <- phyloscannerR:::cmd.phyloscanner.analyse.trees(prog.phyloscanner_analyse_trees, 
+			tree.input, 
+			control,
+			valid.input.args=valid.input.args)
+	cmds[[i]] <- cmd		
+}	
+#	submit bash scripts: make PBS header
+hpc.load <- "module load anaconda3/personal"	# make third party requirements available	 
+hpc.select <- 1						# number of nodes
+hpc.nproc <- 1						# number of processors on node
+hpc.walltime <- 23						# walltime
+hpc.q <- "pqeelab"				# PBS queue
+hpc.mem <- "6gb" 					# RAM		
+hpc.array <- length(cmds)	# number of runs for job array	
+pbshead <- "#!/bin/sh"
+tmp <- paste("#PBS -l walltime=", hpc.walltime, ":59:00,pcput=", hpc.walltime, ":45:00", sep = "")
+pbshead <- paste(pbshead, tmp, sep = "\\n")
+tmp <- paste("#PBS -l select=", hpc.select, ":ncpus=", hpc.nproc,":mem=", hpc.mem, sep = "")
+pbshead <- paste(pbshead, tmp, sep = "\\n")
+pbshead <- paste(pbshead, "#PBS -j oe", sep = "\\n")	
+if(!is.na(hpc.array))
+	pbshead	<- paste(pbshead, "\\n#PBS -J 1-", hpc.array, sep='')	
+if(!is.na(hpc.q)) 
+	pbshead <- paste(pbshead, paste("#PBS -q", hpc.q), sep = "\\n")
+pbshead <- paste(pbshead, hpc.load, sep = "\\n")	
+cat(pbshead)
+#	submit bash scripts: make array job
+for(i in 1:length(cmds))
+	cmds[[i]]<- paste0(i,')\\n',cmds[[i]],';;\\n')
+cmd <- paste0('case $PBS_ARRAY_INDEX in\\n',paste0(cmds, collapse=''),'esac')	
+cmd <- paste(pbshead,cmd,sep='\\n')	
+#	submit bash scripts: submit job
+outfile <- gsub(':','',paste("phsc",paste(strsplit(date(),split=' ')[[1]],collapse='_',sep=''),'sh',sep='.'))
+outfile <- file.path(tmpdir, outfile)
+cat(cmd, file=outfile)
+cmd <- paste("qsub", outfile)
+cat(system(cmd, intern= TRUE))	
+
+
+
+#
+#	PART 2: find pairs in phylogenetic transmission networks 
+#
+
+
+
+#	find network pairs: get meta-data
+meta.file <- file.path(workdir,"RakaiPopSample_data","Dataset_S2.csv")
+tmp <- "https://datadryad.org/bitstream/handle/10255/dryad.208474/Dataset_S2.csv?sequence=2"
+download.file(tmp, destfile=meta.file, method="curl")
+dmeta <- as_tibble(read.csv(meta.file, stringsAsFactors=FALSE))
+#	find network pairs: process analyse_tree output with file names "ptyr[0-9]+_workspace.rda"
+indir <- file.path(workdir,"RakaiPopSample_phyloscanner_analysis_190706")
+control <- list(	linked.group='close.and.adjacent.cat',
+		linked.no='not.close.or.nonadjacent',
+		linked.yes='close.and.adjacent', 
+		conf.cut=0.6, 
+		neff.cut=3,
+		weight.complex.or.no.ancestry=0.5)	
+# process phyloscanner runs
+batch.regex <- '^ptyr([0-9]+)_.*'	
+infiles	<- tibble(F=list.files(indir, pattern='workspace.rda', full.names=TRUE)) \%>\%
+		mutate( PTY_RUN:= as.integer(gsub(batch.regex,'\\\\1',basename(F))) ) \%>\%
+		arrange(PTY_RUN)		
+dpls <- vector('list', nrow(infiles))
+dcs <- vector('list', nrow(infiles))
+dws <- vector('list', nrow(infiles))
+for(i in seq_len(nrow(infiles)))
+{		
+	if(verbose)	cat('\\nReading ', infiles$F[i])
+	tmp	<- load( infiles$F[i] )
+	#	ensure we have multinomial output in workspace
+	if(!'dc'\%in\%tmp)
+		stop('Cannot find object "dc". Check that Analyze.trees was run with multinomial=TRUE.')
+	if(!'dwin'\%in\%tmp)
+		stop('Cannot find object "dwin". Check that Analyze.trees was run with multinomial=TRUE.')
+	#	find pairs
+	tmp <- phyloscannerR:::find.pairs.in.networks(dwin, dc, control=control, verbose=TRUE)
+	#	store output for each run
+	dpls[[i]] <- copy(tmp$network.pairs)
+	dcs[[i]] <- copy(tmp$relationship.counts)
+	dws[[i]] <- copy(tmp$windows)	
+	gc()
+}
+dpl <- do.call('rbind', dpls)
+dc <- do.call('rbind', dcs)
+dw <- do.call('rbind', dwins)
+dpls <- dcs <- dwins <- NULL
+#	select analysis in which each pair has highest neff
+if(verbose) cat('\\nIf pairs are in several batches, select batch with most deep-sequence phylogenies...')
+tmp <- dc \%>\% 
+		filter( CATEGORISATION==control$linked.group & TYPE==control$linked.yes )	\%>\%
+		group_by(H1,H2) \%>\%
+		summarise(PTY_RUN= PTY_RUN[which.max(N_EFF)]) \%>\%
+		ungroup()
+dc <- tmp \%>\% left_join(dc, by=c('H1','H2','PTY_RUN'))
+dw <- tmp \%>\% left_join(dw, by=c('H1','H2','PTY_RUN'))
+dpl <- tmp \%>\% left_join(dpl, by=c('H1','H2','PTY_RUN'))	
+if(verbose) cat('\\nLeft with pairs between whom linkage is not excluded phylogenetically, n=', nrow(dpls))
+#	find network pairs: save
+outfile <- file.path(workdir, "RakaiPopSample_phyloscanner_analysis_190706", "Rakai_phscnetworks_allpairs_190706.RData")
+save(dpl, dc, dw, file=outfile)
+
+
+
+#
+#	PART 3: build transmission networks from constituting pairs
+#
+
+
+infile <- system.file(file.path('extdata','Rakai_phscnetworks_allpairs_190706.RData'),package='phyloscannerR')
+load(infile) 
+# loads phyloscanner relationship counts 'dc'
+control <- list(linked.group='close.and.adjacent.cat',linked.no='not.close.or.nonadjacent',linked.yes='close.and.adjacent', neff.cut=3)
+tmp <- find.networks(dc, control=control, verbose=TRUE)
+dnet <- copy(tmp$transmission.networks)
+dchain <- copy(tmp$most.likely.transmission.chains)
+
+}
+}
+\seealso{
+\code{\link{phyloscanner.analyse.trees}}, \code{\link{cmd.phyloscanner.analyse.trees}}
+}
+\author{
+Oliver Ratmann
+}
diff --git a/phyloscannerR/man/phyloscanner.analyse.trees.Rd b/phyloscannerR/man/phyloscanner.analyse.trees.Rd
index 556a3b84..c127f23d 100644
--- a/phyloscannerR/man/phyloscanner.analyse.trees.Rd
+++ b/phyloscannerR/man/phyloscanner.analyse.trees.Rd
@@ -261,3 +261,6 @@ phsc <- phyloscanner.analyse.trees(tree.file.directory,
              )
 }
 }
+\seealso{
+\code{\link{find.pairs.in.networks}}, \code{\link{find.networks}}
+}
diff --git a/phyloscannerR/man/phyloscanner.to.simmap.Rd b/phyloscannerR/man/phyloscanner.to.simmap.Rd
new file mode 100644
index 00000000..233e5833
--- /dev/null
+++ b/phyloscannerR/man/phyloscanner.to.simmap.Rd
@@ -0,0 +1,67 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tree_utility_functions.R
+\name{phyloscanner.to.simmap}
+\alias{phyloscanner.to.simmap}
+\title{Cast phyloscanner tree to SIMMAP tree}
+\usage{
+phyloscanner.to.simmap(ph, delete.phyloscanner.structures)
+}
+\arguments{
+\item{ph}{A \code{phyloscanner.tree} with attributes \code{SPLIT}, \code{INDIVIDUAL}, \code{BRANCH_COLOURS}, \code{SUBGRAPH_MRCA}.}
+
+\item{delete.phyloscanner.structures}{Logical value. If true \code{phyloscanner} attributes are removed from the tree.}
+}
+\value{
+Same tree in \code{SIMMAP} format, with elements \code{maps}, \code{mapped.edge}, \code{node.states}.
+}
+\description{
+Cast phyloscanner tree to SIMMAP tree
+}
+\examples{
+#
+# Example on data from Rakai Community Cohort Study
+#
+require(phyloscannerR)
+ph.phyloscanner <- readRDS(file=system.file(file.path('extdata','ptyr292_1600_to_1849.rds'),package='phyloscannerR'))
+
+#	cast from phyloscanner format to SIMMAP format
+#	so that the SIMMAP tree still has the phyloscanner attributes
+ph.simmap <- phyloscanner.to.simmap(ph.phyloscanner, delete.phyloscanner.structures=FALSE)
+#	to delete the phyloscanner attributes, use
+ph.simmap <- phyloscanner.to.simmap(ph.phyloscanner, delete.phyloscanner.structures=TRUE)
+#
+#	cast from SIMMAP format to phyloscanner format
+#	similarly, use delete.simmap.structures to keep/delete SIMMAP elements from the tree in phyloscanner format
+ph.phyloscanner2 <- simmap.to.phyloscanner(ph.simmap, delete.simmap.structures=TRUE)
+#
+#	test that ph.phyloscanner2 is the same as ph.phyloscanner:
+stopifnot(all( ph.phyloscanner$tip.label == ph.phyloscanner2$tip.label ))
+stopifnot(all( ph.phyloscanner$edge == ph.phyloscanner2$edge ))
+stopifnot(all( ph.phyloscanner$edge.length == ph.phyloscanner2$edge.length ))
+stopifnot(all( ph.phyloscanner$Nnode == ph.phyloscanner2$Nnode ))
+tmp <- as.character(attr(ph.phyloscanner,'SPLIT'))
+tmp2 <- as.character(attr(ph.phyloscanner2,'SPLIT'))
+tmp[is.na(tmp)] <- 'Unknown'
+tmp2[is.na(tmp2)] <- 'Unknown'
+stopifnot( all(tmp==tmp2) )
+tmp <- as.character(attr(ph.phyloscanner,'INDIVIDUAL'))
+tmp2 <- as.character(attr(ph.phyloscanner2,'INDIVIDUAL'))
+tmp[is.na(tmp)] <- 'Unknown'
+tmp2[is.na(tmp2)] <- 'Unknown'
+stopifnot( all(tmp==tmp2) )
+tmp <- as.character(attr(ph.phyloscanner,'BRANCH_COLOURS'))
+tmp2 <- as.character(attr(ph.phyloscanner2,'BRANCH_COLOURS'))
+tmp[is.na(tmp)] <- 'Unknown'
+tmp2[is.na(tmp2)] <- 'Unknown'
+stopifnot( all(tmp==tmp2) )
+tmp <- attr(ph.phyloscanner,'SUBGRAPH_MRCA')
+tmp2 <- attr(ph.phyloscanner2,'SUBGRAPH_MRCA')
+stopifnot( all(tmp==tmp2) )
+	
+}
+\seealso{
+\code{\link{simmap.to.phyloscanner}}, \code{\link{extract.subgraph}}
+}
+\author{
+Oliver Ratmann
+}
diff --git a/phyloscannerR/man/plot.chain.Rd b/phyloscannerR/man/plot.chain.Rd
new file mode 100644
index 00000000..762a3191
--- /dev/null
+++ b/phyloscannerR/man/plot.chain.Rd
@@ -0,0 +1,89 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plotting_functions.R
+\name{plot.chain}
+\alias{plot.chain}
+\title{Plot transmission chain}
+\usage{
+plot.chain(df, di, control)
+}
+\arguments{
+\item{df}{tibble with the following columns 'H1', 'H2', 'CATEGORISATION', 'TYPE', 'SCORE'}
+
+\item{di}{tibble with meta-data to customize the plot with columns 'H', node.shape, node.label, node.fill}
+
+\item{control}{list of plotting control variables; see examples.}
+}
+\value{
+ggplot object
+}
+\description{
+This function plots a transmission chain, showing one edge with two labels: 
+L indicates the proportion of deep-sequence phylogenies in whom the two individuals are phylogenetically close and adjacent.
+D indicates the proportion of deep-sequence phylogenies that support the indicated direction of transmission, out of all 
+deep-sequence phylogenies that support either direction of transmission.
+}
+\examples{
+\dontrun{
+require(data.table)
+require(tidyverse)
+require(phyloscannerR)
+
+#
+# reconstruct transmission networks and most likely transmission chain
+#
+infile <- system.file(file.path('extdata','Rakai_phscnetworks_allpairs_190706.RData'),package='phyloscannerR')
+load(infile) 	# loads phyloscanner relationship counts 'dc'
+tmp <- find.networks(dc, neff.cut=3, verbose=TRUE)
+dnet <- copy(tmp$transmission.networks)
+dchain <- copy(tmp$most.likely.transmission.chains)
+
+# get meta data
+meta.file <- file.path(workdir,"RakaiPopSample_data","Dataset_S2.csv")
+tmp <- "https://datadryad.org/bitstream/handle/10255/dryad.208474/Dataset_S2.csv?sequence=2"
+download.file(tmp, destfile=meta.file, method="curl")
+dmeta <- as_tibble(read.csv(meta.file, stringsAsFactors=FALSE))
+
+#
+# plot network with host names, and male/female in blue/pink nodes, 
+# 	and highlight pairings with >60\% support for phylogenetic linkage
+#
+idclus <- sort(unique(dnet$IDCLU))
+di <- copy(dmeta)
+setnames(di, 'ID', 'H')
+df <- dnet \%>\% 
+		filter(IDCLU == 34) \%>\%
+		select(-c(H1_SEX,H2_SEX))		
+control<- list()
+control$point.size = 10
+control$edge.gap = 0.04
+control$edge.size = 2
+control$curvature = -0.2
+control$arrow = arrow(length = unit(0.04, "npc"), type = "open")
+control$curv.shift = 0.06
+control$label.size = 3
+control$node.label = "H" 
+control$node.fill = "SEX"
+control$node.shape = NA_character_
+control$node.shape.values = c(`NA` = 16)
+control$node.fill.values = c(F = "hotpink2", M = "steelblue2")
+control$threshold.linked = 0.6
+p <- plot.network(df, di, control)	
+
+#
+# plot most likely chain with host names, and male/female in blue/pink nodes, 
+# 	and highlight pairings with >60\% support for phylogenetic linkage
+#
+control$layout <- p$layout 
+di <- copy(dmeta)			
+setnames(di, 'ID', 'H')
+df <- subset(dchain, IDCLU==34)	
+p2 <- plot.chain(df, di, control)
+
+}
+}
+\seealso{
+\code{\link{find.networks}}, \code{\link{plot.network}}
+}
+\author{
+Oliver Ratmann
+}
diff --git a/phyloscannerR/man/plot.network.Rd b/phyloscannerR/man/plot.network.Rd
new file mode 100644
index 00000000..f2e0da61
--- /dev/null
+++ b/phyloscannerR/man/plot.network.Rd
@@ -0,0 +1,87 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plotting_functions.R
+\name{plot.network}
+\alias{plot.network}
+\title{Plot transmission network}
+\usage{
+plot.network(df, di, control)
+}
+\arguments{
+\item{df}{tibble with the following columns 'H1', 'H2', 'CATEGORISATION', 'TYPE', 'SCORE'}
+
+\item{di}{tibble with meta-data to customize the plot with columns 'H', node.shape, node.label, node.fill}
+
+\item{control}{list of plotting control variables; see examples.}
+}
+\value{
+ggplot object
+}
+\description{
+This function plots a phylogenetic transmission network, showing three types of edges: 
+two directed edges respectively in the 1->2 and 2->1 direction, and an undirected edge that represents phylogenetic support of close and adjacent individuals without evidence into the direction transmission.
+}
+\examples{
+\dontrun{
+require(data.table)
+require(tidyverse)
+require(phyloscannerR)
+
+#
+# reconstruct transmission networks and most likely transmission chain
+#
+infile <- system.file(file.path('extdata','Rakai_phscnetworks_allpairs_190706.RData'),package='phyloscannerR')
+load(infile) 	# loads phyloscanner relationship counts 'dc'
+tmp <- find.networks(dc, neff.cut=3, verbose=TRUE)
+dnet <- copy(tmp$transmission.networks)
+dchain <- copy(tmp$most.likely.transmission.chains)
+
+# get meta data
+meta.file <- file.path(workdir,"RakaiPopSample_data","Dataset_S2.csv")
+tmp <- "https://datadryad.org/bitstream/handle/10255/dryad.208474/Dataset_S2.csv?sequence=2"
+download.file(tmp, destfile=meta.file, method="curl")
+dmeta <- as_tibble(read.csv(meta.file, stringsAsFactors=FALSE))
+
+#
+# plot network with host names, and male/female in blue/pink nodes, 
+# 	and highlight pairings with >60\% support for phylogenetic linkage
+#
+idclus <- sort(unique(dnet$IDCLU))
+di <- copy(dmeta)
+setnames(di, 'ID', 'H')
+df <- dnet \%>\% 
+		filter(IDCLU == 34) \%>\%
+		select(-c(H1_SEX,H2_SEX))		
+control<- list()
+control$point.size = 10
+control$edge.gap = 0.04
+control$edge.size = 2
+control$curvature = -0.2
+control$arrow = arrow(length = unit(0.04, "npc"), type = "open")
+control$curv.shift = 0.06
+control$label.size = 3
+control$node.label = "H" 
+control$node.fill = "SEX"
+control$node.shape = NA_character_
+control$node.shape.values = c(`NA` = 16)
+control$node.fill.values = c(F = "hotpink2", M = "steelblue2")
+control$threshold.linked = 0.6
+p <- plot.network(df, di, control)	
+
+#
+# plot most likely chain with host names, and male/female in blue/pink nodes, 
+# 	and highlight pairings with >60\% support for phylogenetic linkage
+#
+control$layout <- p$layout 
+di <- copy(dmeta)			
+setnames(di, 'ID', 'H')
+df <- subset(dchain, IDCLU==34)	
+p2 <- plot.chain(df, di, control)
+
+}
+}
+\seealso{
+\code{\link{find.networks}}, \code{\link{plot.chain}}
+}
+\author{
+Oliver Ratmann
+}
diff --git a/phyloscannerR/man/produce.pairwise.graphs2.Rd b/phyloscannerR/man/produce.pairwise.graphs2.Rd
index fcf2a337..5a6a10e6 100644
--- a/phyloscannerR/man/produce.pairwise.graphs2.Rd
+++ b/phyloscannerR/man/produce.pairwise.graphs2.Rd
@@ -39,16 +39,29 @@ deep-sequence phylogeny across the genome. The genomic position on the x-axis in
 # remember that you can specify dwin to save computing time, if you have it already computed
 #
 \dontrun{	
+#
+#	Example 1
+#
 file	<- system.file(file.path('extdata','ptyr192_phsc_analyse_trees_output.RData'),package='phyloscannerR')
 load(file)	#loads 'phsc', output from 'phyloscanner.analyse.trees'	
 hosts	<- c('RkA05868F','RkA05968M','RkA00369F','RkA01344M')
 inclusion <- "both" 
-tmp	<- produce.pairwise.graphs2(phsc, hosts=hosts, inclusion = "both")
+tmp	<- phyloscannerR:::produce.pairwise.graphs2(phsc, hosts=hosts, inclusion = "both")
+tmp$graph
+
+#
+#	Example 2
+#
+infile <- system.file(file.path('extdata','Rakai_phscnetworks_allpairs_190706.RData'),package='phyloscannerR')
+load(infile) 
+# loaded "dpl" (pairs of individuals between whom linkage not excluded), "dc" (phyloscanner classification counts), "dw" (phyloscanner classifications per window)
+hosts	<- c('RkA05868F','RkA05968M','RkA00369F','RkA01344M') 
+tmp	<- phyloscannerR:::produce.pairwise.graphs2(hosts=hosts, dwin=dw, inclusion = "both")
 tmp$graph
 }
 }
 \seealso{
-\link{\code{classify.pairwise.relationships}}
+\code{\link{classify.pairwise.relationships}}
 }
 \author{
 Oliver Ratmann
diff --git a/phyloscannerR/man/simmap.to.phyloscanner.Rd b/phyloscannerR/man/simmap.to.phyloscanner.Rd
new file mode 100644
index 00000000..b9727f30
--- /dev/null
+++ b/phyloscannerR/man/simmap.to.phyloscanner.Rd
@@ -0,0 +1,67 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tree_utility_functions.R
+\name{simmap.to.phyloscanner}
+\alias{simmap.to.phyloscanner}
+\title{Cast SIMMAP tree to phyloscanner tree}
+\usage{
+simmap.to.phyloscanner(ph, delete.simmap.structures)
+}
+\arguments{
+\item{ph}{A \code{SIMMAP} tree with elements \code{maps}, \code{mapped.edge}, \code{node.states}.}
+
+\item{delete.simmap.structures}{Logical value. If true \code{SIMMAP} elements are removed from the tree.}
+}
+\value{
+Same tree in \code{phyloscanner.tree} format, with attributes \code{SPLIT}, \code{INDIVIDUAL}, \code{BRANCH_COLOURS}, \code{SUBGRAPH_MRCA}.
+}
+\description{
+Cast SIMMAP tree to phyloscanner tree
+}
+\examples{
+#
+# Example on data from Rakai Community Cohort Study
+#
+require(phyloscannerR)
+ph.phyloscanner <- readRDS(file=system.file(file.path('extdata','ptyr292_1600_to_1849.rds'),package='phyloscannerR'))
+
+#	cast from phyloscanner format to SIMMAP format
+#	so that the SIMMAP tree still has the phyloscanner attributes
+ph.simmap <- phyloscanner.to.simmap(ph.phyloscanner, delete.phyloscanner.structures=FALSE)
+#	to delete the phyloscanner attributes, use
+ph.simmap <- phyloscanner.to.simmap(ph.phyloscanner, delete.phyloscanner.structures=TRUE)
+#
+#	cast from SIMMAP format to phyloscanner format
+#	similarly, use delete.simmap.structures to keep/delete SIMMAP elements from the tree in phyloscanner format
+ph.phyloscanner2 <- simmap.to.phyloscanner(ph.simmap, delete.simmap.structures=TRUE)
+#
+#	test that ph.phyloscanner2 is the same as ph.phyloscanner:
+stopifnot(all( ph.phyloscanner$tip.label == ph.phyloscanner2$tip.label ))
+stopifnot(all( ph.phyloscanner$edge == ph.phyloscanner2$edge ))
+stopifnot(all( ph.phyloscanner$edge.length == ph.phyloscanner2$edge.length ))
+stopifnot(all( ph.phyloscanner$Nnode == ph.phyloscanner2$Nnode ))
+tmp <- as.character(attr(ph.phyloscanner,'SPLIT'))
+tmp2 <- as.character(attr(ph.phyloscanner2,'SPLIT'))
+tmp[is.na(tmp)] <- 'Unknown'
+tmp2[is.na(tmp2)] <- 'Unknown'
+stopifnot( all(tmp==tmp2) )
+tmp <- as.character(attr(ph.phyloscanner,'INDIVIDUAL'))
+tmp2 <- as.character(attr(ph.phyloscanner2,'INDIVIDUAL'))
+tmp[is.na(tmp)] <- 'Unknown'
+tmp2[is.na(tmp2)] <- 'Unknown'
+stopifnot( all(tmp==tmp2) )
+tmp <- as.character(attr(ph.phyloscanner,'BRANCH_COLOURS'))
+tmp2 <- as.character(attr(ph.phyloscanner2,'BRANCH_COLOURS'))
+tmp[is.na(tmp)] <- 'Unknown'
+tmp2[is.na(tmp2)] <- 'Unknown'
+stopifnot( all(tmp==tmp2) )
+tmp <- attr(ph.phyloscanner,'SUBGRAPH_MRCA')
+tmp2 <- attr(ph.phyloscanner2,'SUBGRAPH_MRCA')
+stopifnot( all(tmp==tmp2) )
+	
+}
+\seealso{
+\code{\link{phyloscanner.to.simmap}}, \code{\link{extract.subgraph}}
+}
+\author{
+Oliver Ratmann
+}
diff --git a/phyloscannerR/man/write.annotated.tree.Rd b/phyloscannerR/man/write.annotated.tree.Rd
index 8e65b610..4790e1ee 100644
--- a/phyloscannerR/man/write.annotated.tree.Rd
+++ b/phyloscannerR/man/write.annotated.tree.Rd
@@ -4,9 +4,9 @@
 \alias{write.annotated.tree}
 \title{Write the phylogeny with reconstructed host annotations to file}
 \usage{
-write.annotated.tree(ptree, file.name, format = c("pdf", "nex"),
-  pdf.scale.bar.width = 0.01, pdf.w = 50, pdf.hm = 0.15,
-  verbose = F)
+write.annotated.tree(ptree, file.name, format = c("pdf", "nex",
+  "ggplot"), tree.colours = NULL, pdf.scale.bar.width = 0.01,
+  pdf.w = 50, pdf.hm = 0.15, verbose = F)
 }
 \arguments{
 \item{file.name}{The name of the output file}
diff --git a/phyloscannerR/vignettes/.DS_Store b/phyloscannerR/vignettes/.DS_Store
new file mode 100644
index 00000000..a9928580
Binary files /dev/null and b/phyloscannerR/vignettes/.DS_Store differ
diff --git a/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dc.png b/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dc.png
new file mode 100644
index 00000000..c845b57e
Binary files /dev/null and b/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dc.png differ
diff --git a/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dchain.png b/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dchain.png
new file mode 100644
index 00000000..a80da697
Binary files /dev/null and b/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dchain.png differ
diff --git a/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dchainplot.pdf b/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dchainplot.pdf
new file mode 100644
index 00000000..3042afa5
Binary files /dev/null and b/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dchainplot.pdf differ
diff --git a/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dnet.png b/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dnet.png
new file mode 100644
index 00000000..5dc0986a
Binary files /dev/null and b/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dnet.png differ
diff --git a/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dnetplot.pdf b/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dnetplot.pdf
new file mode 100644
index 00000000..9af12438
Binary files /dev/null and b/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dnetplot.pdf differ
diff --git a/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dpl.png b/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dpl.png
new file mode 100644
index 00000000..a7c681aa
Binary files /dev/null and b/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dpl.png differ
diff --git a/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dw.png b/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dw.png
new file mode 100644
index 00000000..f5a5e633
Binary files /dev/null and b/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_dw.png differ
diff --git a/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_findnetsoutput.png b/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_findnetsoutput.png
new file mode 100644
index 00000000..9a7e0c54
Binary files /dev/null and b/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_findnetsoutput.png differ
diff --git a/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_findpairsoutput.png b/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_findpairsoutput.png
new file mode 100644
index 00000000..bb0c667e
Binary files /dev/null and b/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_findpairsoutput.png differ
diff --git a/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_phyloscan.pdf b/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_phyloscan.pdf
new file mode 100644
index 00000000..7244c67d
Binary files /dev/null and b/phyloscannerR/vignettes/figures/UVRI.03.reconstruct_transmission_networks_phyloscan.pdf differ
diff --git a/phyloscannerR/vignettes/phyloflows_aggregating.pdf b/phyloscannerR/vignettes/phyloflows_aggregating.pdf
new file mode 100644
index 00000000..08feec11
Binary files /dev/null and b/phyloscannerR/vignettes/phyloflows_aggregating.pdf differ
diff --git a/phyloscannerR/vignettes/phyloflows_diagnostics.pdf b/phyloscannerR/vignettes/phyloflows_diagnostics.pdf
new file mode 100644
index 00000000..d36c374f
Binary files /dev/null and b/phyloscannerR/vignettes/phyloflows_diagnostics.pdf differ
diff --git a/phyloscannerR/vignettes/phyloflows_first_example.pdf b/phyloscannerR/vignettes/phyloflows_first_example.pdf
new file mode 100644
index 00000000..c07639c8
Binary files /dev/null and b/phyloscannerR/vignettes/phyloflows_first_example.pdf differ
diff --git a/phyloscannerR/vignettes/phyloflows_keyquantities.pdf b/phyloscannerR/vignettes/phyloflows_keyquantities.pdf
new file mode 100644
index 00000000..6fa278bb
Binary files /dev/null and b/phyloscannerR/vignettes/phyloflows_keyquantities.pdf differ
diff --git a/phyloscannerR/vignettes/phyloscannerR_reconstruct_transmission_networks.Rmd b/phyloscannerR/vignettes/phyloscannerR_reconstruct_transmission_networks.Rmd
new file mode 100644
index 00000000..2c1726d8
--- /dev/null
+++ b/phyloscannerR/vignettes/phyloscannerR_reconstruct_transmission_networks.Rmd
@@ -0,0 +1,373 @@
+---
+title: "*phyloscannerR*: reconstructing transmission networks"
+author: "Oliver Ratmann and Matthew Hall"
+date: "2019-09-02"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Vignette Title}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+urlcolor: blue
+---
+
+```{r setup, echo=FALSE, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  tidy.opts=list(width.cutoff=80),
+  tidy=TRUE,
+  fig.pos= "h",
+  comment = "#>"
+)
+``` 
+```{r, include=TRUE, eval=TRUE, echo=FALSE, tidy=TRUE, results='hide',message=FALSE, warning=FALSE}
+require(knitr)
+require(kableExtra)
+```
+
+## Introduction
+In this vignette, we describe how transmission networks can be
+reconstructed from *phyloscanner* output with the R package `phyloscannerR`.
+
+This will involve the following steps:
+
+1. Find pairs of individuals between whom phylogenetic linkage cannot be
+   excluded based on the distance and topological relationship of viral reads
+   from both individuals. This will use output from a previous *phyloscanner*
+   analysis.
+2. Inspect the *phyloscanner* statistics for each deep-sequence tree for these
+   pairs.
+3. Inspect the *phyloscanner* relationship counts across all deep-sequence
+   tree for these pairs.
+4. Reconstruct distinct transmission networks between these pairs, and
+   reconstruct the most likely transmission chain for each network. 
+5. Plot the transmission networks and the transmission chains.
+   
+We will introduce the following functions from the `phyloscannerR` package:
+
+* `find.pairs.in.networks`
+* `produce.pairwise.graphs2`
+* `find.networks`
+* `plot.network`
+* `plot.chain`     
+    
+## Data    
+The data that we will be using in this vignette is from the MRC UVRI cohort in
+south-eastern Uganda, and was generated by the PANGEA consortium.
+    
+## Getting started
+We assume that output from a previous *phyloscanner* analysis is available,
+containing files that end in `*workspace.rda`. We will need only these files to
+reconstruct the transmission networks. Let us get started. 
+
+```{r, include=TRUE, eval=FALSE, echo=TRUE}
+# 	load packages
+require(tidyverse)
+require(RBGL)
+require(igraph)
+require(network)
+require(ggnet)
+require(phyloscannerR)
+
+#	directory with phyloscanner output containing the *workspace.rda file
+home <- '/Users/Oliver/Box Sync/OR_Work/2019/2019_PANGEA_BBosa'
+indir <- file.path(home,'MRCPopSample_phsc_stage2_output_newali_300_HKC_phsc')
+
+#	output files
+outdir <- file.path(home, 'MRCPopSample_phsc_stage2_output_newali_300_HKC_analysis')
+file.pairs <- file.path(outdir, 'MRCUVRI_phscallpairs_190827.rda')
+file.nets <- file.path(outdir, 'MRCUVRI_phscnetworks_190827.rda')		
+```
+
+## Pairs of individuals between whom linkage is not excluded 
+We will now processes the *phyloscanner* output in `indir`. We will use the
+*phyloscanner* specification as used for [the
+Rakai analysis](https://www.nature.com/articles/s41467-019-09139-4). The next
+lines of code specify a list of `control` options, and then use function
+`find.pairs.in.networks`.
+
+```{r, include=TRUE, eval=FALSE, echo=TRUE, tidy=FALSE, results='hide'}
+#	control options
+#
+#	phyloscanner produces several different phylogenetic classifications
+#	schemes; for example by phylogenetic distance only; or using phylogenetic
+#	distance and topologic adjacency (`close.and.adjacent.cat`); or phylogenetic
+#	distance and topologic contiguity. We select one of them.
+#
+control <- list(	linked.group='close.and.adjacent.cat',
+#	( use classification based on phylogenetic distance and topological adjacency )		
+					linked.no='not.close.or.nonadjacent',
+#	( pairs are interpreted to be unlinked 
+#	  if classified as 'not.close.or.nonadjacent' )		
+					linked.yes='close.and.adjacent',
+#	( pairs are interpreted to be linked 
+#	  if classified as 'close.or.nonadjacent' ) 		
+					conf.cut=0.6, 
+#	( threshold on the proportion of deep-sequence phylogenies
+#	  pairs unlinked in more than this are discarded )		
+					neff.cut=3
+#	( threshold on the effective number of deep-sequence phylogenies
+#	  pairs with fewer data are discarded )			
+					)
+										
+tmp <- find.pairs.in.networks(	indir, 
+								batch.regex='^ptyr([0-9]+)_.*', 
+#	( batch.regex identifies the batch number from the
+#     file names of *phyloscanner* output. Typically, this batch number corresponds
+#     to the analysis of a particular transmission network. )								
+								control=control, 
+								verbose=TRUE)
+						
+dpl <- copy(tmp$network.pairs) 	
+#	the pairs
+dc <- copy(tmp$relationship.counts) 
+#	their relationship counts summed over phylogenies
+dw <- copy(tmp$windows)	
+#	their relationship stats for all phylogenies
+
+#save(dpl, dc, dw, file=file.pairs)
+```
+
+<!--
+The above should produce the following output or similar (when
+`verbose=TRUE`): ```{r, out.width="\\linewidth", include=TRUE, fig.align="center", echo=FALSE}
+knitr::include_graphics("UVRI.03.reconstruct_transmission_networks_findpairsoutput.png")
+```
+-->
+
+## Output of `find.pairs.in.networks`
+
+Let us have a look at the selected pairs:
+```{r, out.width="\\linewidth", include=TRUE, fig.align="center", echo=FALSE}
+knitr::include_graphics("figures/UVRI.03.reconstruct_transmission_networks_dpl.png")
+```
+
+Here,
+
+1. Columns `H1` and `H2` list the individual ID of the individuals between whom
+   linkage is not rejected.
+2. Column `PTY_RUN` lists the batch number of the phyloscanner analysis
+   specified through `batch.regex`.
+3. Column `NEFF` gives the total number of deep-sequence phylogenies for the
+   pair, and `KEFF` gives the total number of phylogenies in which the two
+   individuals have virus that is `close.and.adjacent`.
+   
+Now let us have a look at the relationship stats for all phylogenies:   
+```{r, out.width="\\linewidth", include=TRUE, fig.align="center", echo=FALSE}
+knitr::include_graphics("figures/UVRI.03.reconstruct_transmission_networks_dw.png")
+```
+
+Here,
+
+1. Columns `H1`, `H2` and `PTY_RUN` are as before.
+2. Column `TREE_ID` gives the identifier of the phylogeny for which the
+   *phyloscanner* statistics are evaluated.
+3. Columns `ADJACENT`, `CONTIGUOUS`, `PATHS12`, `PATHS21`, `ANCESTRY`,
+   `PATRISTIC_DISTANCE` give the basic *phyloscanner* statistics that describe
+   the relationship of virus from two individuals in this phylogeny.
+4. The remaining columns list all the different classification schemes that
+   *phyloscanner* produces by default (e.g. `PROXIMITY-3_WAY_CAT`), and the
+   classifications in rows (e.g. `close`).
+
+Now let us have a look at the relationship counts summed over all
+phylogenies:   
+```{r, out.width="\\linewidth", include=TRUE, fig.align="center", echo=FALSE}
+knitr::include_graphics("figures/UVRI.03.reconstruct_transmission_networks_dc.png")
+```
+
+Here,
+
+1. Columns `H1`, `H2` and `PTY_RUN` are as before.
+2. Column `CATEGORISATION` then lists one of the classification schemes and
+   column `TYPE` all of the classifications in this scheme.
+3. For each classification, `NEFF` gives the total number of deep-sequence
+   phylogenies for the pair, and `KEFF` gives the total number of phylogenies in
+   which the two individuals have that classification. 
+
+## Plotting the phylogenetic relationships between pairs of individuals
+At this point, we can easily visualise the phylogenetic relationships between
+two individuals across the genome. We call these plots phyloscans:
+```{r, include=TRUE, eval=FALSE, echo=TRUE, tidy=TRUE}
+# plot phyloscans of all likely pairs
+hosts <- dw %>% select(H1, H2) %>% 
+		gather('HOST_TYPE','H') %>% 
+		select(-HOST_TYPE) %>%
+		distinct() %>%
+		arrange(H) %>%
+		pull(H)
+# plot phyloscans of one pair
+hosts <- dw[1,]  %>% select(H1, H2) %>% 
+		gather('HOST_TYPE','H') %>% 
+		select(-HOST_TYPE) %>% 
+		pull(H)
+tmp <- copy(dw)
+tmp	<- produce.pairwise.graphs2(NULL, hosts=hosts, dwin=tmp, inclusion = "both")
+tmp$graph
+```    
+```{r, out.width="\\linewidth", include=TRUE, fig.align="center", echo=FALSE, out.width = "100%"}
+knitr::include_graphics("figures/UVRI.03.reconstruct_transmission_networks_phyloscan.pdf")
+```
+
+
+## Reconstructing transmission networks
+We can now reconstruct transmission networks, which are defined as sets of
+individuals between whom phylogenetic linkage is not excluded. Between any two
+individuals in a network, there are three weighted edges that describe the
+phylogenetic support for transmission in the one direction, in the other
+direction, and support for phylogenetic linkage without evidence for
+directionality.
+
+The same function also reconstructs the most likely transmission chain for each
+network. A transmission chain is defined as the directed graph with nodes of
+indegree 1 and arbitrary outdegree that connects all individuals and has the
+largest product of edge weights. Full details are given in the [the Rakai
+paper](https://www.nature.com/articles/s41467-019-09139-4).
+
+Finally, from the transmission chains it is straightforward to extract highly
+supported pairs. 
+
+Here is the code:
+```{r, include=TRUE, eval=FALSE, echo=TRUE, tidy=FALSE, results='hide'}
+#	construct networks between pairs using the same 
+#	control options as before
+tmp <- find.networks(dc, control=control, verbose=TRUE)
+#	extract networks and the transmission chains within them
+dnet <- copy(tmp$transmission.networks)
+dchain <- copy(tmp$most.likely.transmission.chains)
+
+#	construct highly supported pairs
+conf.cut <- 0.6
+dconfpairs <- dchain %>% 
+		filter( SCORE_LINKED>conf.cut & 
+				pmax(SCORE_DIR_12, SCORE_DIR_21)>conf.cut) %>%
+		select(-c(SCORE_NW_12, SCORE_NW_12, SCORE_NW_AMB))
+
+#	save
+#save(dpl, dc, dw, dnet, dchain, dconfpairs, file=file.nets)
+```
+
+<!-- 
+The above should produce the following output or similar (when `verbose=TRUE`):
+```{r, out.width="\\linewidth", include=TRUE, fig.align="center", echo=FALSE}
+knitr::include_graphics("figures/UVRI.03.reconstruct_transmission_networks_findnetsoutput.png")
+```
+-->
+
+## Output from reconstructing transmission networks
+Let us have a look at the transmission networks:
+```{r, out.width="\\linewidth", include=TRUE, fig.align="center", echo=FALSE}
+knitr::include_graphics("figures/UVRI.03.reconstruct_transmission_networks_dnet.png")
+```
+
+Here,
+
+1. Columns `H1` and `H2` list as before the individual ID of the individuals
+   between whom linkage is not rejected.
+2. Column `PTY_RUN` lists as before the batch number of the phyloscanner
+   analysis specified through `batch.regex`.
+3. Column `IDCLU` gives an identifier for each reconstructed transmission
+   network.
+4. Column `TYPE` gives the possible classifications of the phylogenetic
+   relationship types between the individuals. Using the default options in
+   `control`, this is evidence for transmission in either direction (`12` and
+   `21`), support for phylogenetic linkage without evidence for directionality
+   (`complex.or.no.ancestry`), and evidence for no phylogenetic linkage
+   (`not.close.or.nonadjacent`). 
+5. Column `SCORE` gives the *phyloscanner* score for each classification, which
+   is defined by `KEFF` divided by `NEFF`.
+   
+
+Let us plot one of these networks. The code to do this is as follows:   
+
+```{r, include=TRUE, eval=FALSE, echo=TRUE, tidy=FALSE, fig.align="center"}
+# plot all networks
+idclus <- sort(unique(dnet$IDCLU))
+# find IDs of all networks
+control<- list()
+control$point.size = 10
+control$edge.gap = 0.04
+control$edge.size = 2
+control$curvature = -0.2
+control$arrow = arrow(length = unit(0.04, "npc"), type = "open")
+control$curv.shift = 0.06
+control$label.size = 3
+# the above options may need to be changed, depending on the 
+# size of the networks and the size of your pdf output
+control$node.label = "H" 
+# specify the column in 'di' below that should be used as nodel label
+control$node.fill = NA_character_
+control$node.fill.values = c(`NA` = "steelblue2")
+# specify the background colour for each node
+control$node.shape = NA_character_
+control$node.shape.values = c(`NA` = 16)
+# specify the shape for each node
+control$threshold.linked = 0.6
+# edges will be highlighted in darkgrey if 'SCORE_LINKED' is above this threshold 
+pns		<- lapply(seq_along(idclus), function(i)
+		{
+			idclu <- idclus[i]
+			df <- dnet %>% 
+					filter(IDCLU == idclu)
+			di <- df %>% 
+					select(H1, H2) %>% 
+					gather('HOST_TYPE','H') %>% 
+					select(-HOST_TYPE) %>%
+					distinct() 				
+			p <- plot.network(df, di, control)					
+			p	
+		})
+pns[[10]]
+```
+```{r, out.width="\\linewidth", include=TRUE, fig.align="center", echo=FALSE, out.width = "80%"}
+knitr::include_graphics("figures/UVRI.03.reconstruct_transmission_networks_dnetplot.pdf")
+```  
+
+## Output from reconstructing transmission chains   
+Now, let us have a look at the corresponding transmission chains:
+```{r, out.width="\\linewidth", include=TRUE, fig.align="center", echo=FALSE}
+knitr::include_graphics("figures/UVRI.03.reconstruct_transmission_networks_dchain.png")
+```
+
+Here,
+
+1. Columns `H1`, `H2`, `PTY_RUN` and `IDCLU` are as before.
+2. Column `LINK_12` states if there is a directed edge from `H1` to `H2` in the
+   most likely transmission chain, and `LINK_21` states if there is a directed
+   edge in the other direction.
+3. Column `SCORE_LINKED` gives the *phyloscanner* score for phylogenetic
+   linkage. Using the default options, this is the sum of `KEFF` for `12`, `21`
+   and `complex.or.no.ancestry`, divided by `NEFF`.
+4. Column `SCORE_DIR_12` gives the *phyloscanner* score for transmission
+   direction `H1` to `H2` among phylogenies supporting phylogenetic linkage.
+   Column `SCORE_DIR_21` gives the *phyloscanner* score for transmission in the
+   opposite direction.
+  
+We can also plot the transmission chains. The code to do this is as follows:
+
+```{r, include=TRUE, eval=FALSE, echo=TRUE, tidy=FALSE}
+# plot corresponding most likely chains	
+pcs		<- lapply(seq_along(idclus), function(i)
+		{
+			idclu <- idclus[i]
+			control$layout <- pns[[i]][['layout']] 
+			df <- dchain %>% 
+					filter(IDCLU == idclu)
+			di <- df %>% 
+					select(H1, H2) %>% 
+					gather('HOST_TYPE','H') %>% 
+					select(-HOST_TYPE) %>%
+					distinct() 
+			p <- plot.chain(df, di, control)					
+			p	
+		})
+pcs[[10]]
+```  
+```{r, out.width="\\linewidth", include=TRUE, fig.align="center", echo=FALSE, out.width = "80%"}
+knitr::include_graphics("figures/UVRI.03.reconstruct_transmission_networks_dchainplot.pdf")
+```  
+
+  
+## Final notes
+  
+1. It is also easy to add any further individual-level meta-data to the analysis
+   output. Specify the `dmeta` input variable `find.pairs.in.networks`. 
+	
\ No newline at end of file
diff --git a/phyloscannerR/vignettes/phyloscannerR_reconstruct_transmission_networks.pdf b/phyloscannerR/vignettes/phyloscannerR_reconstruct_transmission_networks.pdf
new file mode 100644
index 00000000..5343f54b
Binary files /dev/null and b/phyloscannerR/vignettes/phyloscannerR_reconstruct_transmission_networks.pdf differ