mjwestgate
diff --git a/‎DESCRIPTION‎
Lines changed: 3 additions & 3 deletions b/‎DESCRIPTION‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎INDEX‎
Lines changed: 1 addition & 3 deletions b/‎INDEX‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎NAMESPACE‎
Lines changed: 1 addition & 1 deletion b/‎NAMESPACE‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/data_processing.R‎
Lines changed: 88 additions & 32 deletions b/‎R/data_processing.R‎
Lines changed: 88 additions & 32 deletions
diff --git a/‎R/line_functions.R‎
Lines changed: 28 additions & 90 deletions b/‎R/line_functions.R‎
Lines changed: 28 additions & 90 deletions
@@ -1,9 +1,9 @@
 Package: circleplot
-Version: 0.3
-Date: 2014-10-08
+Version: 0.4
+Date: 2016-02-03
 Title: Circular plots of distance and association matrices
 Author: Martin J. Westgate <martinjwestgate@gmail.com>
 Maintainer: Martin J. Westgate <martinjwestgate@gmail.com>
 Description: Tools for plotting numeric or binary matrices.
-Depends: R (>= 3.1.0), RColorBrewer
+Depends: R (>= 3.1.0), RColorBrewer, cluster
 License: GPL-2
@@ -1,7 +1,5 @@
-add.key		Draw a simple key for a plot
-calc.overlap	Identify species that are present in >1 datasets stored in a list
-clean.list	Take a list of datasets and return in a standardised form
 circleplot	Draw a plot
+draw.circle	Add a circle to an existing plot
 make.circle	Return a set of points on the circumference of a circle
 make.long.format	Convert a matrix to a data.frame
 make.wide.format	Convert a data.frame to a matrix
 
@@ -1 +1 @@
-export(add.key, calc.overlap, clean.list, circleplot, make.circle, make.long.format, make.wide.format, point.attr)
+export(circleplot, draw.circle, make.circle, make.long.format, make.wide.format, point.attr)
@@ -3,8 +3,9 @@
 
 # function to make a square matrix from a data.frame
 make.wide.format<-function(
-	input	# result from spaa()
+	input	 # result from spaa()
 	){
+	if(class(input)!="data.frame"){stop("make.wide.format only works for class(input)=='data.frame'")}
 	# work out properties of the input
 	spp.names<-unique(c(input[, 1], input[, 2]))
 	n.spp<-length(spp.names)
@@ -33,6 +34,7 @@ make.wide.format<-function(
 
 # function to make a 3-column data.frame from a square matrix (i.e. inverse of make.wide.format)
 make.long.format<-function(input){
+	if(class(input)!="matrix"){stop("make.wide.format only works for class(input)=='matrix'")}
 	# get basic summaries
 	asymmetric<-any(c(input==t(input))==FALSE, na.rm=TRUE)
 	if(length(colnames(input))==0){spp.names<-paste("V", c(1:ncol(input)), sep="")
@@ -59,48 +61,102 @@ make.long.format<-function(input){
 	}
 
 
-# Functions on lists
+# function to take an input (preferably in long format) and return a sensible distance matrix
+# arguments:
+#	input: a matrix (passed through make.long.format), or a data.frame whose third
+#		column holds the values (passed through make.wide.format)
+# returns a list with two entries:
+#	asymmetric: TRUE if the wide (matrix) form differs from its own transpose
+#	dist.matrix: an object of class 'dist', with any NA entries set to the mean of the rest
+# stops with an error if input is neither a matrix nor a data.frame
+make.dist.format<-function(input){
+	# get objects
+	# inherits() gives a single TRUE/FALSE whatever the length of class(input)
+	if(!inherits(input, c("matrix", "data.frame"))){
+		stop("make.dist.format only accepts class matrix or data.frame")}
+	if(inherits(input, "matrix")){
+		wide<-input
+		long<-make.long.format(input)}
+	if(inherits(input, "data.frame")){
+		wide<-make.wide.format(input)
+		long<-input}
+	# remove infinite values: Inf becomes twice the largest remaining value
+	if(any(long[, 3]==Inf, na.rm=TRUE)){
+		replace.locs<-which(long[, 3]==Inf)
+		replace.vals<-max(long[-replace.locs, 3], na.rm=TRUE)*2
+		long[replace.locs, 3]<-replace.vals}
+	# -Inf is tested on 'long' rather than 'input': when input is a matrix, input[, 3] is
+	# simply its third column, so the test could miss -Inf values or fail outright
+	if(any(long[, 3]==-Inf, na.rm=TRUE)){
+		replace.locs<-which(long[, 3]==-Inf)
+		replace.vals<-min(long[-replace.locs, 3], na.rm=TRUE)
+		# push the replacement below the smallest remaining value: double it if negative, halve it otherwise
+		if(replace.vals<0){replace.vals<-replace.vals*2}else{replace.vals<-replace.vals*0.5}
+		long[replace.locs, 3]<-replace.vals}
+	# shift values so that the minimum is zero (only needed if any are negative)
+	if(min(long[, 3], na.rm=TRUE)<0){
+		long[, 3]<-long[, 3]-min(long[, 3], na.rm=TRUE)}
+	# invert to make into a distance
+	long[, 3]<-max(long[, 3], na.rm=TRUE)-long[, 3]
+	# NOTE(review): the cleaned and inverted values in 'long' are not written back to 'wide',
+	# so the distance matrix below is still built from the values as supplied - confirm intent
+	# convert to matrix, check for asymmetry
+	asymmetric<-all(wide==t(wide), na.rm=TRUE)==FALSE
+	if(asymmetric){
+		# add each cell to its mirror image to get a symmetric matrix; dimnames come from 'wide'
+		result<-as.dist(wide+t(wide))
+	}else{
+		result<-as.dist(wide)}
+	# set na values to the mean (i.e. no effect on clustering)
+	if(any(is.na(result))){
+		result[which(is.na(result))]<-mean(result, na.rm=TRUE)}
+	return(list(asymmetric= asymmetric, dist.matrix=result))
+	}
+
 
 # take a list containing co-occurrence data, and return a list of the same length, 
 # but with only those species shared among all datasets (type="AND") 
 # or all species present in any dataset (type="OR", the default)
-clean.list<-function(x, type="OR"){
-	if(any(c("AND", "OR")==type)==FALSE)stop("Specified 'type' not permitted; please specify AND or OR")
+clean.list<-function(x, reduce=FALSE){
 	# first ensure that data are in the same (wide) format
 	x<-lapply(x, function(y){
 		if(class(y)=="data.frame"){y<-make.wide.format(y)}else{y<-as.matrix(y)}})
 	n<-length(x)
 	comparison<-calc.overlap(x)
-	if(type=="OR"){
-		all.species<-rownames(comparison)
-		for(i in 1:n){
-			y<-x[[i]]
-			missing.rows<-which(comparison[, i]==FALSE)
-			new.cols<-matrix(data=NA, nrow=nrow(y), ncol=length(missing.rows))
-				colnames(new.cols)<-all.species[missing.rows]
-			output <-cbind(y, new.cols)
-			new.rows<-matrix(data=NA, nrow=length(missing.rows), ncol=ncol(output))
-				rownames(new.rows)<-all.species[missing.rows]
-			output <-rbind(output, new.rows)
-			col.order<-order(colnames(output))
-			x[[i]]<-output[col.order, col.order]	
-		}
-	}else{
+	if(reduce){
 		and.test<-apply(comparison, 1, FUN=function(y){any(y==FALSE)==FALSE})
 		keep.rows<-which(and.test)
-		if(length(keep.rows)==0){stop("No species are present in all datasets; try type='OR'")
-		}else{
-			all.species<-rownames(comparison)[keep.rows]
-			for(i in 1:n){
-				y<-x[[i]]
-				keep.cols<-which(sapply(colnames(y), 
-					FUN=function(z, comp){any(comp==z)}, comp=all.species))
-				output<-y[keep.cols, keep.cols]
-				col.order<-order(colnames(output))
-				x[[i]]<-output[col.order, col.order]			
-			}		
-	}}
-	return(x)
+		if(length(keep.rows)==0){stop("No species are present in all datasets; try reduce=FALSE")}
+		all.species<-rownames(comparison)[keep.rows]
+	}else{all.species<-rownames(comparison)}
+
+	# set up a matrix that will be filled with data for entry in x
+	nspp<-length(all.species)
+	empty.matrix<-matrix(data=NA, nspp, nspp)
+		colnames(empty.matrix)<-all.species; rownames(empty.matrix)<-all.species
+
+	# return a list of matrices, each containing all.species in the same order.
+	result<-lapply(x, function(y, fill){
+		spp<-rownames(fill)
+		locations<-sapply(spp, function(z, comp){
+			if(any(comp==z)){return(which(comp==z))}else{return(NA)}}, 
+			comp=rownames(y))
+		initial.list<-as.list(as.data.frame(y))
+		filled.list<-lapply(initial.list, function(z, lookup){z[lookup]}, lookup=locations)
+		empty.list <-as.list(as.data.frame(fill))
+		final.list<-append(filled.list, empty.list[which(is.na(locations))])
+		order.final<-sapply(names(final.list), function(z, lookup){
+			which(lookup==z)}, lookup=colnames(fill))
+		final.matrix<-as.matrix(as.data.frame(final.list[order.final]))
+		rownames(final.matrix)<-spp
+		colnames(final.matrix)<-spp
+		return(final.matrix)
+		}, fill= empty.matrix)
+
+	# clustering stage to go here - if turned off, test for identical rownames, and if missing, switch to alphabetical
+	# perhaps convert list to array, use apply(result, c(1, 2), sum) to get clustering
+	result.array<-array(unlist(result), dim=c(nspp, nspp, length(result)), 
+		dimnames=list(all.species, all.species, names(result))) 
+	result.sum<-apply(result.array, c(1, 2), function(z){sum(z, na.rm=TRUE)})
+	result.dist<-make.dist.format(result.sum)	
+	# return in correct format
+	return(list(
+		wide=result,
+		long=lapply(result, make.long.format),
+		distance= result.dist$dist.matrix,
+		asymmetric= result.dist$asymmetric))
 	}
 
 
 
@@ -6,124 +6,71 @@ draw.curves<-function(x){
 	if(segment.test){
 		segment.list<-append(
 			list(x0= x$x[1:100], x1= x$x[2:101], y0= x$y[1:100], y1= x$y[2:101]),
-			x[-c(1:2, 5)])
+			x[-c(1:2)])
+		segment.list<-segment.list[-which(names(segment.list)=="arrows")]
 		do.call("segments", segment.list)
-	}else{do.call("lines", x[1:4])}
+	}else{
+		if(any(names(x)=="arrows")){x<-x[-which(names(x)=="arrows")]}
+		do.call("lines", x)}
 	}
 
 
 # generate a list in which each entry is a list of line attributes, to pass to draw.curves()
 get.curves<-function(
-	input
+	points, # from calc.circleplot 
+	lines,
+	plot.options # from set.plot.attributes
 	)
 	{
 
 	# calculate inter-point distances, to allow setting of pc.scale (to calculate curvature of lines relative to origin)
-	point.distance<-dist(input$points[, 2:3])
+	point.distance<-dist(points[, c("x", "y")])
 	scale.distance<-point.distance-min(point.distance)
 	scale.distance<-((scale.distance/max(scale.distance))*
-		input$plot.control$line.curvature[2])+input$plot.control$line.curvature[1]
+		plot.options$line.curvature[2])+ plot.options$line.curvature[1]
 	scale.distance<-as.matrix(scale.distance)
 
-	# set line colours & widths. 
-	# Note that this works even for binary matrices, but is later ignored if line.gradient==FALSE
-	line.cuts<-cut(input$lines$value, input$plot.control$line.breaks, 
-		include.lowest=TRUE, right=TRUE, labels=FALSE)
-	input$lines$colour<-input$plot.control$line.cols[line.cuts]
-
-	# new code for setting line widths
-	input$lines$lwd.max<-input$plot.control$line.widths[line.cuts]	
-	input$lines$lwd.min<-input$plot.control$line.widths[line.cuts]*input$plot.control$line.expansion
-	
-	# add line to remove NA values if plot.control$na.control is not a list
-	# this reduces the time taken to draw plots with many NA values
-	if(class(input$plot.control$na.control)!="list"){
-		if(any(is.na(input$lines$value))){
-			input$lines<-input$lines[-which(is.na(input$lines$value)==TRUE), ]}}
-
 	# loop to calculate lines of requisite location and colour
-	# line.list<-apply(input$lines, 1, FUN=function(x, input, distance){calc.lines(x, input, distance)},
-	#	input=input, distance=scale.distance)
-	# for some reason, apply() fails here, while a loop works; implement a loop until this is resolved.
-	line.list <-list()
-	for(i in 1:nrow(input$lines)){line.list[[i]]<-calc.lines(input$lines[i, ], input, distance=scale.distance)}
-
+	line.list<-split(lines, c(1:nrow(lines)))
+	line.list<-lapply(line.list, function(x, input, distance, plot.options){
+		calc.lines(x, input, distance, plot.options)},
+		input=points, distance=scale.distance, plot.options= plot.options)
+	names(line.list)<-apply(lines[, 1:2], 1, function(x){paste(x, collapse="_")})
 	return(line.list)
 	}
 
 
 # function to pass to apply in get.curves() to calculate locations for each line
-calc.lines<-function(lines, input, distance)
+calc.lines<-function(x, points, distance, plot.options)
 	{
-	# sort out line inputs
-	sp1<-as.character(lines[1])
-	sp2<-as.character(lines[2])
-	value<-as.numeric(lines[3])
-	col<-as.character(lines[5])
-	lwd.min<-as.numeric(lines[7])
-	lwd.max<-as.numeric(lines[6])
-
-	# sort out other inputs
-	points<-input$points
-	plot.control<-input$plot.control
-	binary<-input$binary
-	asymmetric<-input$asymmetric
-
 	# sort out coords for this row
-	row1<-which(points$label== sp1)
-	row2<-which(points$label== sp2)
+	row1<-which(points$labels== x$sp1)
+	row2<-which(points$labels== x$sp2)
 	coords<-data.frame(x= points$x[c(row1, row2)], y= points$y[c(row1, row2)])
-	
+
 	# find basic spatial info on these points
 	distance.thisrun<-distance[row1, row2]
 	coords.scaled<-triangle.coords(coords, distance.thisrun) # what coordinates should the curve be fit to?
 
 	# calculate the curve that fits between these points.
-	# Note that if there are an even number of points, some will pass through the intercept, causing earlier code to fail
 	if(coords.scaled$y[2]>0.0001){
 		apex<-curve.apex(coords, distance.thisrun)
 		curve.coords<-fit.quadratic(coords.scaled)
-		new.curve<-as.list(reposition.curve(curve.coords, apex))
+		new.curve<-reposition.curve(curve.coords, apex, coords)
 	}else{	# i.e. if a straight line
 		new.curve<-list(
 			x=seq(coords$x[1], coords$x[2], length.out=101), 
- 				y=seq(coords$y[1], coords$y[2], length.out=101))
+			y=seq(coords$y[1], coords$y[2], length.out=101))
 	} 
 
-	# set NA behaviour
-	if(is.na(value)){
-		if(is.list(plot.control$na.control)){
-			new.curve<-append(new.curve, plot.control$na.control) 
-			new.curve<-append(new.curve, list(direction=as.numeric(lines[4])))
-			}
-	}else{	# i.e. if this line is not a missing value (i.e. most cases).
-
-	# set line widths
-	lwd.range<-lwd.max-lwd.min
-	if(lwd.range>0){
-		# set default line widths (0-1 range) assuming expansion >0
-		x<-seq(-2, 2, length.out=100)
-		line.widths<-dnorm(x, mean=0, sd=0.5)
-		line.widths<-line.widths-min(line.widths)
-		line.widths<-line.widths/max(line.widths)
-		lwd.final<-(line.widths*lwd.range)+lwd.min
-	}else{lwd.final<-lwd.max}
-
 	# ensure that curves run from their start to end point
-	large.x<-which(sqrt(coords$x^2)>10^-3)
-	if(length(large.x)>1){large.x<-1}
-	if(large.x==1){order.test<-new.curve$x[1]-coords$x[large.x]
-	}else{order.test<-new.curve$x[101]-coords$x[large.x]}
-	if(sqrt(order.test^2)>10^-4){
-		new.curve$x<-new.curve$x[101:1]	
-		new.curve$y<-new.curve$y[101:1]}
-	# if direction states that the line be reversed, do so.
-	if(as.numeric(lines[4])==2){
+	first.x<-which.min(sqrt((coords$x[1]-new.curve$x)^2))
+	if(first.x>1){
 		new.curve$x<-new.curve$x[101:1]	
 		new.curve$y<-new.curve$y[101:1]}
 
 	# set line colours
-	if(binary & plot.control$line.gradient){ # for the special case where line colours are set by point colours
+	if(plot.options$line.gradient){ # for the special case where line colours are set by point colours
 		# get line colours from input$points
 		color1<-points$col[row1]
 		color2<-points$col[row2]
@@ -133,15 +80,11 @@ calc.lines<-function(lines, input, distance)
 		# ensure colours are in correct order
 		distance.pos<-sqrt((new.curve$x[1]-points$x[row1])^2)
 		if(distance.pos>0.001){colours.final<-colours.final[100:1]}		
-	}else{colours.final<-col} # in all other cases
+	}else{colours.final<-x$col} # in all other cases
 
 	# export
-	new.curve<-append(new.curve, list(
-		col=as.character(colours.final), 
-		lwd=as.numeric(lwd.final),
-		direction=as.numeric(lines[4])))
-
-	}	# end if(is.na())==F
+	new.curve<-append(new.curve, x[-c(1:3)])
+	if(plot.options$line.gradient){new.curve$col<-colours.final}
 
 	return(new.curve)
 
@@ -150,13 +93,8 @@ calc.lines<-function(lines, input, distance)
 
 
 # function to determine what kind of arrowhead to draw (if any) and then draw result from get.arrows()
-# note: if(asymmetric) is already called; so we only need to know whether an arrow should be drawn, and in which direction
 draw.arrows<-function(x, attr){
-	invisible(switch(x$direction,
-		 1=={draw<-TRUE; reverse<-FALSE},
-		 2=={draw<-TRUE; reverse<-TRUE},
-		 3=={draw<-FALSE; reverse<-NA}))
-	if(draw){
+	if(x$arrows){
 		if(length(x$col)>1){col.final<-x$col[ceiling(101*attr$distance)]
 		}else{col.final<-x$col}
 		polygon(get.arrows(x, attr, reverse), border=NA, col= col.final)}
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-export(add.key, calc.overlap, clean.list, circleplot, make.circle, make.long.format, make.wide.format, point.attr)`
	`1`	`+export(circleplot, draw.circle, make.circle, make.long.format, make.wide.format, point.attr)`