# Useful R functions for ecologists

Every biologist using **R** has some self-written functions they particularly like and which are useful in many different analyses. Here I am sharing seven nifty functions which have proven useful for me in the past. Many can be used in different contexts, and their functionality is missing from base R.

# Fit a set of commonly used distributions to `values` and rank them.
#
# Each candidate distribution is fitted with MASS::fitdistr(); the AIC and BIC
# of every fit are collected and returned in a table ordered by increasing BIC.
#
# Arguments:
#   values  Numeric vector of observations to fit.
#
# Returns:
#   A data.frame with the columns `distributions` (character), `AIC` and
#   `BIC`, one row per candidate distribution, sorted by BIC (best fit first).
#   Distributions that cannot be fitted to `values` (for instance the
#   log-normal when `values` contains non-positive numbers) keep NA in both
#   criteria, are listed last, and trigger a warning naming the distribution.
#
# Requires the MASS package to be installed; it is used via its namespace and
# is not attached to the search path.
#
# Example:
#   data(trees)
#   testdist(trees$Height)
#   #> distributions      AIC      BIC
#   #> weibull       204.6404 207.5083
#   #> normal        205.7745 208.6425
#   #> gamma         206.4929 209.3609
#   #> lognormal     206.9348 209.8028
#   #> logistic      207.0618 209.9298
#   #> cauchy        218.2213 221.0893
#   #> exponential   332.5055 333.9395
#   #> geometric     332.5107 333.9447
#
# Those values shouldn't be taken for granted: always explore potential
# distributions visually as well, for instance with histograms or qqplots!
testdist <- function(values) {
  if (!requireNamespace("MASS", quietly = TRUE)) {
    stop("Package 'MASS' is required for testdist()", call. = FALSE)
  }

  distributions <- c(
    "normal", "lognormal", "exponential", "logistic",
    "cauchy", "gamma", "geometric", "weibull"
  )
  res <- data.frame(
    distributions = distributions,
    AIC = NA_real_,
    BIC = NA_real_,
    stringsAsFactors = FALSE
  )

  for (i in seq_len(nrow(res))) {
    dist_name <- res$distributions[i]
    # A distribution that is undefined for the data must not abort the whole
    # comparison: report it and leave its criteria as NA.
    fit <- tryCatch(
      MASS::fitdistr(values, densfun = dist_name),
      error = function(e) {
        warning(
          "testdist: could not fit '", dist_name, "': ", conditionMessage(e),
          call. = FALSE
        )
        NULL
      }
    )
    if (!is.null(fit)) {
      res$AIC[i] <- stats::AIC(fit)
      res$BIC[i] <- stats::BIC(fit)
    }
  }

  res[order(res$BIC, decreasing = FALSE, na.last = TRUE), ]
}

# Standard error of the mean (SEM).
#
# Computes sqrt(var(x) / n), where missing values are dropped both from the
# variance and from the sample size n.
#
# Arguments:
#   x  Numeric vector; NA values are ignored.
#
# Returns:
#   A single numeric value, the standard error of the mean of `x`.
#
# Note: this function has the same name as base::stderr(), the function that
# returns the standard-error connection, and therefore masks it. To keep code
# such as `cat("msg", file = stderr())` working, a call without arguments is
# passed through to base::stderr().
#
# Example:
#   stderr(trees$Height)
#   #> [1] 1.144411
stderr <- function(x) {
  if (missing(x)) {
    return(base::stderr())
  }
  sqrt(stats::var(x, na.rm = TRUE) / length(stats::na.omit(x)))
}

# Draw error bars (vertical segments with horizontal caps) on top of an
# existing scatter plot.
#
# Arguments:
#   x, y  Numeric vectors with the coordinates of the plotted points.
#   ll    Numeric vector (or single value) with the half-length of each bar;
#         every bar runs from y - ll to y + ll.
#   eps   Half-width of the horizontal caps, in x-axis units.
#   ...   Further arguments (e.g. col, lwd, lty) passed on to every
#         segments() call.
#
# Returns:
#   NULL, invisibly; the function is called for its drawing side effect.
#   A plot must already be open, since segments() only adds to it.
#
# Example:
#   plot(trees$Girth, trees$Volume)
#   fit <- lm(trees$Volume ~ trees$Girth)
#   # Displays the residuals of a linear regression as error bars
#   doSegments(trees$Girth, trees$Volume, ll = resid(fit))
doSegments <- function(x, y, ll, eps = 0.05, ...) {
  lower <- y - ll
  upper <- y + ll
  segments(x, lower, x, upper, ...)              # the bar itself
  segments(x - eps, lower, x + eps, lower, ...)  # cap at y - ll
  segments(x - eps, upper, x + eps, upper, ...)  # cap at y + ll
  invisible(NULL)
}

# Two-sided set difference.
#
# Returns the elements of x that are not in y AND the elements of y that are
# not in x (not the same thing...).
#
# Arguments:
#   x, y  Vectors to compare.
#
# Returns:
#   A list with two components:
#     X_not_in_Y  elements of x that do not occur in y
#     Y_not_in_X  elements of y that do not occur in x
#
# Example:
#   a <- c("A", "B", "C", "D")
#   b <- c("C", "D", "E", "F")
#   setdiff2(a, b)
#   #> $X_not_in_Y
#   #> [1] "A" "B"
#   #> $Y_not_in_X
#   #> [1] "E" "F"
setdiff2 <- function(x, y) {
  list(
    X_not_in_Y = setdiff(x, y),
    Y_not_in_X = setdiff(y, x)
  )
}

# Remove all NA values from a vector, list, data.frame or matrix.
#
# Arguments:
#   x  A vector, a list, a data.frame or a matrix.
#
# Returns:
#   `x` without its missing entries:
#     - vector:             the non-NA elements
#     - list:               the elements that are not a single NA value
#     - data.frame/matrix:  the rows that contain no NA at all
#
# Example:
#   a <- c("A", "B", NA, "D")
#   remNa(a)
#   #> [1] "A" "B" "D"
remNa <- function(x) {
  # complete.cases() treats the elements of a plain list as columns of one
  # table, so it cannot be used to drop NA elements from a list; is.na() on a
  # list flags exactly the elements that are a length-one NA.
  if (is.list(x) && !is.data.frame(x)) {
    return(x[!is.na(x)])
  }
  subset(x, stats::complete.cases(x))
}

## String manipulation

# Trim leading and trailing white space (spaces, tabs, newlines, ...).
#
# Arguments:
#   s  Character vector.
#
# Returns:
#   A character vector of the same length as `s` with white space removed
#   from both ends of every element; inner white space is left untouched.
#
# Example:
#   a <- " Teststring "
#   trim_whitespace(a)
#   #> [1] "Teststring"
trim_whitespace <- function(s) {
  gsub("^[[:space:]]+|[[:space:]]+$", "", s)
}

# Extract the number contained in a string.
#
# Every non-digit character is removed and the remaining digits are converted
# to a number. Consequently all digits of a string are concatenated
# ("3 cats, 4 dogs" gives 34), decimal points and signs are dropped, and a
# string without any digit gives NA.
#
# Arguments:
#   x  Character vector.
#
# Returns:
#   A numeric vector of the same length as `x`.
#
# Example:
#   a <- "We found 13 rabbits playing on the field"
#   numbers_from_string(a)
#   #> [1] 13
numbers_from_string <- function(x) {
  as.numeric(gsub("\\D", "", x))
}