Functions are reusable blocks of code that perform specific tasks. In R, functions take inputs (called arguments or parameters), process them, and return outputs. They're essential for organizing your code, avoiding repetition, and making your scientific analysis more efficient and reproducible.
In scientific analysis, you often need to perform the same calculations repeatedly - calculating means, converting units, applying statistical tests, or processing data in the same way. Functions let you write the code once and use it many times.
# Template showing the general shape of an R function definition.
# `some_calculation` is a placeholder, not a defined object, so this
# skeleton is meant to be read rather than called as-is.
function_name <- function(argument1, argument2, ...) {
# Code that does something with the arguments
result <- some_calculation
return(result) # Optional - R returns the last expression by default
}
# Convert Celsius to Fahrenheit
#' Convert temperatures from Celsius to Fahrenheit
#'
#' @param celsius Numeric vector of temperatures in degrees Celsius.
#' @return Numeric vector of the same length, in degrees Fahrenheit.
celsius_to_fahrenheit <- function(celsius) {
  # Scale by 9/5 first, then shift by the 32-degree offset
  scaled <- celsius * 9/5
  scaled + 32
}
# Usage: convert a single temperature
lab_temp <- 25 # Celsius
lab_temp_f <- celsius_to_fahrenheit(lab_temp)
print(lab_temp_f) # 77
# Convert multiple temperatures (the whole vector is passed in one call)
temps_c <- c(20, 25, 30, 35)
temps_f <- celsius_to_fahrenheit(temps_c)
print(temps_f) # 68 77 86 95
# Beer-Lambert law: A = ε * c * l
# Solving for concentration: c = A / (ε * l)
#' Concentration from an absorbance reading
#'
#' @param absorbance Measured absorbance (A).
#' @param extinction_coeff Extinction coefficient (ε).
#' @param path_length Optical path length (l).
#' @return `absorbance / (extinction_coeff * path_length)`.
calculate_concentration <- function(absorbance, extinction_coeff, path_length) {
  denominator <- extinction_coeff * path_length
  absorbance / denominator
}
# Usage: one absorbance reading with its extinction coefficient and path length
abs_reading <- 0.75
epsilon <- 1500 # L/(mol·cm)
path_len <- 1 # cm
conc <- calculate_concentration(abs_reading, epsilon, path_len)
print(paste("Concentration:", conc, "mol/L"))
You can provide default values for arguments:
#' Standard error and t-based margin of error for a sample
#'
#' @param data Numeric vector of observations.
#' @param confidence_level Two-sided confidence level; defaults to 0.95.
#' @return Named list with `standard_error`, `margin_of_error` and the
#'   `confidence_level` that was used.
standard_error <- function(data, confidence_level = 0.95) {
  sample_size <- length(data)
  std_err <- sd(data) / sqrt(sample_size)

  # Upper quantile of the t distribution for a two-sided interval
  upper_prob <- (1 + confidence_level) / 2
  critical_t <- qt(upper_prob, df = sample_size - 1)

  list(
    standard_error = std_err,
    margin_of_error = critical_t * std_err,
    confidence_level = confidence_level
  )
}
# Usage with default 95% confidence
plant_heights <- c(15.2, 16.1, 14.8, 15.9, 16.3, 15.7, 16.0, 15.5)
result_95 <- standard_error(plant_heights)
# Usage with custom confidence level
result_99 <- standard_error(plant_heights, confidence_level = 0.99)
Use lists to return multiple results:
#' Descriptive statistics for a numeric sample
#'
#' @param data Numeric vector of observations.
#' @return Named list with `n`, `mean`, `median`, `sd`, `min`, `max`,
#'   `range` (the two-element result of `range()`), `q1` and `q3`.
descriptive_stats <- function(data) {
  # Small helper so both quartiles are computed the same way
  quantile_at <- function(prob) quantile(data, prob)

  list(
    n = length(data),
    mean = mean(data),
    median = median(data),
    sd = sd(data),
    min = min(data),
    max = max(data),
    range = range(data),
    q1 = quantile_at(0.25),
    q3 = quantile_at(0.75)
  )
}
# Usage
reaction_times <- c(12.3, 11.8, 12.7, 11.9, 12.1, 12.4, 11.7, 12.2)
summary_stats <- descriptive_stats(reaction_times)
# Access specific results by element name with `$`
print(paste("Mean reaction time:", summary_stats$mean))
print(paste("Standard deviation:", summary_stats$sd))
# Classify pH levels and suggest treatments
#' Classify a single pH reading and suggest a treatment
#'
#' @param ph_value A single pH value.
#' @return List with `ph`, `classification` ("Acidic", "Alkaline" or
#'   "Neutral") and `recommendation`.
classify_ph <- function(ph_value) {
  # Assemble the result in one place so every branch has the same shape
  build_result <- function(label, advice) {
    list(
      ph = ph_value,
      classification = label,
      recommendation = advice
    )
  }

  if (ph_value < 6.0) {
    return(build_result("Acidic", "Add lime to raise pH"))
  }
  if (ph_value > 8.0) {
    return(build_result("Alkaline", "Add sulfur to lower pH"))
  }
  build_result("Neutral", "No treatment needed")
}
# Usage: classify each soil sample in turn and print one line per sample
soil_samples <- c(5.2, 6.8, 7.1, 8.5, 6.2)
for (ph in soil_samples) {
  result <- classify_ph(ph)
  cat("pH", result$ph, "is", result$classification, "-", result$recommendation, "\n")
}
Good functions check for valid inputs:
#' Body mass index with input validation
#'
#' @param weight_kg Body weight in kilograms.
#' @param height_m Height in meters.
#' @return List with `bmi` (rounded to one decimal) and `category`.
#'   Stops on non-numeric or non-positive input; warns when `height_m`
#'   exceeds 3.
calculate_bmi <- function(weight_kg, height_m) {
  # Validate inputs before doing any arithmetic
  inputs_numeric <- is.numeric(weight_kg) && is.numeric(height_m)
  if (!inputs_numeric) {
    stop("Weight and height must be numeric values")
  }
  if (weight_kg <= 0 || height_m <= 0) {
    stop("Weight and height must be positive values")
  }
  if (height_m > 3) {
    warning("Height seems unusually large. Please check if height is in meters.")
  }

  bmi <- weight_kg / (height_m^2)

  # Classify using the unrounded value; rounding happens only on output
  category <- if (bmi < 18.5) {
    "Underweight"
  } else if (bmi < 25) {
    "Normal weight"
  } else if (bmi < 30) {
    "Overweight"
  } else {
    "Obese"
  }

  list(bmi = round(bmi, 1), category = category)
}
# Usage: weight in kilograms first, height in meters second
subject_bmi <- calculate_bmi(70, 1.75)
print(paste("BMI:", subject_bmi$bmi, "Category:", subject_bmi$category))
# Function to clean and summarize experimental data
# Summarize one measurement column per group, ignoring incomplete rows.
#
# Arguments:
#   data        - data frame containing both columns
#   group_col   - column holding the group labels (the usage passes a name)
#   measure_col - column holding the measurements (the usage passes a name)
# Returns the object produced by by(): for each group, a list with
# n, mean, sd and se (sd / sqrt(n)).
analyze_experiment <- function(data, group_col, measure_col) {
# Keep only rows where both the group and the measurement are present
clean_data <- data[complete.cases(data[c(group_col, measure_col)]), ]
# Calculate group statistics
# NOTE(review): by() appears to label its output using the deparsed index
# expression, so renaming `clean_data` may change printed headers - confirm
# before refactoring this call.
results <- by(clean_data[[measure_col]], clean_data[[group_col]], function(x) {
list(
n = length(x),
mean = mean(x),
sd = sd(x),
se = sd(x) / sqrt(length(x))
)
})
return(results)
}
# Create sample data: three treatment groups with six measurements each
experiment_data <- data.frame(
treatment = rep(c("Control", "Treatment_A", "Treatment_B"), each = 6),
growth_rate = c(2.1, 2.3, 1.9, 2.2, 2.0, 2.1, # Control
3.1, 2.9, 3.3, 3.0, 3.2, 2.8, # Treatment A
2.8, 3.0, 2.7, 2.9, 3.1, 2.6) # Treatment B
)
# Usage: columns are identified by name
results <- analyze_experiment(experiment_data, "treatment", "growth_rate")
print(results)
# Create a function generator for unit conversions
#' Build a linear unit-conversion function
#'
#' @param factor Multiplicative conversion factor.
#' @param offset Additive offset applied after scaling; defaults to 0.
#' @return A function of `x` that computes `x * factor + offset`.
make_converter <- function(factor, offset = 0) {
  # Arguments are lazy promises: without forcing them here, the converter
  # would pick up any later change to the caller's variables.
  force(factor)
  force(offset)

  function(x) {
    x * factor + offset
  }
}
# Create specific converters (second argument, when given, is the offset)
kg_to_pounds <- make_converter(2.20462)
cm_to_inches <- make_converter(0.393701)
celsius_to_kelvin <- make_converter(1, 273.15)
# Usage
weight_lbs <- kg_to_pounds(75) # 165.35 pounds
temp_kelvin <- celsius_to_kelvin(25) # 298.15 K
# Calculate coefficient of variation for multiple datasets
#' Compare relative variability across several datasets
#'
#' @param ... Numeric vectors, one per dataset. Names, if supplied, are
#'   carried over to the result.
#' @param method Either "cv" (coefficient of variation, `sd / mean * 100`)
#'   or "range" (`(max - min) / mean * 100`).
#' @return Numeric vector with one percentage per dataset; `numeric(0)`
#'   when no datasets are given. Stops on an unrecognised `method`.
compare_variability <- function(..., method = "cv") {
  # Reject unknown methods up front instead of silently returning NULLs
  method <- match.arg(method, choices = c("cv", "range"))
  datasets <- list(...)

  # Pick the statistic once rather than re-testing `method` per dataset
  spread_pct <- switch(method,
    cv = function(x) (sd(x) / mean(x)) * 100,
    range = function(x) (max(x) - min(x)) / mean(x) * 100
  )

  # vapply guarantees a numeric vector even for zero datasets
  vapply(datasets, spread_pct, numeric(1))
}
# Usage: three groups compared by coefficient of variation
group1 <- c(12.1, 12.3, 12.0, 12.4, 12.2)
group2 <- c(15.1, 14.8, 15.3, 14.9, 15.2)
group3 <- c(18.5, 17.2, 19.1, 16.8, 18.9)
variability <- compare_variability(group1, group2, group3, method = "cv")
print(variability)
# Good
# `{ ... }` stands in for a body that is not shown
calculate_reaction_rate <- function(concentration, time) { ... }
# Less clear
# (the argument names `c` and `t` are also the names of base R functions)
calc_rate <- function(c, t) { ... }
#' Calculate Michaelis-Menten kinetics parameters
#'
#' @param substrate_conc Vector of substrate concentrations
#' @param reaction_rate Vector of corresponding reaction rates
#' @return List containing Vmax and Km estimates
calculate_km_vmax <- function(substrate_conc, reaction_rate) {
  # Function implementation
  # (placeholder: the body is empty, so this currently returns NULL)
}

#' Validate a vector of concentrations
#'
#' Stops if any non-missing value is negative; warns if any value is
#' missing.
#'
#' @param conc Numeric vector of concentrations.
#' @return Called for its side effects (an error or a warning).
validate_concentration <- function(conc) {
  # na.rm = TRUE: otherwise any(conc < 0) is NA whenever the data contain
  # missing values but no negatives, and `if (NA)` aborts with an
  # unrelated error before the missing-value warning can be raised.
  if (any(conc < 0, na.rm = TRUE)) {
    stop("Concentrations cannot be negative")
  }
  if (any(is.na(conc))) {
    warning("Missing values detected in concentration data")
  }
}

#' Run a t-test and summarise the outcome
#'
#' @param group1 Numeric vector of observations for the first group.
#' @param group2 Numeric vector of observations for the second group.
#' @param paired Passed to `t.test()`; `TRUE` for a paired test.
#' @param alpha Significance threshold compared against the p-value.
#' @return List with `statistic`, `p_value`, `significant`,
#'   `confidence_interval` and a plain-language `interpretation`.
perform_t_test <- function(group1, group2, paired = FALSE, alpha = 0.05) {
  result <- t.test(group1, group2, paired = paired)

  # Compare once and reuse, so the flag and the wording cannot disagree
  is_significant <- result$p.value < alpha

  list(
    statistic = result$statistic,
    p_value = result$p.value,
    significant = is_significant,
    confidence_interval = result$conf.int,
    # ifelse() is kept so a vector of alpha levels still yields one
    # interpretation per level, as before
    interpretation = ifelse(is_significant,
                            "Statistically significant difference",
                            "No significant difference")
  )
}
Functions make your R code more organized, reusable, and easier to debug. They're particularly valuable in scientific work where you need to apply the same analysis methods to different datasets or repeat calculations with different parameters.