# Basic if statement
score <- 85

# The body runs only when the condition is TRUE
if (score >= 80) {
  print("You passed!")
}
[1] "You passed!"
# The condition must evaluate to a single TRUE or FALSE
x <- 10
if (x > 5) {
  cat("x is greater than 5\n")
}
x is greater than 5
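Control structures allow you to control the flow of execution in your R programs. They enable you to make decisions with `if`/`else`, repeat operations with `for` and `while` loops, and steer loops with `break` and `next`.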
While R’s vectorized operations often eliminate the need for explicit loops, understanding control structures is essential for more complex programming tasks.
`if` Statement

The most basic control structure tests a condition and executes code if the condition is true:
# Basic if statement
score <- 85

# The body runs only when the condition is TRUE
if (score >= 80) {
  print("You passed!")
}
[1] "You passed!"
# The condition must evaluate to a single TRUE or FALSE
x <- 10
if (x > 5) {
  cat("x is greater than 5\n")
}
x is greater than 5
`if-else` Statement

Use `else` to specify what happens when the condition is false:
# Simple if-else
temperature <- 75

# `else` must sit on the same line as the closing brace of the `if` body
if (temperature > 80) {
  print("It's hot outside!")
} else {
  print("It's not too hot.")
}
[1] "It's not too hot."
# Another example
age <- 17

# Each branch assigns `status`, so it is always defined after the if-else
if (age >= 18) {
  status <- "adult"
} else {
  status <- "minor"
}

cat("Status:", status, "\n")
Status: minor
else if
Chain multiple conditions together:
# Grade assignment
score <- 87

# Conditions are tested top to bottom; the first TRUE branch wins
if (score >= 90) {
  grade <- "A"
} else if (score >= 80) {
  grade <- "B"
} else if (score >= 70) {
  grade <- "C"
} else if (score >= 60) {
  grade <- "D"
} else {
  grade <- "F"
}

cat("Your grade is:", grade, "\n")
Your grade is: B
Nested `if` Statements

You can nest `if` statements inside other `if` statements:
# Weather decision making
temperature <- 75
is_raining <- FALSE

# Outer test picks warm vs. cold; inner test picks rainy vs. dry
if (temperature > 70) {
  if (is_raining) {
    print("Warm but rainy - bring an umbrella!")
  } else {
    print("Perfect weather for a walk!")
  }
} else {
  if (is_raining) {
    print("Cold and rainy - stay inside!")
  } else {
    print("Cold but dry - wear a jacket!")
  }
}
[1] "Perfect weather for a walk!"
`ifelse()`

For vectorized operations, use `ifelse()`:
# Apply condition to entire vector
scores <- c(85, 92, 78, 96, 73, 88)

# Traditional if would only work with single values
# This creates a vector of results
results <- ifelse(scores >= 80, "Pass", "Fail")
print(results)
[1] "Pass" "Pass" "Fail" "Pass" "Fail" "Pass"
# More complex example
temperatures <- c(68, 75, 82, 79, 71)

# Nested ifelse(): the inner call supplies the result where the outer test fails
weather_description <- ifelse(
  temperatures > 80,
  "Hot",
  ifelse(temperatures > 70, "Warm", "Cool")
)
print(weather_description)
[1] "Cool" "Warm" "Hot" "Warm" "Warm"
# Numeric results
# Uses the `scores` vector defined in the earlier ifelse() example
bonus <- ifelse(scores >= 90, scores * 0.1, 0)
print(bonus)
[1] 0.0 9.2 0.0 9.6 0.0 0.0
age <- 25
income <- 45000
has_degree <- TRUE

# AND operator (&)
# `&` is element-wise; with the length-one values used here it gives the same
# answer as the scalar, short-circuiting `&&` usually preferred inside `if`.
if (age >= 18 & income > 30000) {
  print("Eligible for loan")
}
[1] "Eligible for loan"
# OR operator (|)
# Relies on `age` and `has_degree` assigned at the start of this example
if (age < 18 | age > 65) {
  print("Special rate applies")
}

# NOT operator (!)
if (!has_degree) {
  print("Degree required")
} else {
  print("Education requirement met")
}
[1] "Education requirement met"
# Complex conditions
# An age window combined with either of two qualifying criteria
if (
  (age >= 21 & age <= 65) &
    (income > 40000 | has_degree)
) {
  print("Premium membership available")
}
[1] "Premium membership available"
x <- 10
y <- 20
# All comparison operators
print(x == y) # Equal to
[1] FALSE
print(x != y) # Not equal to
[1] TRUE
print(x < y) # Less than
[1] TRUE
print(x <= y) # Less than or equal to
[1] TRUE
print(x > y) # Greater than
[1] FALSE
print(x >= y) # Greater than or equal to
[1] FALSE
# Using with strings
name1 <- "Alice"
name2 <- "Bob"
print(name1 == name2)
[1] FALSE
print(name1 < name2) # Alphabetical comparison
[1] TRUE
`for` Loop

`for` loops iterate over a sequence of values:
# Basic for loop
# Visits the integers 1 through 5 in order
for (i in seq_len(5)) {
  print(paste("Iteration:", i))
}
[1] "Iteration: 1"
[1] "Iteration: 2"
[1] "Iteration: 3"
[1] "Iteration: 4"
[1] "Iteration: 5"
# Loop over vector elements
fruits <- c("apple", "banana", "cherry")

# Iterating over the vector itself yields each element, not its index
for (fruit in fruits) {
  print(paste("I like", fruit))
}
[1] "I like apple"
[1] "I like banana"
[1] "I like cherry"
# Loop with calculations
squares <- numeric(5)  # Pre-allocate vector

# seq_along() ties the loop range to the pre-allocated length
for (i in seq_along(squares)) {
  squares[i] <- i^2
}
print(squares)
[1] 1 4 9 16 25
`for` Loop Examples

# Calculate cumulative sum
numbers <- c(5, 10, 15, 20, 25)
cumulative <- numeric(length(numbers))
cumulative[1] <- numbers[1]

# Each entry is the previous running total plus the current value.
# seq_along(numbers)[-1] is empty for a one-element vector, whereas
# 2:length(numbers) would count backwards (2, 1).
for (i in seq_along(numbers)[-1]) {
  cumulative[i] <- cumulative[i - 1] + numbers[i]
}
print(cumulative)
[1] 5 15 30 50 75
# Compare with built-in function
print(cumsum(numbers)) # R's built-in cumulative sum
[1] 5 15 30 50 75
# Process data frame rows
students <- data.frame(
  name = c("Alice", "Bob", "Charlie"),
  midterm = c(85, 78, 92),
  final = c(88, 82, 89)
)

# Weighted overall grade: 40% midterm, 60% final.
# seq_len(nrow()) yields an empty range for a zero-row data frame.
for (i in seq_len(nrow(students))) {
  overall <- 0.4 * students$midterm[i] + 0.6 * students$final[i]
  cat(students$name[i], "overall grade:", round(overall, 1), "\n")
}
Alice overall grade: 86.8
Bob overall grade: 80.4
Charlie overall grade: 90.2
`while` Loop

`while` loops continue until a condition becomes false:
# Basic while loop
counter <- 1
while (counter <= 5) {
  print(paste("Count:", counter))
  counter <- counter + 1  # without this update the loop would never end
}
[1] "Count: 1"
[1] "Count: 2"
[1] "Count: 3"
[1] "Count: 4"
[1] "Count: 5"
# While loop with condition
balance <- 1000
interest_rate <- 0.05
year <- 0

# Compound once per year until the balance reaches 2000
while (balance < 2000) {
  balance <- balance * (1 + interest_rate)
  year <- year + 1
}

cat("It takes", year, "years for balance to exceed $2000\n")
It takes 15 years for balance to exceed $2000
cat("Final balance: $", round(balance, 2), "\n")
Final balance: $ 2078.93
`break` and `next`
# Using break to exit loop early
for (i in 1:10) {
  if (i == 6) {
    print("Breaking at 6")
    break
  }
  print(i)
}
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] "Breaking at 6"
# Using next to skip iterations
for (i in 1:10) {
  if (i %% 2 == 0) {  # Skip even numbers
    next
  }
  print(paste("Odd number:", i))
}
[1] "Odd number: 1"
[1] "Odd number: 3"
[1] "Odd number: 5"
[1] "Odd number: 7"
[1] "Odd number: 9"
In R, vectorized operations are usually preferred over loops:
# Calculate squares using loop (slower)
n <- 1000
squares_loop <- numeric(n)

# system.time() reports how long the braced expression takes to run
system.time({
  for (i in seq_len(n)) {
    squares_loop[i] <- i^2
  }
})
user system elapsed
0.001 0.000 0.001
# Calculate squares using vectorization (faster)
# Uses `n` from the loop version above
system.time({
  squares_vector <- (1:n)^2
})
user system elapsed
0 0 0
# Both give same result
identical(squares_loop, squares_vector)
[1] TRUE
# More examples of vectorization
numbers <- 1:100

# Instead of loop for condition
# Loop approach:
# result <- character(100)
# for (i in 1:100) {
#   if (numbers[i] > 50) result[i] <- "high" else result[i] <- "low"
# }
# Vectorized approach:
result <- ifelse(numbers > 50, "high", "low")
`switch` Statement

For multiple discrete choices, `switch` can be cleaner than multiple `if-else` statements:
# Basic switch with character
# Describe a day of the week.
#
# day: a single character string such as "Monday".
# Returns a short description of that day, or "Unknown day" when `day`
# matches none of the listed names.
day_type <- function(day) {
  switch(day,
    "Monday" = "Start of work week",
    "Tuesday" = "Work day",
    "Wednesday" = "Hump day",
    "Thursday" = "Almost there",
    "Friday" = "TGIF!",
    "Saturday" = ,  # empty alternative falls through to the next value
    "Sunday" = "Weekend!",
    "Unknown day"  # Default case
  )
}
print(day_type("Friday"))
[1] "TGIF!"
print(day_type("Sunday"))
[1] "Weekend!"
print(day_type("Holiday"))
[1] "Unknown day"
# Switch with numeric values
# Convert a letter grade to grade points.
#
# letter_grade: a single character string, one of "A", "B", "C", "D", "F".
# Returns the numeric grade points, or NA for any other string.
grade_points <- function(letter_grade) {
  switch(letter_grade,
    "A" = 4.0,
    "B" = 3.0,
    "C" = 2.0,
    "D" = 1.0,
    "F" = 0.0,
    NA_real_  # Default for invalid grades; typed NA keeps the result numeric
  )
}
print(grade_points("A"))
[1] 4
print(grade_points("C"))
[1] 2
print(grade_points("X"))
[1] NA
# Simulate messy data
raw_data <- data.frame(
  age = c(25, -5, 150, 30, 22),
  income = c(50000, 75000, -1000, 85000, 45000),
  score = c(85, 92, 78, 150, 88)
)

# Clean the data using control structures
# Work on a copy so raw_data stays available for comparison
cleaned_data <- raw_data

for (i in seq_len(nrow(cleaned_data))) {
  # Validate age
  if (cleaned_data$age[i] < 0 || cleaned_data$age[i] > 120) {
    cleaned_data$age[i] <- NA
    cat("Invalid age in row", i, "- set to NA\n")
  }

  # Validate income
  if (cleaned_data$income[i] < 0) {
    cleaned_data$income[i] <- NA
    cat("Invalid income in row", i, "- set to NA\n")
  }

  # Validate score (0-100 range)
  if (cleaned_data$score[i] < 0 || cleaned_data$score[i] > 100) {
    cleaned_data$score[i] <- NA
    cat("Invalid score in row", i, "- set to NA\n")
  }
}
Invalid age in row 2 - set to NA
Invalid age in row 3 - set to NA
Invalid income in row 3 - set to NA
Invalid score in row 4 - set to NA
print("Original data:")
[1] "Original data:"
print(raw_data)
age income score
1 25 50000 85
2 -5 75000 92
3 150 -1000 78
4 30 85000 150
5 22 45000 88
print("Cleaned data:")
[1] "Cleaned data:"
print(cleaned_data)
age income score
1 25 50000 85
2 NA 75000 92
3 NA NA 78
4 30 85000 NA
5 22 45000 88
# Simulate rolling dice until we get a 6
set.seed(123)
# Roll a six-sided die until a 6 comes up.
#
# Prints every roll with cat() and returns the number of rolls taken.
# Draws from R's random number generator, so call set.seed() first for
# reproducible results.
simulate_dice <- function() {
  rolls <- 0
  result <- 0  # 0 is never a die face, so the loop always runs at least once

  while (result != 6) {
    result <- sample(1:6, 1)
    rolls <- rolls + 1
    cat("Roll", rolls, ":", result, "\n")
  }

  rolls
}
cat("Simulation 1:\n")
Simulation 1:
rolls_needed <- simulate_dice()
Roll 1 : 3
Roll 2 : 6
cat("Took", rolls_needed, "rolls to get a 6\n\n")
Took 2 rolls to get a 6
# Run multiple simulations
num_simulations <- 1000
all_rolls <- numeric(num_simulations)  # pre-allocate one slot per simulation

for (i in seq_len(num_simulations)) {
  rolls <- 0
  result <- 0

  # Same roll-until-6 logic as the single simulation, without printing
  while (result != 6) {
    result <- sample(1:6, 1)
    rolls <- rolls + 1
  }

  all_rolls[i] <- rolls
}
cat("Average rolls needed:", mean(all_rolls), "\n")
Average rolls needed: 6.035
cat("Maximum rolls needed:", max(all_rolls), "\n")
Maximum rolls needed: 46
cat("95% of games finished within", quantile(all_rolls, 0.95), "rolls\n")
95% of games finished within 17 rolls
# Survey responses
survey_data <- data.frame(
  respondent = 1:10,
  satisfaction = c(5, 4, 3, 5, 2, 4, 5, 3, 4, 5),
  recommend = c("Yes", "Yes", "No", "Yes", "No", "Maybe", "Yes", "No", "Yes", "Yes"),
  comments = c("Great!", "", "Could be better", "Excellent", "Poor service",
               "", "Amazing!", "Not satisfied", "", "Very good")
)

# Process and categorize responses
processed_data <- survey_data

# Create the derived columns up front so the loop fills existing, correctly
# typed columns instead of creating them on the first assignment.
processed_data$satisfaction_category <- NA_character_
processed_data$recommend_score <- NA_real_
processed_data$has_comment <- NA

for (i in seq_len(nrow(processed_data))) {
  # Categorize satisfaction scores
  if (processed_data$satisfaction[i] >= 4) {
    processed_data$satisfaction_category[i] <- "High"
  } else if (processed_data$satisfaction[i] >= 3) {
    processed_data$satisfaction_category[i] <- "Medium"
  } else {
    processed_data$satisfaction_category[i] <- "Low"
  }

  # Convert recommendation to numeric score
  rec_score <- switch(processed_data$recommend[i],
    "Yes" = 3,
    "Maybe" = 2,
    "No" = 1,
    0  # Default for unexpected values
  )
  processed_data$recommend_score[i] <- rec_score

  # Flag responses with comments
  processed_data$has_comment[i] <- processed_data$comments[i] != ""
}
# Summary analysis
cat("Satisfaction Distribution:\n")
Satisfaction Distribution:
print(table(processed_data$satisfaction_category))
High Low Medium
7 1 2
cat("\nRecommendation Distribution:\n")
Recommendation Distribution:
print(table(processed_data$recommend))
Maybe No Yes
1 3 6
cat("\nPercent with comments:",
round(mean(processed_data$has_comment) * 100, 1), "%\n")
Percent with comments: 70 %
# Find dissatisfied customers with comments
# Row filter: satisfaction of 2 or lower AND a non-empty comment;
# column filter: only the three columns needed for follow-up.
dissatisfied_with_comments <- processed_data[
  processed_data$satisfaction <= 2 & processed_data$has_comment,
  c("respondent", "satisfaction", "comments")
]

if (nrow(dissatisfied_with_comments) > 0) {
  cat("\nDissatisfied customers with comments:\n")
  print(dissatisfied_with_comments)
}
Dissatisfied customers with comments:
respondent satisfaction comments
5 5 2 Poor service
# Control structure appropriate: Fibonacci sequence
# Compute the first `n` Fibonacci numbers (1, 1, 2, 3, 5, ...).
#
# n: how many terms to return (a single number).
# Returns a numeric vector of length `n`; an empty vector when `n` is below 1.
#
# Each term depends on the two before it, so an explicit loop is appropriate.
fibonacci <- function(n) {
  if (n < 1) {
    return(numeric(0))
  }

  # The first two terms are 1; rep() also covers n == 1 and n == 2, so
  # fibonacci(2) returns both terms rather than a single 1.
  fib <- rep(1, n)

  if (n >= 3) {
    for (i in 3:n) {
      fib[i] <- fib[i - 1] + fib[i - 2]
    }
  }

  fib
}
print(fibonacci(10))
[1] 1 1 2 3 5 8 13 21 34 55
# Vectorization appropriate: Simple transformations
data <- 1:1000
squared <- data^2        # Much faster than loop
log_values <- log(data)  # Vectorized function
# Bad: Growing vector in loop (slow)
# result <- c()
# for (i in 1:1000) {
# result <- c(result, i^2)
# }
# Good: Pre-allocate (fast)
result <- numeric(1000)
for (i in seq_along(result)) {
  result[i] <- i^2
}
# Instead of loop:
# result <- numeric(length(data))
# for (i in 1:length(data)) {
# result[i] <- data[i] * 2
# }
# Use vectorization:
data <- 1:100
result <- data * 2
# Dangerous: vector in condition
scores <- c(85, 92, 78)
# if (scores > 80) { ... }  # This would cause an error!

# Safe: single logical value
# any() collapses the element-wise comparison to one TRUE/FALSE
if (any(scores > 80)) {
  print("At least one score above 80")
}
[1] "At least one score above 80"
if (all(scores > 80)) {
print("All scores above 80")
}
Write a function that takes a numeric score and returns the letter grade using if-else statements:
- A: 90-100
- B: 80-89
- C: 70-79
- D: 60-69
- F: Below 60
Create a simple number guessing game where:
1. The computer picks a random number between 1 and 100
2. The user has to guess (simulate with a loop)
3. The game provides “higher” or “lower” hints
4. The number of guesses is counted
Given a vector of temperatures, write code that:
1. Uses a loop to categorize each temperature as “Freezing”, “Cold”, “Mild”, “Warm”, or “Hot”
2. Counts how many days fall into each category
3. Compares this approach with a vectorized solution
Write a function that calculates compound interest using a while loop. The function should:
1. Take initial amount, interest rate, and target amount
2. Calculate how many years to reach the target
3. Print yearly balances along the way
Control structures are essential tools for programming logic:
- `if`, `else if`, `else` for decision making
- `for` and `while` for repetitive operations
- `break` and `next` for flow control

Next, we’ll explore how to organize code into reusable functions!