# These are all doubles by default
x <- 1.2
y <- 3.0
z <- 5
# Check their types
typeof(x)
#> [1] "double"
typeof(y)
#> [1] "double"
typeof(z) # Even though 5 looks like an integer, it's a double!
#> [1] "double"
In R, everything is an object, and every object has a data type. Understanding data types is fundamental to working effectively with R because it determines what operations you can perform and how R stores and processes your data.
R has four fundamental data types: integer, double, character, and logical.
Let’s explore each of these in detail.
R distinguishes between two types of numbers: integers and doubles.
By default, R treats all numbers as double (also called “numeric”):
# These are all doubles by default
x <- 1.2
y <- 3.0
z <- 5
# Check their types
typeof(x)
#> [1] "double"
typeof(y)
#> [1] "double"
typeof(z) # Even though 5 looks like an integer, it's a double!
#> [1] "double"
To create an integer, you must explicitly specify it by adding L after the number:
# Creating integers
a <- 2L
b <- -123456789L
c <- 0L
# Check their types
typeof(a)
#> [1] "integer"
typeof(b)
#> [1] "integer"
typeof(c)
#> [1] "integer"
The difference between integers and doubles affects the type of result that arithmetic operations return:
# Mathematical operations
double_result <- 3.5 + 1.2
typeof(double_result)
#> [1] "double"
integer_result <- 3L + 4L
typeof(integer_result) # Still integer!
#> [1] "integer"
mixed_result <- 3L + 1.3 # Integer + double = double
typeof(mixed_result)
#> [1] "double"
R can handle very large numbers, but there are limits:
# Scientific notation
large_number <- 1.5e8
print(large_number)
#> [1] 1.5e+08
# Very large numbers become Inf (infinity)
too_large <- 1e1000
print(too_large)
#> [1] Inf
# Very small numbers become 0
too_small <- 1e-1000
print(too_small)
#> [1] 0
# Check if something is numeric
is.numeric(3.14)
#> [1] TRUE
is.numeric("hello")
#> [1] FALSE
# Check specific types
is.integer(5L)
#> [1] TRUE
is.double(5.0)
#> [1] TRUE
# Convert between types
as.integer(3.7) # Truncates, doesn't round!
#> [1] 3
as.double(5L) # Convert integer to double
#> [1] 5
Character data represents text and is created using quotation marks:
# Using double quotes
text1 <- "Hello, world!"
# Using single quotes
text2 <- 'This is also a string'
# Mixing quotes (useful when string contains quotes)
text3 <- "She said 'Hello!'"
text4 <- 'He replied "Hi there!"'
print(text1)
#> [1] "Hello, world!"
print(text3)
#> [1] "She said 'Hello!'"
my_string <- "Data Science is awesome!"
# Check type
typeof(my_string)
#> [1] "character"
# Check if it's character
is.character(my_string)
#> [1] TRUE
# Get string length
nchar(my_string)
#> [1] 24
# String concatenation
first_name <- "Jane"
last_name <- "Doe"
full_name <- paste(first_name, last_name)
print(full_name)
#> [1] "Jane Doe"
# Alternative concatenation
full_name2 <- paste0(first_name, " ", last_name) # paste0() adds no separator, so the space is supplied explicitly
print(full_name2)
#> [1] "Jane Doe"
# Substrings
substr(full_name, 1, 4) # Characters 1 through 4
#> [1] "Jane"
# Numbers to strings
age <- 25
age_text <- as.character(age)
print(age_text)
#> [1] "25"
typeof(age_text)
#> [1] "character"
# Strings to numbers (if they represent numbers)
number_text <- "42"
number_value <- as.numeric(number_text)
print(number_value)
#> [1] 42
typeof(number_value)
#> [1] "double"
# What happens with non-numeric strings?
invalid_number <- as.numeric("hello")
print(invalid_number) # Returns NA (Not Available)
#> [1] NA
Logical data represents TRUE/FALSE values and is fundamental for conditional operations:
# Direct assignment
is_student <- TRUE
has_job <- FALSE
# From comparisons
x <- 10
y <- 5
is_greater <- x > y
is_equal <- x == y
is_not_equal <- x != y
print(is_greater)
#> [1] TRUE
print(is_equal)
#> [1] FALSE
print(is_not_equal)
#> [1] TRUE
R provides six comparison operators:
a <- 6
b <- 3
# All comparison operators
a < b # Less than
#> [1] FALSE
a > b # Greater than
#> [1] TRUE
a <= b # Less than or equal to
#> [1] FALSE
a >= b # Greater than or equal to
#> [1] TRUE
a == b # Equal to
#> [1] FALSE
a != b # Not equal to
#> [1] TRUE
Combine logical values using logical operators:
x <- TRUE
y <- FALSE
# AND operation
x & y # FALSE (both must be TRUE)
#> [1] FALSE
# OR operation
x | y # TRUE (at least one must be TRUE)
#> [1] TRUE
# NOT operation
!x # FALSE (flips the value)
#> [1] FALSE
!y # TRUE
#> [1] TRUE
# Complex logical expressions
age <- 25
has_license <- TRUE
can_drive <- (age >= 16) & has_license
print(can_drive)
#> [1] TRUE
# TRUE = 1, FALSE = 0 in arithmetic operations
TRUE + TRUE # 2
#> [1] 2
FALSE + TRUE # 1
#> [1] 1
TRUE * 5 # 5
#> [1] 5
# Counting TRUE values
scores <- c(85, 92, 78, 96, 88)
passing_grades <- scores >= 80
sum(passing_grades) # Count how many passed
#> [1] 4
mean(passing_grades) # Proportion who passed
#> [1] 0.8
R has several special values you should know about:
NA represents missing data:
# Creating NA values
missing_value <- NA
ages <- c(25, 30, NA, 35, 28)
# Check for NA
is.na(missing_value)
#> [1] TRUE
is.na(ages)
#> [1] FALSE FALSE  TRUE FALSE FALSE
# Operations with NA
mean(ages) # Returns NA
#> [1] NA
mean(ages, na.rm = TRUE) # Remove NA values first
#> [1] 29.5
NULL represents “nothing” or absence of a value:
# NULL represents absence
empty_var <- NULL
length(empty_var) # 0
#> [1] 0
# NULL vs NA
is.null(NULL)
#> [1] TRUE
is.null(NA)
#> [1] FALSE
is.na(NULL)
#> logical(0)
is.na(NA)
#> [1] TRUE
Inf and -Inf represent positive and negative infinity:
# Division by zero
positive_inf <- 1/0
negative_inf <- -1/0
print(positive_inf)
#> [1] Inf
print(negative_inf)
#> [1] -Inf
# Check for infinity
is.infinite(positive_inf)
#> [1] TRUE
is.finite(positive_inf)
#> [1] FALSE
# Create different types
int_val <- 42L
dbl_val <- 3.14
chr_val <- "hello"
log_val <- TRUE
# typeof() shows the exact type
typeof(int_val)
#> [1] "integer"
typeof(dbl_val)
#> [1] "double"
typeof(chr_val)
#> [1] "character"
typeof(log_val)
#> [1] "logical"
# class() shows the object class
class(int_val)
#> [1] "integer"
class(dbl_val)
#> [1] "numeric"
# Specific type checks
is.numeric(int_val) # TRUE (integers are numeric)
#> [1] TRUE
is.integer(int_val) # TRUE
#> [1] TRUE
is.double(int_val) # FALSE
#> [1] FALSE
is.character(chr_val) # TRUE
#> [1] TRUE
is.logical(log_val) # TRUE
#> [1] TRUE
R can convert between types:
# Explicit conversion
x <- 3.7
as.integer(x) # Truncates to 3
#> [1] 3
as.character(x) # "3.7"
#> [1] "3.7"
as.logical(x) # TRUE (non-zero numbers are TRUE)
#> [1] TRUE
# Converting strings to numbers
text_numbers <- c("1", "2.5", "3")
numeric_values <- as.numeric(text_numbers)
print(numeric_values)
#> [1] 1.0 2.5 3.0
# What happens with invalid conversions?
as.numeric(c("1", "hello", "3")) # Returns c(1, NA, 3)
#> [1]  1 NA  3
R automatically converts types when needed:
# Mixing types in operations
result1 <- 5L + 3.2 # integer + double = double
typeof(result1)
#> [1] "double"
result2 <- TRUE + 5 # logical + numeric = numeric
print(result2) # TRUE becomes 1
#> [1] 6
# result3 <- "Number: " + 5 # This will cause an error!
# Instead, use paste():
result3 <- paste("Number:", 5)
print(result3)
#> [1] "Number: 5"
# Survey responses
respondent_id <- 1:5
age <- c(25, 30, 28, 35, 29)
income <- c(45000, 52000, 48000, 65000, 51000)
satisfied <- c(TRUE, TRUE, FALSE, TRUE, TRUE)
feedback <- c("Great!", "Good service", "Could be better", "Excellent", "Very happy")
# Analysis
avg_age <- mean(age)
avg_income <- mean(income)
satisfaction_rate <- mean(satisfied)
num_responses <- length(respondent_id)
# Create summary
summary_text <- paste("Survey Results:",
"\nAverage age:", round(avg_age, 1),
"\nAverage income: $", format(avg_income, big.mark = ","),
"\nSatisfaction rate:", round(satisfaction_rate * 100, 1), "%",
"\nTotal responses:", num_responses)
cat(summary_text)
#> Survey Results:
#> Average age: 29.4
#> Average income: $ 52,200
#> Satisfaction rate: 80 %
#> Total responses: 5
# Simulated data with quality issues
temperatures <- c(72, 75, NA, 80, 999, -50, 77)
# Quality checks
valid_range <- temperatures >= 0 & temperatures <= 120
has_missing <- is.na(temperatures)
suspicious_values <- temperatures > 100 | temperatures < 0
# Results
cat("Valid temperatures:", sum(valid_range, na.rm = TRUE), "\n")
#> Valid temperatures: 4
cat("Missing values:", sum(has_missing), "\n")
#> Missing values: 1
cat("Suspicious values:", sum(suspicious_values, na.rm = TRUE), "\n")
#> Suspicious values: 2
# Clean the data
clean_temperatures <- temperatures[valid_range & !has_missing]
print(clean_temperatures)
#> [1] 72 75 80 77
# This might surprise you!
x <- 5
y <- 5L
identical(x, y) # FALSE! Different types
#> [1] FALSE
x == y # TRUE (values are equal)
#> [1] TRUE
# Best practice: be explicit about integer types when needed
# Common mistake
numbers_as_text <- c("1", "2", "3")
# numbers_as_text + 1 # This would error!
# Correct approach
numbers <- as.numeric(numbers_as_text)
numbers + 1 # Now this works
#> [1] 2 3 4
# Useful for counting
test_scores <- c(85, 92, 78, 96, 75, 88)
passing_scores <- test_scores >= 80
# Count passing scores
num_passing <- sum(passing_scores)
# Percentage passing
pct_passing <- mean(passing_scores) * 100
cat("Students passing:", num_passing, "\n")
#> Students passing: 4
cat("Percentage passing:", round(pct_passing, 1), "%\n")
#> Percentage passing: 66.7 %
Create variables of each data type and explore their properties:
# Create one variable of each type
my_integer <- ___
my_double <- ___
my_character <- ___
my_logical <- ___
# Check their types using typeof()
# Convert between types using as.* functions
# Try some arithmetic operations
Given this data about employees, determine the appropriate data type for each variable:
What will be the result and type of each expression?
TRUE + 2
"5" + 3
as.logical(0)
as.logical(-1)
as.integer(3.9)
paste(TRUE, 5)
Understanding R’s data types is crucial for effective programming:
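- Numeric data comes as integers (written with the L suffix) and doubles (default for numbers)
Key points to remember:
- Check an object's type with typeof() and class()
- Create integers with the L suffix
Next, we’ll explore how to combine these basic types into vectors, R’s fundamental data structure!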