# These are all doubles by default
x <- 1.2
y <- 3.0
z <- 5
# Check their types
typeof(x)
[1] "double"
typeof(y)
[1] "double"
typeof(z) # Even though 5 looks like an integer, it's a double!
[1] "double"
In R, everything is an object, and every object has a data type. Understanding data types is fundamental to working effectively with R because it determines what operations you can perform and how R stores and processes your data.
R has four fundamental data types:
Let’s explore each of these in detail.
R distinguishes between two types of numbers: integers and doubles.
By default, R treats all numbers as double (also called “numeric”):
# These are all doubles by default
x <- 1.2
y <- 3.0
z <- 5
# Check their types
typeof(x)
[1] "double"
typeof(y)
[1] "double"
typeof(z) # Even though 5 looks like an integer, it's a double!
[1] "double"
To create an integer, you must explicitly specify it by adding L after the number:
# Creating integers
a <- 2L
b <- -123456789L
c <- 0L
# Check their types
typeof(a)
[1] "integer"
typeof(b)
[1] "integer"
typeof(c)
[1] "integer"
The difference between integers and doubles affects:
# Mathematical operations
double_result <- 3.5 + 1.2
typeof(double_result)
[1] "double"
integer_result <- 3L + 4L
typeof(integer_result) # Still integer!
[1] "integer"
mixed_result <- 3L + 1.3 # Integer + double = double
typeof(mixed_result)
[1] "double"
R can handle very large numbers, but there are limits:
# Scientific notation
large_number <- 1.5e8
print(large_number)
[1] 1.5e+08
# Very large numbers become Inf (infinity)
too_large <- 1e1000
print(too_large)
[1] Inf
# Very small numbers become 0
too_small <- 1e-1000
print(too_small)
[1] 0
# Check if something is numeric
is.numeric(3.14)
[1] TRUE
is.numeric("hello")
[1] FALSE
# Check specific types
is.integer(5L)
[1] TRUE
is.double(5.0)
[1] TRUE
# Convert between types
as.integer(3.7) # Truncates, doesn't round!
[1] 3
as.double(5L) # Convert integer to double
[1] 5
Character data represents text and is created using quotation marks:
# Using double quotes
text1 <- "Hello, world!"
# Using single quotes
text2 <- 'This is also a string'
# Mixing quotes (useful when string contains quotes)
text3 <- "She said 'Hello!'"
text4 <- 'He replied "Hi there!"'
print(text1)
[1] "Hello, world!"
print(text3)
[1] "She said 'Hello!'"
my_string <- "Data Science is awesome!"
# Check type
typeof(my_string)
[1] "character"
# Check if it's character
is.character(my_string)
[1] TRUE
# Get string length
nchar(my_string)
[1] 24
# String concatenation
first_name <- "Jane"
last_name <- "Doe"
full_name <- paste(first_name, last_name)
print(full_name)
[1] "Jane Doe"
# Alternative concatenation
full_name2 <- paste0(first_name, " ", last_name) # paste0() adds no separator, so the space is supplied explicitly
print(full_name2)
[1] "Jane Doe"
# Substrings
substr(full_name, 1, 4) # Characters 1 through 4
[1] "Jane"
# Numbers to strings
age <- 25
age_text <- as.character(age)
print(age_text)
[1] "25"
typeof(age_text)
[1] "character"
# Strings to numbers (if they represent numbers)
number_text <- "42"
number_value <- as.numeric(number_text)
print(number_value)
[1] 42
typeof(number_value)
[1] "double"
# What happens with non-numeric strings?
invalid_number <- as.numeric("hello")
print(invalid_number) # Returns NA (Not Available)
[1] NA
Logical data represents TRUE/FALSE values and is fundamental for conditional operations:
# Direct assignment
is_student <- TRUE
has_job <- FALSE
# From comparisons
x <- 10
y <- 5
is_greater <- x > y
is_equal <- x == y
is_not_equal <- x != y
print(is_greater)
[1] TRUE
print(is_equal)
[1] FALSE
print(is_not_equal)
[1] TRUE
R provides six comparison operators:
a <- 6
b <- 3
# All comparison operators
a < b # Less than
[1] FALSE
a > b # Greater than
[1] TRUE
a <= b # Less than or equal to
[1] FALSE
a >= b # Greater than or equal to
[1] TRUE
a == b # Equal to
[1] FALSE
a != b # Not equal to
[1] TRUE
Combine logical values using logical operators:
x <- TRUE
y <- FALSE
# AND operation
x & y # FALSE (both must be TRUE)
[1] FALSE
# OR operation
x | y # TRUE (at least one must be TRUE)
[1] TRUE
# NOT operation
!x # FALSE (flips the value)
[1] FALSE
!y # TRUE
[1] TRUE
# Complex logical expressions
age <- 25
has_license <- TRUE
can_drive <- (age >= 16) & has_license
print(can_drive)
[1] TRUE
# TRUE = 1, FALSE = 0 in arithmetic operations
TRUE + TRUE # 2
[1] 2
FALSE + TRUE # 1
[1] 1
TRUE * 5 # 5
[1] 5
# Counting TRUE values
scores <- c(85, 92, 78, 96, 88)
passing_grades <- scores >= 80
sum(passing_grades) # Count how many passed
[1] 4
mean(passing_grades) # Proportion who passed
[1] 0.8
R has several special values you should know about:
Represents missing data:
# Creating NA values
missing_value <- NA
ages <- c(25, 30, NA, 35, 28)
# Check for NA
is.na(missing_value)
[1] TRUE
is.na(ages)
[1] FALSE FALSE TRUE FALSE FALSE
# Operations with NA
mean(ages) # Returns NA
[1] NA
mean(ages, na.rm = TRUE) # Remove NA values first
[1] 29.5
Represents “nothing” or absence of a value:
# NULL represents absence
empty_var <- NULL
length(empty_var) # 0
[1] 0
# NULL vs NA
is.null(NULL)
[1] TRUE
is.null(NA)
[1] FALSE
is.na(NULL)
logical(0)
is.na(NA)
[1] TRUE
Represent positive and negative infinity:
# Division by zero
positive_inf <- 1/0
negative_inf <- -1/0
print(positive_inf)
[1] Inf
print(negative_inf)
[1] -Inf
# Check for infinity
is.infinite(positive_inf)
[1] TRUE
is.finite(positive_inf)
[1] FALSE
# Create different types
int_val <- 42L
dbl_val <- 3.14
chr_val <- "hello"
log_val <- TRUE
# typeof() shows the exact type
typeof(int_val)
[1] "integer"
typeof(dbl_val)
[1] "double"
typeof(chr_val)
[1] "character"
typeof(log_val)
[1] "logical"
# class() shows the object class
class(int_val)
[1] "integer"
class(dbl_val)
[1] "numeric"
# Specific type checks
is.numeric(int_val) # TRUE (integers are numeric)
[1] TRUE
is.integer(int_val) # TRUE
[1] TRUE
is.double(int_val) # FALSE
[1] FALSE
is.character(chr_val) # TRUE
[1] TRUE
is.logical(log_val) # TRUE
[1] TRUE
R can convert between types:
# Explicit conversion
x <- 3.7
as.integer(x) # Truncates to 3
[1] 3
as.character(x) # "3.7"
[1] "3.7"
as.logical(x) # TRUE (non-zero numbers are TRUE)
[1] TRUE
# Converting strings to numbers
text_numbers <- c("1", "2.5", "3")
numeric_values <- as.numeric(text_numbers)
print(numeric_values)
[1] 1.0 2.5 3.0
# What happens with invalid conversions?
as.numeric(c("1", "hello", "3")) # Returns c(1, NA, 3)
[1] 1 NA 3
R automatically converts types when needed:
# Mixing types in operations
result1 <- 5L + 3.2 # integer + double = double
typeof(result1)
[1] "double"
result2 <- TRUE + 5 # logical + numeric = numeric
print(result2) # TRUE becomes 1
[1] 6
# result3 <- "Number: " + 5 # This will cause an error!
# Instead, use paste():
result3 <- paste("Number:", 5)
print(result3)
[1] "Number: 5"
# Survey responses
respondent_id <- 1:5
age <- c(25, 30, 28, 35, 29)
income <- c(45000, 52000, 48000, 65000, 51000)
satisfied <- c(TRUE, TRUE, FALSE, TRUE, TRUE)
feedback <- c("Great!", "Good service", "Could be better", "Excellent", "Very happy")
# Analysis
avg_age <- mean(age)
avg_income <- mean(income)
satisfaction_rate <- mean(satisfied)
num_responses <- length(respondent_id)
# Create summary
summary_text <- paste("Survey Results:",
  "\nAverage age:", round(avg_age, 1),
  "\nAverage income: $", format(avg_income, big.mark = ","),
  "\nSatisfaction rate:", round(satisfaction_rate * 100, 1), "%",
  "\nTotal responses:", num_responses)
cat(summary_text)
Survey Results:
Average age: 29.4
Average income: $ 52,200
Satisfaction rate: 80 %
Total responses: 5
# Simulated data with quality issues
temperatures <- c(72, 75, NA, 80, 999, -50, 77)
# Quality checks
valid_range <- temperatures >= 0 & temperatures <= 120
has_missing <- is.na(temperatures)
suspicious_values <- temperatures > 100 | temperatures < 0
# Results
cat("Valid temperatures:", sum(valid_range, na.rm = TRUE), "\n")
Valid temperatures: 4
cat("Missing values:", sum(has_missing), "\n")
Missing values: 1
cat("Suspicious values:", sum(suspicious_values, na.rm = TRUE), "\n")
Suspicious values: 2
# Clean the data
clean_temperatures <- temperatures[valid_range & !has_missing]
print(clean_temperatures)
[1] 72 75 80 77
# This might surprise you!
x <- 5
y <- 5L
identical(x, y) # FALSE! Different types
[1] FALSE
x == y # TRUE (values are equal)
[1] TRUE
[1] TRUE
# Best practice: be explicit about integer types when needed
# Common mistake
numbers_as_text <- c("1", "2", "3")
# numbers_as_text + 1 # This would error!
# Correct approach
numbers <- as.numeric(numbers_as_text)
numbers + 1 # Now this works
[1] 2 3 4
# Useful for counting
test_scores <- c(85, 92, 78, 96, 75, 88)
passing_scores <- test_scores >= 80
# Count passing scores
num_passing <- sum(passing_scores)
# Percentage passing
pct_passing <- mean(passing_scores) * 100
cat("Students passing:", num_passing, "\n")
Students passing: 4
cat("Percentage passing:", round(pct_passing, 1), "%\n")
Percentage passing: 66.7 %
Create variables of each data type and explore their properties:
# Create one variable of each type
my_integer <- ___
my_double <- ___
my_character <- ___
my_logical <- ___
# Check their types using typeof()
# Convert between types using as.* functions
# Try some arithmetic operations
Given this data about employees, determine the appropriate data type for each variable:
What will be the result and type of each expression?
TRUE + 2
"5" + 3
as.logical(0)
as.logical(-1)
as.integer(3.9)
paste(TRUE, 5)
Understanding R’s data types is crucial for effective programming:
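Numeric data comes in two forms: integers (written with the L suffix) and doubles (the default for numbers).
Key points to remember:
Use typeof() and class() to check an object's type.
Create integers explicitly with the L suffix.
Next, we’ll explore how to combine these basic types into vectors, R’s fundamental data structure!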