Browse Source

updated R intro files

master
chassenr 1 year ago
parent
commit
5e98b9247f
  1. 4
      R_intro_2022/README.md
  2. 55
      R_intro_2022/R_intro_script.R
  3. BIN
      R_intro_2022/R_intro_slides.pdf

4
R_intro_2022/README.md

@ -23,12 +23,12 @@ The workshop will consist of 2 sessions: The first session is focused more on th
* R data and object types
* Data and object type conversions
* Navigating R objects
* Reading data into R
* Good coding practice
### Session 2:
* Q&A session 1
* Reading data into R
* Rearranging R objects
* Loops
* Rudimentary data exploration (plots)

55
R_intro_2022/R_intro_script.R

@ -32,7 +32,7 @@ install.packages("scales")
# Save and load workspace
load("R_intro.Rdata")
save.image("R_intro.Rdata")
# COMMENT: Don't save .Rdata when prompted upon closing R. Either explicitly specifiy a file or don't save the workspace.
# COMMENT: Don't save .Rdata when prompted upon closing R. Either explicitly specify a file or don't save the workspace.
# In combination with the setwd() command, you will always know which data you are working with.
# COMMENT: I usually comment out the save.image() command so avoid accidentally overwriting an existing workspace
@ -45,7 +45,7 @@ save.image("R_intro.Rdata")
# QUESTION: What data type are you familiar with?
# For demonstration purposes, let's make up some data to look at common data and object types in R.
# For demonstration purposes, let's make up some data to look at common data and object types in R.
# For this we will use several R functions. Have a look at the help of these functions to find out more if you are interested.
# vectors ####
@ -54,7 +54,7 @@ save.image("R_intro.Rdata")
# They usually consist of the series of individual values.
# Those values must have the same data type.
# Here we assign (whith <-) a series of values, specifically 1, 2, 3, 4, 5, 16, 17, 18, 19, 20, to the R object x
# Here we assign (with <-) a series of values, specifically 1, 2, 3, 4, 5, 16, 17, 18, 19, 20, to the R object x
x <- c(1:5, 16:20)
# Show data type:
@ -99,6 +99,8 @@ as.numeric(as.factor(a))
# Why does as.numeric(as.factor(x)) not reproduce the original numeric vector?
as.factor(x)
levels(as.factor(x))
str(as.factor(x))
as.numeric(as.factor(x))
as.numeric(as.character(as.factor(x)))
@ -108,6 +110,9 @@ as.numeric(as.character(as.factor(x)))
sampling_time <- c("2022-03-02 12:00:00", "2022-03-04 12:00:00", "2022-03-10 12:00:00")
sampling_time_posix <- as.POSIXlt(sampling_time, format = "%Y-%m-%d %H:%M:%S", tz = "CET")
# sampling_time <- c("02.03.2022 12:00:00.234")
# sampling_time_posix <- as.POSIXlt(sampling_time, format = "%d.%m.%Y %H:%M:%OS", tz = "CET")
# COMMENT: If not setting a time zone (tz) make sure that R takes the correct one.
# This is especially important if measuring instruments run in a
# different time zone than the computer for data analysis.
@ -116,8 +121,9 @@ sampling_time_posix <- as.POSIXlt(sampling_time, format = "%Y-%m-%d %H:%M:%S", t
sampling_values <- c(1.5, 2, 1.7)
plot(sampling_time_posix, sampling_values)
# COMMENT: Notice that the x-axis is on a numeric scale
as.numeric(sampling_time_posix)
# Other useful functios include:
# Other useful functions include:
?as.Date
?Sys.setlocale
?axis.POSIXct
@ -214,8 +220,8 @@ rownames(DF) <- c("S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S11")
# There are several way how you can navigate R objects and retrieve specific information from them.
# Square brackets:
# For 2-dimensional objects square brackets can be used to target ```[rows, columns]``` by index (row or column number) or name.
# For 1-dimensional objects, the ```[position]``` is specified (again either by index or name).
# For 2-dimensional objects square brackets can be used to target [rows, columns] by index (row or column number) or name.
# For 1-dimensional objects, the [position] is specified (again either by index or name).
# The symbol used in the substructure of an object (usually $ or @) followed by the name of the target element.
# This only works with named elements of lists, incl. columns in data frames, and not vectors and matrices.
@ -236,7 +242,7 @@ rownames(DF) <- c("S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S11")
# Select the values for the columns V3 and V5 of row S1 in matrix X
# Select the column c, a, and x from data frame DF with and without changing the original order
# (hint: consider using the logical operator in%)
# (hint: consider using the logical operator %in%)
# Subset data frame DF to all rows where the values of column y are larger than 3
@ -282,16 +288,17 @@ str(L$int)
rm(list = ls())
# Then, we can read the new tables:
# community composition
ASV <- read.table(
"asvtab.txt", # file name
h = T, # first line contains column names
sep = "\t", # values separated by tabstops
sep = "\t", # values separated by tab stops
row.names = 1 # first row contains row names, i.e. OTU names
)
head(ASV)
# bottom water data (carbonate chemistry, nutrients)
# metadata and contextual information (environmental parameters)
META <- read.table(
"metadata.txt",
h = T,
@ -364,12 +371,12 @@ quantile(META$cell_count, seq(0, 1, 0.1))
# Casting data (similar to pivot tables)
# This function comes from the package reshape, but here are several other options to achieve the same thing in R.
cell_counts_overview <- cast(META, "day ~ salinity", mean, value = "cell_count")
cell_counts_overview <- cast(META, "day ~ salinity", mean, value = "cell_count", fill = 0)
# TASK: Modify the command so that instead of NA, the table is filled with zero.
# This is just an exercise. Never just replace NA with zero without considering the meaning of the NA first.
# TASK: Generate a table with the mean and temperature for each combination of experimental conditions.
# TASK: Generate a table with the mean of cell count and temperature for each combination of experimental conditions.
# Community composition is often expressed in percentages
# Converting counts to percentages
@ -382,11 +389,11 @@ ASV.rel <- prop.table(ASV, 2) * 100
### Loops ####
# To get an overview of community composition, let's calculate the percentages at all available taxonomic levels (domain to genus).
# Summarizing data like this is a repeptitive tasks and can be coded in a loop:
# Summarizing data like this is a repetitive tasks and can be coded in a loop:
# Create an empty list
TAX.pooled <- vector(mode = "list", length = 6)
names(TAX.pooled) <- colnames(TAX)[1:6] # domain to genus
names(TAX.pooled) <- colnames(TAX) # domain to genus
# Fill elements of list with data frames with sequence counts per taxon,
# one taxonomic level at a time
@ -409,7 +416,7 @@ str(TAX.pooled)
### Some very basic plots ####
# Before starting any kind of statical analysis, ALWAYS look at the data.
# Before starting any kind of statistical analysis, ALWAYS look at the data.
# The below examples are for data exploration only and
# do not necessarily represent publication quality data visualization approaches.
@ -424,14 +431,20 @@ plot(
# If there are more than 2 numeric variables (or those that can be coerced as such),
# the following will create all pairwise scatter plots:
pairs(META[, c(1, 2, 4, 6, 9)])
pairs(META[, c(1, 2, 4:6)])
# Boxplots ####
boxplot(
log(cell_count) ~ day * salinity, # formula for how to summarize the data in the boxplot
data = META, # object that contains the data
las = 2, # plot tick labels perpendicular to axis
col = rep(c("yellow2", "green", "red", "darkorchid2", "dodgerblue", "black"), 5) # define colors for the boxes
las = 2 # plot tick labels perpendicular to axis
# col = rep(day.color, 4) # define colors for the boxes
)
points(
as.numeric(interaction(META$day, META$salinity)),
log(META$cell_count),
bg = META$color, # background color of symbols
pch = META$symbol # point character
)
# QUESTION: What do you notice when looking at d0 in the plot?
@ -440,7 +453,7 @@ boxplot(
# Histograms ####
hist(
META$cell_count,
breaks = 20
breaks = 10
)
# TASK: Modify the breaks argument to change the size of the bins for each bar in the histogram.
@ -510,6 +523,8 @@ write.table(
# E.g. to ave lists and other objects which can't easily be represented in a tabular form.
# Use readRDS() to read such data into R again
saveRDS(TAX.pooled, "tax_summaries.rds")
TAX.pooled2 <- readRDS("tax_summaries.rds")
str(TAX.pooled2)
# TASK: Use a loop (or any other approach) to save the individual elements of TAX.pooled.rel
# (with values rounded to 2 decimals) as separate tables.
@ -673,6 +688,8 @@ plan(multicore, workers = 60)
knots <- 100 # pick 100 sequencing depth for rarefaction
n <- 10 # randomly subsample 10 times at each sequencing depth
# DO NOT RUN THIS UNLESS YOU HAVE ACCESS TO >10 CORES FOR PARALLEL COMPUTING!!!
# compare speed
system.time(
map(

BIN
R_intro_2022/R_intro_slides.pdf

Binary file not shown.
Loading…
Cancel
Save