Skip to contents
library(Rubrary)
library(dplyr)

# Set the CRAN mirror that `install.packages()` will use for the
# `palmerpenguins` install. An HTTPS mirror is used so package downloads are
# not fetched over plain HTTP.
r <- getOption("repos")
r["CRAN"] <- "https://cran.us.r-project.org"
options(repos = r)

Data

Using palmerpenguins dataset from Allison Horst.

Load data

Load the palmerpenguins data and add a unique ID per entry, built from the `Individual ID` and `Sample Number` columns of `penguins_raw`.

# Install `palmerpenguins` only when it is not already available, then attach it.
if (!requireNamespace("palmerpenguins", quietly = TRUE)) {
  install.packages("palmerpenguins")
}
library(palmerpenguins)

# Work with a plain data.frame and append a per-row identifier assembled from
# the raw table's `Individual ID` and `Sample Number` columns.
penguins <- as.data.frame(penguins)
penguins$Sample_ID <- paste0(
  penguins_raw$`Individual ID`, "_", penguins_raw$`Sample Number`
)

# Use the identifier as row names as well.
rownames(penguins) <- penguins$Sample_ID

rmarkdown::paged_table(penguins)

Run PCA

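Subset the data to the numeric measurement columns, transpose it so that each sample is a column, drop samples whose measurements are all missing, then run PCA.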

# Drop the metadata columns (species, island, sex, year, Sample_ID), then
# transpose so that each sample (named by its row name) becomes a column.
num <- penguins %>%
  dplyr::select(!c(species, island, sex, year, Sample_ID)) %>%
  t() %>%
  as.data.frame() %>%
  # Remove samples (columns) whose values are all NA
  dplyr::select(where(~ !all(is.na(.x))))

# Run PCA on the transposed table. Logical flags are spelled out as `TRUE`
# because `T` is an ordinary variable that can be reassigned.
pca <- Rubrary::run_PCA(
  df = num,
  summary = TRUE, tol = 0,
  center = TRUE, scale = TRUE,
  screeplot = TRUE
)
#> ** Cumulative var. exp. >= 80% at PC 2 (88.2%)

Plot PCA

Use the resulting `prcomp` object to plot the PCA scores, annotated by the species metadata.

# PCA scores annotated by `species`; annotation metadata comes from `penguins`,
# with `Sample_ID` passed as the annotation name column.
Rubrary::plot_PCA(
  df_pca = pca, anno = penguins,
  annoname = "Sample_ID", annotype = "species",
  title = "Palmer Penguins PCA Scores - Species",
  label = FALSE, density = TRUE
)

Same PCA scores plot, but annotating by sex metadata.

# PCA scores annotated by `sex`; annotation metadata comes from `penguins`,
# with `Sample_ID` passed as the annotation name column.
Rubrary::plot_PCA(
  df_pca = pca, anno = penguins,
  annoname = "Sample_ID", annotype = "sex",
  title = "Palmer Penguins PCA Scores - Sex",
  label = FALSE, density = TRUE
)