Section 3 Spectral Counting
This is a generic spectral counting script for MS-GF+ Human/UniProt searches. Only modify the lines that change the data package number and the name of the final .xlsx file that will be saved, unless you know what you are doing.
## Uncomment to install missing packages
# install.packages("devtools")
# library(devtools)
# install_github("PNNL-Comp-Mass-Spec/MSnID@pnnl-master")
# install_github("PNNL-Comp-Mass-Spec/PlexedPiper")
# install_github("PNNL-Comp-Mass-Spec/PNNL.DMS.utils")
# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# BiocManager::install("MSnbase")
# install.packages("writexl")
# install.packages("dplyr")
# install.packages("tibble")
library(MSnID)
library(PlexedPiper)
library(PNNL.DMS.utils)
library(MSnbase)
library(writexl)
library(dplyr)
library(tibble)
# Data package number
<- 3987
data_package_num # Name of the final file to save
<- "data/3987_spectral_counts.xlsx" file_name
Do not modify anything below unless you know what you are doing.
# Read MS-GF+ results from the DMS
<- read_msgf_data_from_DMS(data_package_num = data_package_num)
m
# Filter to 1% FDR at the peptide level
<- filter_msgf_data(m, level = "peptide", fdr.max = 0.01)
m
# UniProt to gene symbol conversion table
<- fetch_conversion_table(organism_name = "Homo sapiens",
conv_tab from = "UNIPROT", to = "SYMBOL")
When running fetch_conversion_table
, if a prompt appears that requires an answer, type yes
and press enter.
# Modify accessions column of psms to use gene symbols
<- remap_accessions(m, conv_tab, "\\|([^|-]+)(-\\d+)?\\|")
m
# Do the same remapping to the FASTA file
<- path_to_FASTA_used_by_DMS(data_package_num = data_package_num)
fst_path <- remap_fasta_entry_names(
fst_path_2 path_to_FASTA = fst_path, conversion_table = conv_tab,
extraction_pttrn = "\\|([^|-]+)(-\\d+)?\\|"
)
# Compute the number of amino acids per 1000 and use that to filter
# to 1% FDR at the protein level
<- compute_num_peptides_per_1000aa(m, fst_path_2)
m <- filter_msgf_data(m, "accession", fdr.max = 0.01)
m
# Parsimonious protein inference
<- infer_parsimonious_accessions(m)
m show(m) # Assessment of filtering quality
## MSnID object
## Working directory: "."
## #Spectrum Files: 8
## #PSMs: 114125 at 0.012 % FDR
## #peptides: 23209 at 0.043 % FDR
## #accessions: 2362 at 0.38 % FDR
The results look reasonable, so we will continue on to spectral counting.
# Remove decoys
<- apply_filter(m, "!isDecoy")
m
# Convert m to an MSnSet
<- as(m, "MSnSet")
msnset
# Spectral counting:
# Within each accession group, sum the values within columns.
<- combineFeatures(msnset,
msnset fData(msnset)$accession,
redundancy.handler = "multiple",
method = "sum",
cv = FALSE)
# Sort features from most to least abundant
<- rowSums(exprs(msnset))
tot_count <- msnset[order(-tot_count), ] msnset
# Save exprs as an .xlsx file
%>%
msnset exprs() %>%
as.data.frame() %>%
rownames_to_column("Gene") %>%
write_xlsx(path = file_name)