4.1 Conversion with biomaRt
## Install missing packages
# BiocManager is the installer used below for the Bioconductor packages;
# get it from CRAN if it is not already available.
if (!require("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# Bioconductor packages needed for this section
bio_pkgs <- c("biomaRt", "Biostrings")
for (pkg_i in bio_pkgs) {
  # character.only = TRUE because pkg_i holds the package name as a string
  if (!require(pkg_i, quietly = TRUE, character.only = TRUE)) {
    BiocManager::install(pkg_i)
  }
}

# MSnID is installed from GitHub (ref "pnnl-master"), which requires remotes
if (!require("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
if (!require("MSnID", quietly = TRUE, character.only = TRUE)) {
  remotes::install_github("PNNL-Comp-Mass-Spec/MSnID@pnnl-master")
}
## ------------------------
library(biomaRt) # ID conversion
library(Biostrings) # read FASTA files
library(MSnID) # parse_FASTA_names
The first steps are to determine which mart and dataset to use. listMarts will show the available marts.
The first 6 rows of the available datasets (provided by listDatasets(mart)) are also shown.
(Use View, rather than head, to search for the desired database.)
# Create mart
listMarts() # determine biomart for useMart
## biomart version
## 1 ENSEMBL_MART_ENSEMBL Ensembl Genes 106
## 2 ENSEMBL_MART_MOUSE Mouse strains 106
## 3 ENSEMBL_MART_SNP Ensembl Variation 106
## 4 ENSEMBL_MART_FUNCGEN Ensembl Regulation 106
# Connect to the Ensembl Genes mart without a dataset yet, so that the
# available datasets can be listed from it.
mart <- useMart(biomart = "ENSEMBL_MART_ENSEMBL")
head(listDatasets(mart)) # determine dataset for useMart
## dataset description
## 1 abrachyrhynchus_gene_ensembl Pink-footed goose genes (ASM259213v1)
## 2 acalliptera_gene_ensembl Eastern happy genes (fAstCal1.2)
## 3 acarolinensis_gene_ensembl Green anole genes (AnoCar2.0v2)
## 4 acchrysaetos_gene_ensembl Golden eagle genes (bAquChr1.2)
## 5 acitrinellus_gene_ensembl Midas cichlid genes (Midas_v5)
## 6 amelanoleuca_gene_ensembl Giant panda genes (ASM200744v2)
## version
## 1 ASM259213v1
## 2 fAstCal1.2
## 3 AnoCar2.0v2
## 4 bAquChr1.2
## 5 Midas_v5
## 6 ASM200744v2
# Recreate the mart, this time bound to the rnorvegicus_gene_ensembl dataset
mart <- useMart(biomart = "ENSEMBL_MART_ENSEMBL",
                dataset = "rnorvegicus_gene_ensembl")
Next, we determine which attributes to select for the conversion table with listAttributes(mart).
From that table, we select “refseq_peptide” and “external_gene_name”.
# Create conversion table
head(listAttributes(mart)) # determine attributes for getBM
## name description page
## 1 ensembl_gene_id Gene stable ID feature_page
## 2 ensembl_gene_id_version Gene stable ID version feature_page
## 3 ensembl_transcript_id Transcript stable ID feature_page
## 4 ensembl_transcript_id_version Transcript stable ID version feature_page
## 5 ensembl_peptide_id Protein stable ID feature_page
## 6 ensembl_peptide_id_version Protein stable ID version feature_page
# Query the mart for the two selected attributes; the result is the
# conversion table between refseq_peptide and external_gene_name.
conv_tbl1 <- getBM(attributes = c("refseq_peptide", "external_gene_name"),
                   mart = mart)
head(conv_tbl1, 10)
## refseq_peptide external_gene_name
## 1
## 2 AC118165.1
## 3 NP_001000130 Olr56
## 4 NP_001000302 Olr473
## 5 AC099294.1
## 6 AABR07054368.1
## 7 Olr760
## 8 NP_001014048 Clrn3
## 9 AABR07000137.1
## 10 NP_001011937 Doc2g
This table has a lot of blank entries that need to be removed.