This tutorial shows how to create a complete human genome browser with the D3GB R package. This example creates the genome browser and adds several tracks such as RNASeq expression, GWAS results, variants from exome sequencing samples, Variant Effect Predictor (VEP) annotations of variants, protein domains and gene annotations. Install the D3GB package and write the following code in R:
library(D3GB)
# Create a temporary dir
temp <- tempfile()
dir.create(temp)
setwd(temp)
# Download original files and transform it to BED format
# Genes track
download.file("http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/refFlat.txt.gz","refFlat.txt.gz")
genes <- read.delim("refFlat.txt.gz",FALSE,quote="")
genes[,3] <- sub('chr','',genes[,3])
genes <- genes[genes[,3] %in% GRCh37[,1],]
exonSize <- apply(genes,1,function(x) paste(as.numeric(strsplit(x[11],",")[[1]])-as.numeric(strsplit(x[10],",")[[1]]),collapse=","))
exonStart <- apply(genes,1,function(x) paste(as.numeric(strsplit(x[10],",")[[1]])-as.numeric(x[5]),collapse=","))
genesBED <- data.frame(chr = genes[,3], start = genes[,5], end = genes[,6], name = genes[,1], score = genes[,2], strand = genes[,4], thickStart = genes[,7], thickEnd = genes[,8], itemRGB = NA, blockCount = genes[,9], blockSize = exonSize, blockStarts = exonStart)
# Add PFAM domains
download.file("http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/ucscGenePfam.txt.gz","ucscGenePfam.txt.gz")
domains <- read.delim("ucscGenePfam.txt.gz",FALSE,quote="")
domains[,2] <- sub('chr','',domains[,2])
domains[,c(6,10)] <- NA
domainsBED <- domains[domains[,2] %in% GRCh37[,1],2:13]
# Example of how to add tracks from a list. See below for other options
tracks <- list(refFlat = genesBED, Pfam = domainsBED)
# Specify tracks type an color
types <- c("exons","domain")
colors <- c("#B8860B","#000000")
# Download GWAS Catalog in BED format
download.file("http://d3gb.usal.es/docs/data/GWAS_Catalog","GWAS_Catalog")
# Genome browser generation.
gb <- genomebrowser(GRCh37.bands, tracks, types, colors, "GWAS_Catalog")
# Adding tracks one by one. This adds GWAS Catalog to the genome browser
genome_addTrack(gb,"GWAS_Catalog","GWAS_Catalog","value","#A52A2A",c(0,100))
# Add VEP annotated VCF
download.file("http://d3gb.usal.es/docs/data/trio_vep","trio_vep")
genome_addVCF(gb,"trio_vep","trio_vep",c('Existing_variation','Consequence','IMPACT','GMAF'))
# ADD RNASeq tissue expression data
for(i in c("adipose_tissue", "adrenal_gland", "brain", "heart", "kidney", "liver", "lung", "ovary", "pancreas", "sigmoid_colon", "small_intestine", "spleen", "testis")){
download.file(paste0("http://d3gb.usal.es/docs/data/",i),i)
genome_addTrack(gb,i,i,"score")
}
# Load genome sequence
download.file("http://ftp.ensembl.org/pub/grch37/release-85/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz","GRCh37.fa.gz")
genome_addSequence(gb, "GRCh37.fa.gz")
# It creates a genome browser ready to be viewed in your browser.
plot(gb)
view result