options(install.packages.compile.from.source = "always")
list.packages <- c(
"readxl", "ggplot2", "ggpubr", "matrixStats", "ggrepel", "reshape2", "dplyr", "stringr",
"grid", "tcltk", "parallel", "doParallel", "doSNOW", "data.table", "gplots",
"randomcoloR", "factoextra", "RColorBrewer", "grDevices", "gmp", "xtable", "latex2exp",
"httr", "jsonlite", "curl", "RCurl", "magrittr", "rlist", "pipeR", "plyr",
"xml2", "rvest", "knn.covertree", "knitr", "rlang", "visNetwork", "hwriter", "htmltools"
)
bio.list.packages <- c(
"limma", "ExperimentHub", "clusterProfiler", "GO.db",
"org.Mm.eg.db", "pathview", "enrichplot", "org.Hs.eg.db"
)
new.packages <- list.packages[!list.packages %in% installed.packages()]
bio.new.packages <- bio.list.packages[!bio.list.packages %in% installed.packages()]
# Packages that does not install yet
if (!requireNamespace("BiocManager", quietly = TRUE)) {
update.packages(ask = FALSE)
install.packages("BiocManager", repos="https://cloud.r-project.org")
}
if (length(new.packages)) {
install.packages(new.packages)
devtools::install_github("grimbough/biomaRt", force = FALSE)
}
if (length(bio.new.packages)) {
update.packages(ask = FALSE)
BiocManager::install(bio.new.packages)
}
# Loading all packages & functions
invisible(lapply(c(list.packages, bio.list.packages, "biomaRt"), library, character.only = TRUE))
Attaching package: ‘dplyr’
The following object is masked from ‘package:matrixStats’:
count
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
Loading required package: foreach
Loading required package: iterators
Loading required package: snow
Attaching package: ‘snow’
The following objects are masked from ‘package:parallel’:
clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, clusterExport, clusterMap, clusterSplit, makeCluster, parApply,
parCapply, parLapply, parRapply, parSapply, splitIndices, stopCluster
data.table 1.14.8 using 64 threads (see ?getDTthreads). Latest news: r-datatable.com
Attaching package: ‘data.table’
The following objects are masked from ‘package:dplyr’:
between, first, last
The following objects are masked from ‘package:reshape2’:
dcast, melt
Attaching package: ‘gplots’
The following object is masked from ‘package:stats’:
lowess
Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
Attaching package: ‘gmp’
The following objects are masked from ‘package:base’:
%*%, apply, crossprod, matrix, tcrossprod
Using libcurl 7.81.0 with OpenSSL/3.0.2
Attaching package: ‘curl’
The following object is masked from ‘package:httr’:
handle_reset
----------------------------------------------------------------------------------------------------------------------------------------------
You have loaded plyr after dplyr - this is likely to cause problems.
If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
library(plyr); library(dplyr)
----------------------------------------------------------------------------------------------------------------------------------------------
Attaching package: ‘plyr’
The following objects are masked from ‘package:dplyr’:
arrange, count, desc, failwith, id, mutate, rename, summarise, summarize
The following object is masked from ‘package:matrixStats’:
count
The following object is masked from ‘package:ggpubr’:
mutate
Attaching package: ‘rlang’
The following object is masked from ‘package:xml2’:
as_list
The following object is masked from ‘package:magrittr’:
set_names
The following objects are masked from ‘package:jsonlite’:
flatten, unbox
The following object is masked from ‘package:data.table’:
:=
Loading required package: BiocGenerics
Attaching package: ‘BiocGenerics’
The following object is masked from ‘package:limma’:
plotMA
The following objects are masked from ‘package:gmp’:
which.max, which.min
The following objects are masked from ‘package:dplyr’:
combine, intersect, setdiff, union
The following objects are masked from ‘package:stats’:
IQR, mad, sd, var, xtabs
The following objects are masked from ‘package:base’:
anyDuplicated, aperm, append, as.data.frame, basename, cbind, colnames, dirname, do.call, duplicated, eval, evalq, Filter,
Find, get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
pmin.int, Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply, union, unique, unsplit, which.max,
which.min
Loading required package: AnnotationHub
Loading required package: BiocFileCache
Loading required package: dbplyr
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
Attaching package: ‘dbplyr’
The following objects are masked from ‘package:dplyr’:
ident, sql
clusterProfiler v4.6.2 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
If you use clusterProfiler in published research, please cite:
T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
Attaching package: ‘clusterProfiler’
The following objects are masked from ‘package:plyr’:
arrange, mutate, rename, summarise
The following object is masked from ‘package:stats’:
filter
Loading required package: AnnotationDbi
Loading required package: stats4
Loading required package: Biobase
Welcome to Bioconductor
Vignettes contain introductory material; view with 'browseVignettes()'. To cite Bioconductor, see 'citation("Biobase")', and
for packages 'citation("pkgname")'.
Attaching package: ‘Biobase’
The following object is masked from ‘package:ExperimentHub’:
cache
The following object is masked from ‘package:AnnotationHub’:
cache
The following object is masked from ‘package:rlang’:
exprs
The following object is masked from ‘package:httr’:
content
The following objects are masked from ‘package:matrixStats’:
anyMissing, rowMedians
Loading required package: IRanges
Loading required package: S4Vectors
Attaching package: ‘S4Vectors’
The following object is masked from ‘package:clusterProfiler’:
rename
The following object is masked from ‘package:plyr’:
rename
The following object is masked from ‘package:rlist’:
List
The following object is masked from ‘package:gplots’:
space
The following objects are masked from ‘package:data.table’:
first, second
The following objects are masked from ‘package:dplyr’:
first, rename
The following objects are masked from ‘package:base’:
expand.grid, I, unname
Attaching package: ‘IRanges’
The following object is masked from ‘package:clusterProfiler’:
slice
The following object is masked from ‘package:plyr’:
desc
The following object is masked from ‘package:data.table’:
shift
The following objects are masked from ‘package:dplyr’:
collapse, desc, slice
Attaching package: ‘AnnotationDbi’
The following object is masked from ‘package:clusterProfiler’:
select
The following object is masked from ‘package:dplyr’:
select
##############################################################################
Pathview is an open source software package distributed under GNU General
Public License version 3 (GPLv3). Details of GPLv3 is available at
http://www.gnu.org/licenses/gpl-3.0.html. Particullary, users are required to
formally cite the original Pathview paper (not just mention it) in publications
or products. For details, do citation("pathview") within R.
The pathview downloads and uses KEGG data. Non-academic uses may require a KEGG
license agreement (details at http://www.kegg.jp/kegg/legal.html).
##############################################################################
Attaching package: ‘enrichplot’
The following object is masked from ‘package:ggpubr’:
color_palette
options(rgl.useNULL = F, ggrepel.max.overlaps = Inf)
Load the functional enrichment analysis (FEA) results from our re-analyzed GSE138783 Bulk RNA-seq
load(file.path(".", "01.HEK293_TMG_OSMI2", "00.Results", "DESeq2_Results.Rdata"))
Select the comparison with the most different treatment which is 6h TMG vs 6h OSMI-1
ORA_list <- DEGs_ls[["HEK293_TMG_6hBvsHEK293_OSMI2_6hA"]][["Over_Represent"]]
Keep the GO_BP terms that contains either “mitochondria” or “cytoskeleton”
for(i in c("All", "Up_regulated", "Down_regulated")){
temp <- ORA_list[[i]]$GO_BP$Raw@result$Description %>%
grep("(mitochondria(l){0,1}|cytoskele(ton|tal))", ., ignore.case = TRUE)
ORA_list[[i]]$GO_BP$Raw@result <- ORA_list[[i]]$GO_BP$Raw@result[temp,]
ORA_list[[i]]$GO_BP$Sig <- ORA_list[[i]]$GO_BP$Raw@result
}
names <- "TMG 6h vs OSMI-1 6h"
k <- "GO_BP"
temp <- ORA_list
mydf<-data.frame()
geneClusters<-list()
for(j in c("All","Up_regulated","Down_regulated")){
if(is.null(nrow(temp[[j]][[k]][["Sig"]]))){next()}
mydf<-rbind(
mydf,
cbind(
group=rep(j,nrow(temp[[j]][[k]][["Sig"]])),
temp[[j]][[k]][["Sig"]][,1:9]
)
)
geneClusters[[j]]<-(temp[[j]][[k]][["Sig"]]$geneID)%>%
str_split(.,"/")%>%unlist()%>%unique()
}
rownames(mydf)<-1:nrow(mydf)
mydf$group<-factor(mydf$group,levels=c("All","Up_regulated","Down_regulated"))
colnames(mydf)[1]<-"Cluster"
res <- new("compareClusterResult",
compareClusterResult = mydf,
geneClusters = geneClusters
)
.call<-match.call(compareCluster,call("compareCluster",
ENSEMBL~group, data=mydf, fun="enrichGO",
OrgDb = org.Hs.eg.db,
keyType = "ENSEMBL",
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 1,
qvalueCutoff = 1,
readable = TRUE))
.call$geneClusters<-as.symbol("geneClusters")
.call$OrgDb<-as.symbol("org.Hs.eg.db")
.call$data<-NULL
res@.call<-.call
res@fun<-"enrichGO"
res@keytype<-"UNKNOWN"
res@readable<-FALSE
if(!dir.exists(file.path(".","01.PlotOut"))){
dir.create(file.path(".","01.PlotOut"))
}
gp <- dotplot(
res, showCategory=1000, label_format=85, color = "pvalue",
title=paste0("ORA Summary of ", names, " for ", k," Terms.")
)
svg(filename = file.path(".", "01.PlotOut", "FEA_Overlap.svg"), width = 12, height = 17, pointsize = 12)
print(gp)
dev.off()
null device
1
graphics.off()
knitr::opts_knit$set(global.device = TRUE)
print(gp)
knitr::opts_knit$set(global.device = FALSE)
j <- "GO_BP"
k <- "All"
Par_name <- "TMG 6h vs OSMI-1 6h"
FEA_title<-paste0(k, " ", nrow(ORA_list[[k]][[j]]$Raw)," significant GO_", j, " terms (", Par_name, "; the most descend terms)")
p1<-dotplot(ORA_list[[k]][[j]]$Raw, showCategory=1000, label_format=100, color = "pvalue", font.size = 18)
p2<-treeplot(
pairwise_termsim(ORA_list[[k]][[j]]$Raw, method="JC"), showCategory=1000, color = "pvalue",
cluster.params=list(hclust_method = "ward.D2", label_words_n = 5, n = 5, font.size = 18)
)
gp <- gridExtra::grid.arrange(
p1, p2, nrow = 1, widths = c(1, 1),
top=textGrob(paste0("GSEA: ",FEA_title," with similarity summary"), gp=gpar(fontsize=18, col="black"))
)
svg(filename = file.path(".", "01.PlotOut", "FEA_All.svg"), width = 26, height = 16, pointsize = 12)
plot(gp)
dev.off()
png
2
graphics.off()
knitr::opts_knit$set(global.device = TRUE)
plot(gp)
knitr::opts_knit$set(global.device = FALSE)
j <- "GO_BP"
k <- "Up_regulated"
Par_name <- "TMG 6h vs OSMI-1 6h"
FEA_title<-paste0(k, " ", nrow(ORA_list[[k]][[j]]$Raw)," significant GO_", j, " terms (", Par_name, "; the most descend terms)")
p1<-dotplot(ORA_list[[k]][[j]]$Raw, showCategory=1000, label_format=100, color = "pvalue", font.size = 18)
p2<-treeplot(
pairwise_termsim(ORA_list[[k]][[j]]$Raw, method="JC"), showCategory=1000, color = "pvalue",
cluster.params=list(hclust_method = "ward.D2", label_words_n = 5, n = 5, font.size = 18)
)
gp <- gridExtra::grid.arrange(
p1, p2, nrow = 1, widths = c(1, 1),
top=textGrob(paste0("GSEA: ",FEA_title," with similarity summary"), gp=gpar(fontsize=18, col="black"))
)
svg(filename = file.path(".", "01.PlotOut", "FEA_Up.svg"), width = 16, height = 9, pointsize = 12)
plot(gp)
dev.off()
png
2
graphics.off()
knitr::opts_knit$set(global.device = TRUE)
plot(gp)
knitr::opts_knit$set(global.device = FALSE)
j <- "GO_BP"
k <- "Down_regulated"
Par_name <- "TMG 6h vs OSMI-1 6h"
FEA_title<-paste0(k, " ", nrow(ORA_list[[k]][[j]]$Raw)," significant GO_", j, " terms (", Par_name, "; the most descend terms)")
p1<-dotplot(ORA_list[[k]][[j]]$Raw, showCategory=1000, label_format=70, color = "pvalue", font.size = 18)
p2<-treeplot(
pairwise_termsim(ORA_list[[k]][[j]]$Raw, method="JC"), showCategory=1000, color = "pvalue",
cluster.params=list(hclust_method = "ward.D2", label_words_n = 5, n = 5, font.size = 18), label_format=25
)
gp <- gridExtra::grid.arrange(
p1, p2, nrow = 1, widths = c(1, 1),
top=textGrob(paste0("GSEA: ",FEA_title," with similarity summary"), gp=gpar(fontsize=18, col="black"))
)
svg(filename = file.path(".", "01.PlotOut", "FEA_Down.svg"), width = 26, height = 15, pointsize = 12)
plot(gp)
dev.off()
png
2
graphics.off()
knitr::opts_knit$set(global.device = TRUE)
plot(gp)
knitr::opts_knit$set(global.device = FALSE)
In this section we will check the up-stream transcription factors that regulates the genes inside the Top5 overlapped mitochondrial/cytoskeletal GO_BP terms by using TFLink database.
The Top5 overlapped mitochondrial/cytoskeletal GO_BP terms are: 1. GO:0030705, cytoskeleton-dependent intracellular transport 2. GO:0061640, cytoskeleton-dependent cytokinesis 3. GO:0140053, mitochondrial gene expression 4. GO:0070507, regulation of microtubule cytoskeleton organization 5. GO:0006839, mitochondrial transport
Download TFLink database
TFLink <- fread(
"https://cdn.netbiol.org/tflink/download_files/TFLink_Homo_sapiens_interactions_All_simpleFormat_v1.0.tsv.gz"
)
|==================================================|
Check the tables
head(res@compareClusterResult)
head(TFLink)
Get annotation of genes related to Mitochondrial/Cytoskeletal GO_BP terms.
TOP5 <- res@compareClusterResult[res@compareClusterResult$ID %in% c("GO:0030705", "GO:0061640", "GO:0140053", "GO:0070507", "GO:0006839"),]
GeneLs <- TOP5$geneID %>%
str_split(., "/") %>%
unlist() %>%
unique()
Ann_0mart<-NULL
attempt<-1
attempt_max<-12
while(is.null(Ann_0mart)&&attempt<13){
if(attempt%in%seq(from=1,to=attempt_max,3)){
url<-"https://useast.ensembl.org"
}else if(attempt%in%seq(from=2,to=attempt_max,3)){
url<-"https://www.ensembl.org"
}else if(attempt%in%seq(from=3,to=attempt_max,3)){
url<-"https://useast.ensembl.org/"
}else if(attempt%in%seq(from=4,to=attempt_max,3)){
url<-"https://asia.ensembl.org/"
}
try({
Ann_0mart <- useDataset(
"hsapiens_gene_ensembl",
useMart("ENSEMBL_MART_ENSEMBL", host = url)
)
}, silent = TRUE)
attempt<-attempt+1
if(is.null(Ann_0mart)){Sys.sleep(10)}
}
Anno_GeneLs <- getBM(
mart = Ann_0mart,
attributes = c("hgnc_symbol", "ensembl_gene_id", "external_gene_name"),
filter = "ensembl_gene_id",
values = GeneLs,
uniqueRows = TRUE
)
rownames(Anno_GeneLs)<-Anno_GeneLs$ensembl_gene_id
Make functions based on the Uniport API and prepare the gene list.
library(httr)
isJobReady <- function(jobId) {
pollingInterval <- 5
nTries <- 20
for (i in 1:nTries) {
url <- paste("https://rest.uniprot.org/idmapping/status/", jobId, sep = "")
r <- GET(url = url, accept_json())
status <- httr::content(r, as = "parsed")
if (!is.null(status[["results"]]) || !is.null(status[["failedIds"]])) {
return(TRUE)
}
if (!is.null(status[["messages"]])) {
print(status[["messages"]])
return(FALSE)
}
Sys.sleep(pollingInterval)
}
return(FALSE)
}
get_next_link <- function(headers) {
if (!is.null(headers[["Link"]])) {
match <- str_match(
headers[["Link"]],
'^\\<\\s*(.*?)\\s*\\>\\; rel\\=\\"next\\"$'
)
if (!isEmpty(match)) {
return(match[2])
}
} else {
NULL
}
}
Fetch UniprotID for the Mitochondrial/Cytoskeletal genes using Ensembl ID
temp <- Anno_GeneLs$ensembl_gene_id
files <- list(
from = "Ensembl",
to = "UniProtKB-Swiss-Prot",
ids = paste0(temp, collapse = ",")
)
r <- POST(url = "https://rest.uniprot.org/idmapping/run", body = files, encode = "multipart", accept_json())
r <- POST(url = "https://rest.uniprot.org/idmapping/run", body = files, encode = "multipart", accept_json())
while (r$status_code != 200) {
r <- POST(url = "https://rest.uniprot.org/idmapping/run", body = files, encode = "multipart", accept_json())
Sys.sleep(5)
}
submission <- httr::content(r, as = "parsed")
if (isJobReady(submission[["jobId"]])) {
url <- paste("https://rest.uniprot.org/idmapping/details/", submission[["jobId"]], sep = "")
response <- GET(url = url, accept_json())
details <- httr::content(response, as = "parsed")
res_url <- paste0(
details[["redirectURL"]],
"?fields=accession%2Creviewed%2Cid%2Cprotein_name%2Clength%2Cgene_primary%2Cgo%2Cft_zn_fing%2Cft_repeat%2Cft_region%2Cft_domain%2Cft_motif%2Ccc_domain%2Cft_compbias%2Cft_coiled%2Cannotation_score%2Ckeyword%2Ccomment_count%2Ccc_subcellular_location%2Ccc_function%2Ccc_pathway%2Cprotein_families%2Cft_carbohyd%2Cft_peptide%2Cft_transit%2Cft_signal%2Cft_propep%2Ccc_ptm%2Cft_init_met%2Cft_lipid%2Cft_mod_res%2Cft_disulfid%2Cft_crosslnk%2Cft_chain%2Cxref_kegg&format=tsv&size=500"
)
response <- GET(url = res_url, accept_json())
total <- response$headers$`x-total-results`
temp2 <- data.table::fread(
res_url,
verbose = TRUE, showProgress = TRUE
) %>% as.data.frame()
while (!is.null(res_url)) {
response <- GET(url = res_url, accept_json())
res_url <- get_next_link(headers(response))
if (!is.null(res_url)) {
temp2 <- rbind(
temp2,
data.table::fread(
res_url,
verbose = FALSE, showProgress = FALSE
) %>% as.data.frame()
)
print(
paste0(
nrow(temp2), "/", total
)
)
}
}
}
OpenMP version (_OPENMP) 201511
omp_get_num_procs() 128
R_DATATABLE_NUM_PROCS_PERCENT unset (default 50)
R_DATATABLE_NUM_THREADS unset
R_DATATABLE_THROTTLE unset (default 1024)
omp_get_thread_limit() 2147483647
omp_get_max_threads() 128
OMP_THREAD_LIMIT unset
OMP_NUM_THREADS unset
RestoreAfterFork true
data.table is using 64 threads with throttle==1024. See ?setDTthreads.
Downloaded 299 bytes...
Downloaded 5845 bytes...
Downloaded 24541 bytes...
Downloaded 41191 bytes...
Downloaded 61268 bytes...
Downloaded 77908 bytes...
Downloaded 96363 bytes...
Downloaded 105344 bytes...
Downloaded 113424 bytes...
Downloaded 135626 bytes...
Downloaded 168344 bytes...
Downloaded 184703 bytes...
Downloaded 217421 bytes...
Downloaded 233780 bytes...
Downloaded 266498 bytes...
Downloaded 282857 bytes...
Downloaded 381011 bytes...
Downloaded 397370 bytes...
Downloaded 417425 bytes...
Input contains no \n. Taking this to be a filename to open
[01] Check arguments
Using 64 threads (omp_get_max_threads()=128, nth=64)
NAstrings = [<<NA>>]
None of the NAstrings look like numbers.
show progress = 1
0/1 column will be read as integer
[02] Opening the file
Opening file /tmp/RtmpXF8M4i/file14cae4651113d5.
File opened, size = 1.736MB (1820314 bytes).
Memory mapped ok
[03] Detect and skip BOM
[04] Arrange mmap to be \0 terminated
\n has been found in the input and different lines can end with different line endings (e.g. mixed \n and \r\n in one file). This is common and ideal.
[05] Skipping initial rows if needed
Positioned on line 1 starting: <<From Entry Reviewed Entry Name>>
[06] Detect separator, quoting rule, and ncolumns
Detecting sep automatically ...
sep=',' with 2 lines of 4 fields using quote rule 0
sep='|' with 2 lines of 18 fields using quote rule 0
sep=0x9 with 100 lines of 36 fields using quote rule 0
Detected 36 columns on line 1. This line is either column names or first data row. Line starts as: <<From Entry Reviewed Entry Name>>
Quote rule picked = 0
fill=false and the most number of columns found is 36
[07] Detect column types, good nrow estimate and whether first row is column names
Number of sampling jump points = 1 because (1820313 bytes from row 1 to eof) / (2 * 478689 jump0size) == 1
Type codes (jump 000) : CCCCC5CCCCCCCCCC7CCCCCCC2CCCCCCCCCCC Quote rule 0
Type codes (jump 001) : CCCCC5CCCCCCCCCC7CCCCCCC2CCCCCCCCCCC Quote rule 0
'header' determined to be true due to column 6 containing a string on row 1 and a lower type (int32) in the rest of the 170 sample rows
=====
Sampled 170 rows (handled \n inside quoted fields) at 2 jump points
Bytes from first data row on line 2 to the end of last row: 1819858
Line length: mean=4210.86 sd=3219.03 min=839 max=18007
Estimated number of rows: 1819858 / 4210.86 = 433
Initial alloc = 866 rows (433 + 100%) using bytes/max(mean-2*sd,min) clamped between [1.1*estn, 2.0*estn]
=====
[08] Assign column names
[09] Apply user overrides on column types
After 0 type and 0 drop user overrides : CCCCC5CCCCCCCCCC7CCCCCCC2CCCCCCCCCCC
[10] Allocate memory for the datatable
Allocating 36 column slots (36 - 0 dropped) with 866 rows
[11] Read the data
jumps=[0..1), chunk_size=4210858, total_size=1819858
1 out-of-sample type bumps: CCCCC5CCCCCCCCCC7CCCCCCCCCCCCCCCCCCC
jumps=[0..1), chunk_size=4210858, total_size=1819858
Read 428 rows x 36 columns from 1.736MB (1820314 bytes) file in 00:00.027 wall clock time
[12] Finalizing the datatable
Type counts:
1 : int32 '5'
1 : float64 '7'
34 : string 'C'
=============================
0.001s ( 4%) Memory map 0.002GB file
0.016s ( 60%) sep='\t' ncol=36 and header detection
0.000s ( 1%) Column type detection using 170 sample rows
0.000s ( 1%) Allocation of 866 rows x 36 cols (0.000GB) of which 428 ( 49%) rows used
0.009s ( 34%) Reading 1 chunks (0 swept) of 4.016MB (each chunk 428 rows) using 1 threads
+ 0.003s ( 11%) Parse to row-major thread buffers (grown 0 times)
+ 0.005s ( 21%) Transpose
+ 0.001s ( 2%) Waiting
0.001s ( 5%) Rereading 1 columns due to out-of-sample type exceptions
0.027s Total
Column 25 ("Peptide") bumped from 'bool8' to 'string' due to <<PEPTIDE 688..713; /note="P3(42)"; /id="PRO_0000000095"; PEPTIDE 688..711; /note="P3(40)"; /id="PRO_0000000096">> on row 243
colnames(temp2)[1] <- "EnsemblID"
a <- str_split(temp2$EnsemblID, ",")
temp <- which((lengths(a) >= 2) == TRUE)
for (i in temp) {
temp1 <- a[[i]]
temp2$EnsemblID[i] <- temp1[1]
for (j in 2:length(temp1)) {
temp2 <- rbind(temp2, temp2[i, ])
nrow <- nrow(temp2)
temp2$EnsemblID[nrow] <- temp1[j]
}
}
dup_ls <- temp2$EnsemblID[duplicated(temp2$EnsemblID)]
for(i in dup_ls){
temp <- temp2[temp2$EnsemblID!=i,]
dup_rows <- temp2[temp2$EnsemblID==i,]
dup_rows <- dup_rows[dup_rows$Length == max(dup_rows$Length),]
temp2 <-rbind(dup_rows, temp)
}
rownames(temp2) <- temp2$EnsemblID
Anno_GeneLs <- temp2
Get related Transcription Factors (TFs)
TFLink_related <- TFLink[TFLink$UniprotID.Target %in% Anno_GeneLs$Entry,]
#TF_ls <- unique(TFLink_related$UniprotID.TF)
#TFLink_related <- TFLink[(TFLink$UniprotID.TF %in% TF_ls | TFLink$UniprotID.Target %in% TF_ls),]
#TFLink_related <- TFLink_related[(TFLink_related$UniprotID.Target %in% c(TF_ls,Anno_GeneLs$Entry)),]
head(TFLink_related)
Add the O-GlcNAc site information from the O-GlcNAcAtlas database.
First we have to retrieve the UniPort Accession number for each genes
library("httr")
library("dplyr")
library("jsonlite")
library("curl")
library("RCurl")
library("magrittr")
library("rlist")
library("pipeR")
library("plyr")
library("xml2")
library("rvest")
library("parallel")
library("doParallel")
library("doSNOW")
# Fetch O-GlcNAc records from https://www.oglcnac.org/atlas/search/
temp2 <- data.frame()
Uniportls <- unique(TFLink_related$UniprotID.TF)
Par_cl <- makeCluster(parallelly::freeCores()[1])
registerDoSNOW(Par_cl)
Par_pb <- txtProgressBar(min = 1, max = length(Uniportls), style = 3)
progress <- function(n) setTxtProgressBar(Par_pb, n)
Par_opts <- list(progress = progress)
temp2 <- foreach(
i = 1:length(Uniportls), .combine = base::rbind, .errorhandling = "pass",
.options.snow = Par_opts, .packages = c("tcltk", "dplyr", "utils")
) %dopar% {
Query <- Uniportls[i]
url <- c("https://www.oglcnac.org/atlas/search/")
pg <- rvest::session(url)
token <- xml2::xml_attrs(xml2::xml_find_all(xml2::read_html(pg), ".//input")[[1]])[["value"]]
myheaders <- c(
'accept' ='text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'accept-encoding' = 'gzip, deflate, br',
'accept-language' = 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7,zh-TW;q=0.6,ja;q=0.5',
'cache-control' = 'max-age=0',
'content-length' = as.character(nchar(Query) + 120),
'content-type' = 'application/x-www-form-urlencoded',
'cookie' = stringr::str_split(pg[["response"]][["headers"]][["set-cookie"]],";")[[1]][1],
'dnt' = '1',
'origin' = 'https://oglcnac.org',
'referer' = 'https://oglcnac.org/atlas/search/',
'sec-ch-ua' = '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
'sec-ch-ua-mobile' = '?0',
'sec-ch-ua-platform' = 'Linux',
'sec-fetch-dest' = 'document',
'sec-fetch-mode' = 'navigate',
'sec-fetch-site' = 'same-origin',
'sec-fetch-user' = '?1',
'upgrade-insecure-requests' = '1',
'user-agent' = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
)
response <- httr::POST(
url = url,
httr::add_headers(.headers = myheaders),
body = list(
"csrfmiddlewaretoken" = token,
"search_term" = Query,
"search_field" = "Accession"
),
encode = "form"
)
if (httr::status_code(response) != 200) {
out <- data.frame(
Query = Query,
Accession = "Failure",
Entry_Name = "Failure",
Protein_Name = "Failure",
Gene_Name = "Failure",
Position_in_Protein = "Failure",
Status = httr::status_code(response),
"O-GlcNAc" = "Unknown"
)
} else if (rvest::html_table(xml2::read_html(response))[[2]] %>% as.data.frame() %>% nrow() == 0) {
out <- data.frame(
Query = Query,
Accession = "No_records",
Entry_Name = "No_records",
Protein_Name = "No_records",
Gene_Name = "No_records",
Position_in_Protein = "No_records",
Status = httr::status_code(response),
"O-GlcNAc" = "Non"
)
} else if (rvest::html_table(xml2::read_html(response))[[2]] %>% as.data.frame() %>% nrow() != 0) {
out <- rvest::html_table(xml2::read_html(response))[[2]] %>% as.data.frame()
out <- data.frame(
Query = Query,
Accession = out$Accession,
Entry_Name = out$`Entry Name`,
Protein_Name = out$`Protein Name`,
Gene_Name = out$`Gene Name`,
Position_in_Protein = out$`Position in Protein`,
Status = httr::status_code(response),
"O-GlcNAc" = "Yes"
)
}
out
}
|===================================================================================================================================== | 100%
|======================================================================================================================================| 100%
stopCluster(Par_cl)
OGlcNAc_Anno <- temp2
Add O-GlcNAc info to TFs.
OGlcNAc <- rep("No", nrow(TFLink_related))
TFLink_related <- cbind(OGlcNAc, TFLink_related)
TFLink_related$OGlcNAc[TFLink_related$UniprotID.TF %in% OGlcNAc_Anno$Accession]<-"Yes"
head(TFLink_related)
Add DEGs info to TFs.
DEGs <- DEGs_ls[["HEK293_TMG_6hBvsHEK293_OSMI2_6hA"]][["DEGs_raw"]]
DEGs <- DEGs[DEGs$Is.Sig.=="Yes",]
is.DEGs <- rep("No", nrow(TFLink_related))
TFLink_related <- cbind(is.DEGs, TFLink_related)
TFLink_related$is.DEGs[TFLink_related$UniprotID.TF %in% DEGs$UniProtKBID]<-"Yes"
head(TFLink_related)
Filter out the TFs wihout both significantly changes and O-GlcNAc sites.
TFLink_related <- TFLink_related[(TFLink_related$is.DEGs == "Yes" | TFLink_related$OGlcNAc == "Yes"),]
TFLink_related <- TFLink_related[(TFLink_related$OGlcNAc == "Yes"),]
dim(TFLink_related)
[1] 105492 17
head(TFLink_related)
Remove the records that not related to both of Mitochondrial/Cytoskeletal genes and remained TFs.
ChkLs <- unique(c(TFLink_related$UniprotID.TF, Anno_GeneLs$Entry))
TFLink_related <- TFLink_related[TFLink_related$UniprotID.Target %in% ChkLs,]
dim(TFLink_related)
[1] 105492 17
head(TFLink_related)
Make literature mining functions
#Load functions#
RetrieveEntrezID <- function(
Homologues=FALSE,
Homo_species=Homo_species,
Par_species=Par_species,
Par_species.Uniprot=Par_species.Uniprot,
GenesLs=GenesLs,
updateProgress = NULL
){
#|----Set up Biomart parameters----|####
Ann_0mart<-NULL
attempt<-1
attempt_max<-12
while(is.null(Ann_0mart)&&attempt<13){
if(attempt%in%seq(from=1,to=attempt_max,3)){
url<-"https://useast.ensembl.org"
}else if(attempt%in%seq(from=2,to=attempt_max,3)){
url<-"https://www.ensembl.org"
}else if(attempt%in%seq(from=3,to=attempt_max,3)){
url<-"https://useast.ensembl.org/"
}else if(attempt%in%seq(from=4,to=attempt_max,3)){
url<-"https://asia.ensembl.org/"
}
try({
Ann_0mart <- useMart("ENSEMBL_MART_ENSEMBL", host=url) %>%
useDataset(Par_species, .)
}, silent = TRUE)
attempt<-attempt+1
if(is.null(Ann_0mart)){Sys.sleep(10)}
}
if(Homologues){
if (is.function(updateProgress)) {
text <- paste0("Get Homologoeus Gene in ", Homo_species)
updateProgress(detail = text)
}
#listAttributes(Ann_0mart)%>%View()
temp <- NULL
attempt<-1
attempt_max<-12
while(is.null(temp)&&attempt<13){
try({
temp <- getBM(
mart=Ann_0mart,
attributes=c("ensembl_gene_id",
Homo_species),
filter="ensembl_gene_id",
values=GenesLs,
uniqueRows = T)
}, silent = TRUE)
attempt<-attempt+1
if(is.null(temp)){Sys.sleep(10)}
}
temp<-temp[!temp[,2]%in%"",]
temp<-temp[!duplicated(temp[,2]),]
NaMapping<-temp[,1]
names(NaMapping)<-temp[,2]
print(Homo_species)
if(is_empty(temp)|nrow(temp)<1){
return(NA)
}
#print("Set up Biomart parameters")
#|----Set up Biomart parameters----|####
if(Homo_species == "hsapiens_homolog_ensembl_gene"){
Par_species <- "hsapiens_gene_ensembl"
}else if(Homo_species == "mmusculus_homolog_ensembl_gene"){
Par_species <- "mmusculus_gene_ensembl"
}else if(Homo_species == "rnorvegicus_homolog_ensembl_gene"){
Par_species <- "rnorvegicus_gene_ensembl"
}else if(Homo_species == "sscrofa_homolog_ensembl_gene"){
Par_species <- "sscrofa_gene_ensembl"
}
Ann_0mart<-NULL
attempt<-1
attempt_max<-12
while(is.null(Ann_0mart)&&attempt<13){
if(attempt%in%seq(from=1,to=attempt_max,3)){
url<-"https://useast.ensembl.org"
}else if(attempt%in%seq(from=2,to=attempt_max,3)){
url<-"https://www.ensembl.org"
}else if(attempt%in%seq(from=3,to=attempt_max,3)){
url<-"https://useast.ensembl.org/"
}else if(attempt%in%seq(from=4,to=attempt_max,3)){
url<-"https://asia.ensembl.org/"
}
try({
Ann_0mart <- useMart("ENSEMBL_MART_ENSEMBL", host=url) %>%
useDataset(Par_species, .)
}, silent = TRUE)
attempt<-attempt+1
if(is.null(Ann_0mart)){Sys.sleep(10)}
}
Anno_gene <- NULL
attempt<-1
attempt_max<-12
while(is.null(Anno_gene)&&attempt<13){
try({
Anno_gene <- getBM(
mart=Ann_0mart,
attributes=c("ensembl_gene_id","entrezgene_id","gene_biotype",
"external_gene_name","description"),
filter="ensembl_gene_id",
values=temp[,2],
uniqueRows = T)
}, silent = TRUE)
attempt<-attempt+1
if(is.null(Anno_gene)){Sys.sleep(10)}
}
rm(temp)
}else{
print("getBM")
if (is.function(updateProgress)) {
text <- "Get PubMed Gene ID"
updateProgress(detail = text)
}
Anno_gene <- NULL
attempt<-1
attempt_max<-12
while(is.null(Anno_gene)&&attempt<13){
try({
Anno_gene <- getBM(
mart=Ann_0mart,
attributes=c("ensembl_gene_id","entrezgene_id","gene_biotype",
"external_gene_name","description"),
filter="ensembl_gene_id",
values=GenesLs,
uniqueRows = T)
}, silent = TRUE)
attempt<-attempt+1
if(is.null(Anno_gene)){Sys.sleep(10)}
}
}
Anno<-Anno_gene[!duplicated(Anno_gene$ensembl_gene_id),]
rownames(Anno)<-Anno$ensembl_gene_id
Anno<-data.frame(
ensembl_gene_id=Anno$ensembl_gene_id,
entrezgene_id=Anno$entrezgene_id,
external_gene_name=Anno$external_gene_name,
gene_biotype=Anno$gene_biotype,
row.names = rownames(Anno)
)
Anno2<-Anno[!is.na(Anno$entrezgene_id),] #For EntrezGeneID
Anno2<-Anno2[!duplicated(Anno2[,'ensembl_gene_id']),] #For EntrezGeneID
rownames(Anno2)<-Anno2$ensembl_gene_id
temp<-Anno[is.na(Anno$entrezgene_id),"ensembl_gene_id"]
print("Check EntrezGeneID again")
#|----Check EntrezGeneID again----|####
if(!is_empty(temp)){
if(length(temp)<2){
NCBIcheck<-read_html(
paste0(
"https://www.ncbi.nlm.nih.gov/gene/?term=",
temp
)
)%>%xml_find_all(., ".//span")%>%xml_text()%>%
grep("Gene ID",.,value = TRUE)
if(!isEmpty(NCBIcheck)){
NCBIcheck<-NCBIcheck%>%unlist()%>%
str_extract_all(.,"(\\d)+")%>%.[[1]]%>%.[1]
}else{
NCBIcheck<-NA
}
out<-data.frame(
ensembl_gene_id=temp,
entrezgene_id=NCBIcheck
)%>%as.data.frame()
temp2<-out
}else{
Par_cl<-makeCluster(parallelly::freeConnections()-1)
registerDoSNOW(Par_cl)
Par_pb <- txtProgressBar(min=1, max=length(temp), style = 3)
progress <- function(n) setTxtProgressBar(Par_pb, n)
Par_opts<-list(progress = progress)
#Refer here for all column names for Entrez API (https://www.ncbi.nlm.nih.gov/books/NBK25501/)
temp2<-foreach(i=1:length(temp), .combine=rbind, .errorhandling = "pass", .inorder=FALSE,
.options.snow=Par_opts, .packages = c("dplyr","utils","xml2","S4Vectors","stringr","BiocGenerics")) %dopar% {
NCBIcheck<-read_html(
paste0(
"https://www.ncbi.nlm.nih.gov/gene/?term=",
temp[i]
)
)%>%xml_find_all(., ".//span")%>%xml_text()%>%
grep("Gene ID",.,value = TRUE)
if(!isEmpty(NCBIcheck)){
NCBIcheck<-NCBIcheck%>%unlist()%>%
str_extract_all(.,"(\\d)+")%>%.[[1]]%>%.[1]
}else{
NCBIcheck<-NA
}
out<-data.frame(
ensembl_gene_id=temp[i],
entrezgene_id=NCBIcheck
)%>%as.data.frame()
return(out)
}
stopCluster(Par_cl)
}
temp2<-temp2[!is.na(temp2$entrezgene_id),]
temp2<-temp2[!duplicated(temp2$ensembl_gene_id),]
rownames(temp2)<-temp2$ensembl_gene_id
temp2<-cbind(
temp2,
Anno[temp2$ensembl_gene_id,c(3:4)]
)
#sum(Anno2$ensembl_gene_id%in%temp2$ensembl_gene_id)==0
Anno2<-rbind(
Anno2,
temp2
)
}
if(Homologues){
Anno2<-cbind(mapping=NaMapping[Anno2$ensembl_gene_id],Anno2)
Anno2<-Anno2[!duplicated(Anno2$mapping),]
rownames(Anno2) <- NaMapping[Anno2$ensembl_gene_id]
Anno2<-Anno2[,-1]
}
print("end")
if (is.function(updateProgress)) {
text <- paste0("ID Convert Done ")
updateProgress(detail = text)
}
return(Anno2)
}
pseudoLog10 <- function(x){ asinh(x/2)/log(10) }
WZY_GetSummaryText<-function(p){
pdata <- data.frame(name = p$data$label, color2 = p$data$group)
pdata <- pdata[!is.na(pdata$name), ]
cluster_color <- unique(pdata$color2)
cluster_label <- sapply(
cluster_color, enrichplot:::get_wordcloud,
ggData = pdata, nWords = 4
) %>% paste0(.,collapse = " ") %>% str_split(., " ") %>% unlist()
return(cluster_label)
}
WZY_literatureMining<-function(GenesLs=GenesLs, Mesh_term4Interesting=Mesh_term4Interesting, Species=Species, updateProgress = NULL){
out<-list()
N<-paste0(#To search for the total number of PubMed citations, enter all[sb] in the search box.(20210506)
"https://pubmed.ncbi.nlm.nih.gov/?term=",
URLencode('all[sb]'),
"&sort=date"
)%>%read_html()%>%html_nodes(".value")%>%html_text()%>%.[1]%>%gsub(",","",.)%>%as.numeric()
K<-paste0(#the amounts of literature related to osteoblast differentiation #20210506# PubMed Query:
"https://pubmed.ncbi.nlm.nih.gov/?term=",
URLencode(Mesh_term4Interesting),
"&sort=date"
)%>%read_html()%>%html_nodes(".value")%>%html_text()%>%.[1]%>%gsub(",","",.)%>%as.numeric()
Thresh<-K/N*10
####|prepare gene-related articles|####
# hsapiens_gene_ensembl; mmusculus_gene_ensembl; rnorvegicus_gene_ensembl; sscrofa_gene_ensembl
# Homo sapiens (Human) [9606]; Mus musculus (Mouse) [10090]; Rattus norvegicus (rat) [10116]; Sus scrofa (pig) [9823]
# (human) Homo sapiens (human) [hsa]; (mouse) Mus musculus (mouse) [mmu]; (rat) Rattus norvegicus [rno]; (pig) Sus scrofa [ssc]
# hsapiens_homolog_ensembl_gene; mmusculus_homolog_ensembl_gene; rnorvegicus_homolog_ensembl_gene; sscrofa_homolog_ensembl_gene
print(GenesLs)
print(Mesh_term4Interesting)
print(Species)
if (is.function(updateProgress)) {
text <- "Get All Homologoeus Genes"
updateProgress(detail = text)
}
if(Species == "Hsa"){
Par_species <- "hsapiens_gene_ensembl"
Par_species.Uniprot <- "9606"
Homo_species <- c(
"mmusculus_homolog_ensembl_gene",
"rnorvegicus_homolog_ensembl_gene",
"sscrofa_homolog_ensembl_gene"
)
}else if(Species == "Mus"){
Par_species <- "mmusculus_gene_ensembl"
Par_species.Uniprot <- "10090"
Homo_species <- c(
"hsapiens_homolog_ensembl_gene",
"rnorvegicus_homolog_ensembl_gene",
"sscrofa_homolog_ensembl_gene"
)
}else if(Species == "Rno"){
Par_species <- "rnorvegicus_gene_ensembl"
Par_species.Uniprot <- "10116"
Homo_species <- c(
"mmusculus_homolog_ensembl_gene",
"hsapiens_homolog_ensembl_gene",
"sscrofa_homolog_ensembl_gene"
)
}else if(Species == "Ssc"){
Par_species <- "sscrofa_gene_ensembl"
Par_species.Uniprot <- "9823"
Homo_species <- c(
"mmusculus_homolog_ensembl_gene",
"rnorvegicus_homolog_ensembl_gene",
"hsapiens_homolog_ensembl_gene"
)
}
print(Par_species)
print(Par_species.Uniprot)
query.res<-RetrieveEntrezID(Par_species=Par_species, Par_species.Uniprot=Par_species.Uniprot, GenesLs=GenesLs)
homologeus1 <- RetrieveEntrezID(
Homologues=TRUE, Homo_species=Homo_species[1], Par_species=Par_species,
Par_species.Uniprot=Par_species.Uniprot, GenesLs=GenesLs, updateProgress = updateProgress
)
homologeus2 <- RetrieveEntrezID(
Homologues=TRUE, Homo_species=Homo_species[2], Par_species=Par_species,
Par_species.Uniprot=Par_species.Uniprot, GenesLs=GenesLs, updateProgress = updateProgress
)
homologeus3 <- RetrieveEntrezID(
Homologues=TRUE, Homo_species=Homo_species[3], Par_species=Par_species,
Par_species.Uniprot=Par_species.Uniprot, GenesLs=GenesLs, updateProgress = updateProgress
)
#Calculate results and construct the literature mining table
literature.mining<-data.frame(matrix(nrow = 0, ncol=14))
colnames(literature.mining)<-c("ensembl_gene_id","entrezgene_id","GeneSymbol",
"gene_biotype", "N", "K", "n", "k", "R", "10x Background of R","P.value",
"Gene related PMID", "Overlapped PMID", "Overlapped Genes [Article No]")
# Par_pb <- txtProgressBar(min=1, max=nrow(query.res), style = 3)
print("StartLoop")
list.packages <- c(
"readxl", "ggplot2", "ggpubr", "matrixStats", "ggrepel", "reshape2", "dplyr", "stringr",
"grid", "tcltk", "parallel", "doParallel", "doSNOW", "data.table", "gplots",
"randomcoloR", "factoextra", "RColorBrewer", "grDevices", "gmp", "xtable", "latex2exp",
"httr", "jsonlite", "curl", "RCurl", "magrittr", "rlist", "pipeR", "plyr",
"xml2", "rvest", "knn.covertree", "knitr", "rlang", "visNetwork", "hwriter", "htmltools"
)
bio.list.packages <- c(
"limma", "ExperimentHub", "clusterProfiler", "GO.db",
"org.Mm.eg.db", "pathview", "enrichplot", "org.Hs.eg.db"
)
wzy_mainloop <- function(
query.res = query.res, attempt_max = 3, Thresh = Thresh,
K = K, N = N, i = i
){
GeneID<-c(
query.res$entrezgene_id[i],
homologeus1[query.res$ensembl_gene_id[i],"entrezgene_id"],
homologeus2[query.res$ensembl_gene_id[i],"entrezgene_id"],
homologeus3[query.res$ensembl_gene_id[i],"entrezgene_id"]
)
#### Calculate n
# Get UID number that within interested MeSH term
uid.res<-c()
for(a in GeneID){
# Retrieve PMID of gene-related articles
# https://www.ncbi.nlm.nih.gov/books/NBK25497/table/chapter2.T._entrez_unique_identifiers_ui/?report=objectonly
# https://www.ncbi.nlm.nih.gov/books/NBK25499/
# https://dataguide.nlm.nih.gov/eutilities/utilities.html
# https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=gene&db=pubmed&id=68389&linkname=homologene_pubmed
# https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=gene&db=pubmed&id=12393
URL_temp<-NULL
attempt<-1
url <- paste0(
"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=gene&db=pubmed&id=",
a
)
while(is.null(URL_temp)&&attempt<attempt_max){
try({
url <- url(url, "rb")
if(class(url)[1]=="url"){
URL_temp <- read_xml(
url
)%>%xml_find_all(., ".//Id")%>%xml_text()
close(url)
}
}, silent = TRUE)
attempt<-attempt+1
if(is.null(URL_temp)){Sys.sleep(round(runif(1,10,180),1))}
}
uid.res<-c(
uid.res,
URL_temp
)
}
uid.res<-unique(uid.res)
n<-length(uid.res)
#### Calculate k
maxlimit<-125
if(n>maxlimit){
step<-n%/%maxlimit
mod<-n%%maxlimit
start<-seq(from=1, to=maxlimit*step, by=maxlimit)
end<-seq(from=maxlimit, to=maxlimit*step, by=maxlimit)
if(mod>0){
start<-c(start, tail(end, 1)+1)
end<-c(end, n)
}
step<-length(start)
}else{
step<-1
start<-1
end<-n
}
k<-0
overlapped.uid<-c()
for(a in 1:step){
URL_temp<-NULL
attempt<-1
url <- URLencode(
paste0(
"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=",
paste(uid.res[start[a]:end[a]],collapse=","),"[UID]+AND+(",URLencode(Mesh_term4Interesting),")",
"&rettype=count"
)
)
while(is.null(URL_temp)&&attempt<attempt_max){
try({
url <- url(url, "rb")
if(class(url)[1]=="url"){
URL_temp <- read_xml(#the amounts of articles of each gene on interested MeSH term:
url
)%>%xml_find_all(., ".//Count")%>%xml_text()%>%.[1]%>%as.numeric()
close(url)
}
}, silent = TRUE)
attempt<-attempt+1
if(is.null(URL_temp)){Sys.sleep(round(runif(1,10,180),1))}
}
if(is.null(URL_temp)){URL_temp <- 0}
k<-k+URL_temp
URL_temp<-NULL
attempt<-1
url <- URLencode(
paste0(
"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=",
paste(uid.res[start[a]:end[a]],collapse=","),"[UID]+AND+(",URLencode(Mesh_term4Interesting),")",
"&rettype=unlist"
)
)
while(is.null(URL_temp)&&attempt<attempt_max){
try({
url <- url(url, "rb")
if(class(url)[1]=="url"){
URL_temp <- read_xml(#the amounts of articles of each gene on interested MeSH term:
url
)%>%xml_find_all(., ".//Id")%>%xml_text()
close(url)
}
}, silent = TRUE)
attempt<-attempt+1
if(is.null(URL_temp)){Sys.sleep(round(runif(1,10,180),1))}
}
overlapped.uid<-c(
overlapped.uid,
URL_temp
)
}
if(is.na(k)){k<-0}
#### Calculate P
R<-k/n
P<-0
if(k<1){
P<-0
}else{
for (a in 0:(k-1)) {
P<-P+((chooseZ(K,a)*chooseZ(N-K,n-a))/chooseZ(N,n))
}
}
P<-asNumeric(1-P)
if(P==0){P<-1*10^-300}
temp<-data.frame(
ensembl_gene_id=query.res$ensembl_gene_id[i],
entrezgene_id=query.res$entrezgene_id[i],
GeneSymbol=query.res$external_gene_name[i],
gene_biotype=query.res$gene_biotype[i],
N=N,K=K,n=n,k=k,R=R,Thresh=Thresh,P=P,
GenePMID=uid.res%>%paste(.,collapse = ","),
OverlapPMID=overlapped.uid%>%paste(.,collapse = ",")
)
colnames(temp)<-c("ensembl_gene_id","entrezgene_id","GeneSymbol",
"gene_biotype", "N", "K", "n", "k", "R", "10x Background of R","P.value",
"Gene related PMID", "Overlapped PMID")
return(temp)
}
Par_cl<-makeCluster(parallelly::freeConnections()-2)
registerDoSNOW(Par_cl)
Par_pb <- txtProgressBar(min=1, max=nrow(query.res), style = 3)
progress <- function(n) setTxtProgressBar(Par_pb, n)
Par_opts<-list(progress = progress)
#Refer here for all column names for Entrez API (https://www.ncbi.nlm.nih.gov/books/NBK25501/)
temp<-foreach(
i=1:nrow(query.res), .combine=rbind, .errorhandling = "remove", .inorder=FALSE, .verbose = FALSE,
.options.snow=Par_opts, .packages = c(list.packages, bio.list.packages)
) %dopar% {
temp3 <- NULL
temp3 <- wzy_mainloop(query.res = query.res, attempt_max = 5, Thresh = Thresh, K = K, N = N, i = i)
if(is.null(temp3)){
temp3<-data.frame(
ensembl_gene_id=query.res$ensembl_gene_id[i],
entrezgene_id=query.res$entrezgene_id[i],
GeneSymbol=query.res$external_gene_name[i],
gene_biotype=query.res$gene_biotype[i],
N=NA,K=NA,n=NA,k=NA,R=NA,Thresh=NA,P=NA,
GenePMID=NA,
OverlapPMID=NA
)
colnames(temp3)<-c("ensembl_gene_id","entrezgene_id","GeneSymbol",
"gene_biotype", "N", "K", "n", "k", "R", "10x Background of R","P.value",
"Gene related PMID", "Overlapped PMID")
}
return(temp3)
}
stopCluster(Par_cl)
literature.mining<-rbind(literature.mining,temp)
rm(temp)
gc()
print("EndLoop")
#Correcting P-value for multiple testing
literature.mining<-cbind(literature.mining[,1:11],
p.adj=p.adjust(as.numeric(literature.mining$P.value),method = "BH"),
literature.mining[,12:13])
out<-list(
literature.mining=literature.mining,
query.res=query.res,
homologeus1=homologeus1,
homologeus2=homologeus2,
homologeus3=homologeus3
)
print("end")
return(out)
}
literatureMiningPlot<-function(
literature.mining=out$literature.mining, Thresh.R=NULL,
use.Padj = FALSE, Thresh.P=0.05, label=TRUE, label_n=5
){
# print("Plotting")
# Plotting Results
literature.mining$n<-as.numeric(literature.mining$n)
literature.mining$k<-as.numeric(literature.mining$k)
literature.mining$R<-as.numeric(literature.mining$R)
if(is.null(Thresh.R)){
Thresh<-literature.mining$`10x Background of R`[1]
}else{
Thresh<-Thresh.R
}
df<-literature.mining[,1:12]
df$R<-df$R*1000
df$n<-log10(df$n)
df$R<-pseudoLog10(df$R)
df$p.adj<-pseudoLog10(log(1/df$p.adj,2))
df$P.value<-pseudoLog10(log(1/df$P.value,2))
df <- df[order(df$P.value), ]
maxR<-(max(df$R, na.rm=T)+0.3)%>%round(., digits = 1)
maxP<-(max(df$p.adj, na.rm=T)+0.3)%>%round(., digits = 1)
RepL_R<-length(rep(0:maxR, each = 9))%/%9
RepL_P<-length(rep(0:maxP, each = 9))%/%9
if(use.Padj){
gplot<-ggplot(df,aes(x=p.adj, y=R, colour=n, size=k ))
}else{
gplot<-ggplot(df,aes(x=P.value, y=R, colour=n, size=k ))
}
gplot<-gplot +
geom_point() +
expand_limits(x=0) +
scale_y_continuous(
limits = c(0,maxR),
breaks = 0:maxR,
guide = "prism_offset_minor",
minor_breaks = pseudoLog10(rep(1:9, RepL_R)*(10^rep(0:maxR, each = 9))),
labels = function(lab){
do.call(
expression,
lapply(paste(lab), function(x) {
if(x==0){bquote(bold("0"))}else{bquote(bold("10"^.(x)))}
})
)
}
)+
scale_x_continuous(
limits = c(0,maxP),
breaks = 0:maxP,
guide = "prism_offset_minor",
minor_breaks = pseudoLog10(rep(1:9, RepL_P)*(10^rep(0:maxP, each = 9))),
labels = function(lab){
do.call(
expression,
lapply(paste(lab), function(x) {
if(x==0){bquote(bold("0"))}else{bquote(bold("10"^.(x)))}
})
)
}
)+
scale_size(range = c(0,20))+
theme(
axis.text=element_text(size=12),
axis.title=element_text(size=14,face="bold"),
axis.ticks.length = unit(5,"pt"),
axis.ticks = element_line(linewidth = 0.7),
)+
coord_cartesian(clip = "off")+
geom_hline(yintercept=pseudoLog10(Thresh*1000), linetype="dashed",
color = "red", size=0.4)+
geom_vline(xintercept=pseudoLog10(log(1/Thresh.P,2)), linetype="dashed",
color = "red", size=0.4)+
labs(x="log2(1/P.value)",
y="R (‰)",
title="",
colour="log10(Gene Related Articles No.)", size="Topics\nRelated Articles No.")+
annotation_custom(
grob = grid::linesGrob(gp=gpar(lwd=2)),
xmin = -Inf,
xmax = -Inf,
ymin = 0,
ymax = maxR
)+
annotation_custom(
grob = grid::linesGrob(gp=gpar(lwd=2)),
xmin = 0,
xmax = maxP,
ymin = -Inf,
ymax = -Inf
)
#print("PlottingEnd")
if(label_n == "All"){
label_n <- nrow(df)
}
if(label){
if(use.Padj){
df <- df[df$R>=pseudoLog10(Thresh*1000) & df$p.adj>=pseudoLog10(log(1/Thresh.P,2)), ]
}else{
df <- df[df$R>=pseudoLog10(Thresh*1000) & df$P.value>=pseudoLog10(log(1/Thresh.P,2)), ]
}
df <- df[order(df$P.value, decreasing = TRUE), ]
gplot <- gplot + geom_text_repel(
data = df[1:label_n, ],
aes(label=GeneSymbol),hjust=0, vjust=0, size=8, color="darkred"
)
}
return(gplot)
}
literatureMiningOverlappedGenes<-function(out, updateProgress = NULL){
literature.mining<-out$literature.mining
query.res<-out$query.res
homologeus1<-out$homologeus1
homologeus2<-out$homologeus2
homologeus3<-out$homologeus3
outls<-c()
for(i in 1:nrow(query.res)){
print(paste0(i,"/",nrow(query.res)))
if (is.function(updateProgress)) {
text <- paste0(i," in ",nrow(query.res))
updateProgress(message = text, detail = text)
}
GeneID<-c(
query.res$entrezgene_id[i],
homologeus1[query.res$ensembl_gene_id[i],"entrezgene_id"],
homologeus2[query.res$ensembl_gene_id[i],"entrezgene_id"],
homologeus3[query.res$ensembl_gene_id[i],"entrezgene_id"]
)%>%.[!is.na(.)]
overlapped.uid<-literature.mining$`Overlapped PMID`[i]%>%str_split(.,",")%>%unlist()
overlapped.genes<-data.frame(matrix(nrow = 0, ncol=3))
colnames(overlapped.genes)<-c("GeneSymbol","entrezgene_id","Overlapped_PMID")
if(overlapped.uid[1]!=""){
for(uid in overlapped.uid){
print(paste0("PMID:", uid, " (", which(overlapped.uid==uid)," of ",length(overlapped.uid), ")"))
temp.gene<-read_xml(
paste0(
"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&db=gene&id=",
uid
)
)%>%xml_find_all(., ".//Id")%>%xml_text()%>%.[-1]%>%unique()%>%.[!.%in%GeneID]
if(is_empty(temp.gene)){next}
if(length(temp.gene)>500){next}
a.n<-length(temp.gene)
maxlimit<-50
if(a.n>maxlimit){
step<-a.n%/%maxlimit
mod<-a.n%%maxlimit
start<-seq(from=1, to=maxlimit*step, by=maxlimit)
end<-seq(from=maxlimit, to=maxlimit*step, by=maxlimit)
if(mod>0){
start<-c(start, tail(end, 1)+1)
end<-c(end, a.n)
}
step<-length(start)
}else{
step<-1
start<-1
end<-a.n
}
if(step>1){
pb <- txtProgressBar(min=1, max=step, style = 3)
}
removeIdx<-c()
GeneSymbol<-c()
for(a in 1:step){
#print(paste0("b:",which(temp.gene==b),";",length(temp.gene)))
GSB<-read_html(
paste0(
"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&id=",
temp.gene[start[a]:end[a]]%>%paste(.,collapse = ","),
"&rettype=unlist"
)
)
lh<-(end[a]-start[a]+1)
if(lh==1){
test<-xml_child(xml_child(xml_child(xml_child(xml_child(GSB, 1), 1), 1), 1), 2)%>%xml_text()%>%grep("Official Symbol:",.)
if(!is_empty(test)){
GeneSymbol<-c(
GeneSymbol,
xml_child(xml_child(xml_child(xml_child(xml_child(xml_child(xml_child(GSB, 1), 1), 1), 1), 2), 2), 2)%>%xml_text()
)
}else{
removeIdx<-c(
removeIdx,1
)
}
}else{
for(b in 1:lh){
test<-xml_child(xml_child(xml_child(xml_child(xml_child(GSB, 1), 1), 1), b), 2)%>%xml_text()%>%grep("Official Symbol:",.)
if(!is_empty(test)){
GeneSymbol<-c(
GeneSymbol,
xml_child(xml_child(xml_child(xml_child(xml_child(xml_child(xml_child(xml_child(GSB, 1), 1), 1), b), 2), 2), 2), 2)%>%xml_text()
)
}else{
removeIdx<-c(
removeIdx,(b+start[a]-1)
)
}
}
}
if(step>1){
setTxtProgressBar(pb, a)
}
if(is.function(updateProgress)) {
text <- paste0(a," in ",step)
updateProgress(message = paste0(i," in ",nrow(query.res)), detail = text)
}
}
if(step>1){close(pb)}
if(!is_empty(removeIdx)){
temp.gene<-temp.gene[-removeIdx]
}
if(!is_empty(temp.gene)){
temp<-data.frame(
GeneSymbol=GeneSymbol%>%str_to_title(),
entrezgene_id=temp.gene,
Overlapped_PMID=rep(uid,length(temp.gene))
)
overlapped.genes<-rbind(
overlapped.genes,
temp
)
}
}
}
temp<-c()
OrderID<-c()
for(a in overlapped.genes$GeneSymbol%>%unique()){
temp2<-overlapped.genes[overlapped.genes$GeneSymbol==a,]
OrderID<-c(
OrderID,
length(temp2$Overlapped_PMID%>%unique())
)
temp<-c(
temp,
paste0(
temp2$GeneSymbol[1],
" (EntrezID:", temp2$entrezgene_id[1], ";",
"Overlapped_PMID[", tail(OrderID,1), "]:", paste(temp2$Overlapped_PMID%>%unique(),collapse = ","),")"
)
)
}
OrderID<-as.numeric(OrderID)
OrderID<-order(x=OrderID, decreasing=TRUE, na.last=T)
overlapped.genes<-temp%>%.[OrderID]%>%paste(.,collapse = " ")
outls<-c(
outls,overlapped.genes
)
}
overlapped.genes<-cbind(
literature.mining[,1:4],
overlapped.genes=outls
)
return(overlapped.genes)
}
library(rlang)
library(ggprism)
Calcuate relativity of the TFs to the Mesh term “Osteoblasts”[Mesh] using a literature mining method.
TFs <- select(org.Hs.eg.db, keys = unique(TFLink_related$Name.TF), columns = c('ENSEMBL'), keytype = "SYMBOL")
'select()' returned 1:many mapping between keys and columns
Mesh_term4Interesting <- '"Osteoblasts"[Mesh]'
GenesLs <- TFs$ENSEMBL[!is.na(TFs$ENSEMBL)]
Species <- "Hsa" # Hsa Mus Rno Ssc
TFs.Mesh<-WZY_literatureMining(GenesLs=GenesLs, Mesh_term4Interesting=Mesh_term4Interesting, Species=Species, updateProgress = NULL)
[1] "ENSG00000136997" "ENSG00000118260" "ENSG00000177606" "ENSG00000141510" "ENSG00000141905" "ENSG00000173039" "ENSG00000126561"
[8] "ENSG00000107485" "ENSG00000168610" "ENSG00000096717" "ENSG00000128573" "ENSG00000134323" "ENSG00000185591" "ENSG00000100811"
[15] "ENSG00000187098" "ENSG00000105698" "ENSG00000115641" "ENSG00000109320" "ENSG00000185122" "ENSG00000284774" "ENSG00000118689"
[22] "ENSG00000137203" "ENSG00000150907" "ENSG00000175592" "ENSG00000106459" "ENSG00000118058" "ENSG00000138385" "ENSG00000102878"
[29] "ENSG00000102804" "ENSG00000001167" "ENSG00000134954" "ENSG00000102974" "ENSG00000143437" "ENSG00000123405" "ENSG00000275700"
[36] "ENSG00000276072" "ENSG00000143162" "ENSG00000170345" "ENSG00000257923" "ENSG00000097007" "ENSG00000115415" "ENSG00000126767"
[43] "ENSG00000091831" "ENSG00000066136" "ENSG00000129514" "ENSG00000112592" "ENSG00000130816" "ENSG00000130522" "ENSG00000071564"
[50] "ENSG00000113580" "ENSG00000154727" "ENSG00000132170" "ENSG00000081189" "ENSG00000168214" "ENSG00000074047" "ENSG00000182979"
[57] "ENSG00000077092" "ENSG00000065978" "ENSG00000136352" "ENSG00000007372" "ENSG00000134982" "ENSG00000120738" "ENSG00000149311"
[64] "ENSG00000064393" "ENSG00000140464" "ENSG00000069667" "ENSG00000172845" "ENSG00000102554" "ENSG00000078403" "ENSG00000120837"
[71] "ENSG00000132005" "ENSG00000288283" "ENSG00000141646" "ENSG00000162924" "ENSG00000144791" "ENSG00000143190" "ENSG00000020633"
[78] "ENSG00000077150" "ENSG00000115966" "ENSG00000072310" "ENSG00000074319" "ENSG00000171843" "ENSG00000106462" "ENSG00000104964"
[85] "ENSG00000178028" "ENSG00000121022" "ENSG00000121481" "ENSG00000074181" "ENSG00000125398" "ENSG00000172943" "ENSG00000072364"
[92] "ENSG00000141867" "ENSG00000198026" "ENSG00000136807" "ENSG00000147140" "ENSG00000126457" "ENSG00000151702" "ENSG00000125686"
[99] "ENSG00000111206" "ENSG00000099956" "ENSG00000275837" "ENSG00000183337" "ENSG00000108468" "ENSG00000075426" "ENSG00000164032"
[106] "ENSG00000072501" "ENSG00000117222" "ENSG00000136715" "ENSG00000164754" "ENSG00000185022" "ENSG00000108055" "ENSG00000185811"
[113] "ENSG00000066422" "ENSG00000100393" "ENSG00000272333" "ENSG00000108312" "ENSG00000067955" "ENSG00000136504" "ENSG00000173473"
[120] "ENSG00000177485" "ENSG00000088038" "ENSG00000276082" "ENSG00000273943" "ENSG00000275979" "ENSG00000274941" "ENSG00000274176"
[127] "ENSG00000274616" "ENSG00000277615" "ENSG00000277600" "ENSG00000277114" "ENSG00000113658" "ENSG00000159216" "ENSG00000029363"
[134] "ENSG00000180530" "ENSG00000101972" "ENSG00000188612" "ENSG00000170653" "ENSG00000100425" "ENSG00000121068" "ENSG00000130382"
[141] "ENSG00000141380" "ENSG00000133884" "ENSG00000100395" "ENSG00000030419" "ENSG00000204371" "ENSG00000238134" "ENSG00000232045"
[148] "ENSG00000224143" "ENSG00000236759" "ENSG00000227333" "ENSG00000206376" "ENSG00000160075" "ENSG00000111276" "ENSG00000196498"
[155] "ENSG00000108175" "ENSG00000171681" "ENSG00000116560" "ENSG00000084676" "ENSG00000129173" "ENSG00000100320" "ENSG00000277564"
[162] "ENSG00000185551" "ENSG00000011304" "ENSG00000169375" "ENSG00000147133" "ENSG00000136574" "ENSG00000285109" "ENSG00000143379"
[169] "ENSG00000142599" "ENSG00000067369" "ENSG00000167491" "ENSG00000147050" "ENSG00000157259" "ENSG00000276644" "ENSG00000105323"
[176] "ENSG00000028310" "ENSG00000103510" "ENSG00000167258" "ENSG00000120690" "ENSG00000198911" "ENSG00000149136" "ENSG00000055609"
[183] "ENSG00000140262" "ENSG00000179348" "ENSG00000126746" "ENSG00000143889" "ENSG00000124813" "ENSG00000134852" "ENSG00000111642"
[190] "ENSG00000137947" "ENSG00000100888" "ENSG00000196591" "ENSG00000089902" "ENSG00000171456" "ENSG00000122482" "ENSG00000164190"
[197] "ENSG00000004487" "ENSG00000173575" "ENSG00000143614" "ENSG00000261992" "ENSG00000033800" "ENSG00000270647" "ENSG00000276833"
[204] "ENSG00000140396" "ENSG00000107290" "ENSG00000196235" "ENSG00000149480" "ENSG00000123374" "ENSG00000137693" "ENSG00000160201"
[211] "ENSG00000068323" "ENSG00000073584" "ENSG00000187079" "ENSG00000123268" "ENSG00000170374" "ENSG00000184634" "ENSG00000104320"
[218] "ENSG00000064102" "ENSG00000129691" "ENSG00000138785" "ENSG00000168813" "ENSG00000272886" "ENSG00000163435" "ENSG00000163848"
[225] "ENSG00000166478" "ENSG00000116017" "ENSG00000169564" "ENSG00000168283" "ENSG00000163348" "ENSG00000137871" "ENSG00000285253"
[232] "ENSG00000178691" "ENSG00000119866" "ENSG00000122877" "ENSG00000172534" "ENSG00000114315" "ENSG00000175550" "ENSG00000100281"
[239] "ENSG00000197063" "ENSG00000171720" "ENSG00000063169" "ENSG00000204256" "ENSG00000234704" "ENSG00000235307" "ENSG00000230678"
[246] "ENSG00000234507" "ENSG00000236227" "ENSG00000215077" "ENSG00000165156" "ENSG00000118418" "ENSG00000086589" "ENSG00000103479"
[253] "ENSG00000133422" "ENSG00000105866" "ENSG00000102034" "ENSG00000111145" "ENSG00000111880" "ENSG00000104064" "ENSG00000095794"
[260] "ENSG00000128604" "ENSG00000132964" "ENSG00000198900" "ENSG00000141027" "ENSG00000177565" "ENSG00000168286" "ENSG00000158636"
[267] "ENSG00000177030" "ENSG00000282712" "ENSG00000101040" "ENSG00000025434" "ENSG00000171988" "ENSG00000137265" "ENSG00000186834"
[274] "ENSG00000253729" "ENSG00000160789" "ENSG00000164105" "ENSG00000204531" "ENSG00000206454" "ENSG00000230336" "ENSG00000237582"
[281] "ENSG00000229094" "ENSG00000235068" "ENSG00000233911" "ENSG00000132510" "ENSG00000136492" "ENSG00000156531" "ENSG00000187555"
[288] "ENSG00000102098" "ENSG00000117713" "ENSG00000164458" "ENSG00000198728" "ENSG00000125651" "ENSG00000121060" "ENSG00000171223"
[295] "ENSG00000119203" "ENSG00000101266" "ENSG00000134532" "ENSG00000196363" "ENSG00000112658" "ENSG00000007866" "ENSG00000169925"
[302] "ENSG00000122565" "ENSG00000204356" "ENSG00000206268" "ENSG00000233801" "ENSG00000231044" "ENSG00000229363" "ENSG00000206357"
[309] "ENSG00000198517" "ENSG00000169057" "ENSG00000171316" "ENSG00000118922" "ENSG00000197905" "ENSG00000168769" "ENSG00000169045"
[316] "ENSG00000284254" "ENSG00000099381" "ENSG00000101191" "ENSG00000119547" "ENSG00000130726" "ENSG00000104856" "ENSG00000148400"
[323] "ENSG00000114861" "ENSG00000183495" "ENSG00000127616" "ENSG00000181315" "ENSG00000120733" "ENSG00000171862" "ENSG00000284792"
[330] "ENSG00000122779" "ENSG00000105856" "ENSG00000283847" "ENSG00000165732" "ENSG00000096063" "ENSG00000121691" "ENSG00000147155"
[337] "ENSG00000167182" "ENSG00000139842" "ENSG00000239306" "ENSG00000088930" "ENSG00000005889" "ENSG00000102710" "ENSG00000145216"
[344] "ENSG00000141568" "ENSG00000166886" "ENSG00000120948" "ENSG00000112081" "ENSG00000083307" "ENSG00000197724" "ENSG00000129351"
[351] "ENSG00000079246" "ENSG00000131051" "ENSG00000189369" "ENSG00000204227" "ENSG00000228520" "ENSG00000235107" "ENSG00000226788"
[358] "ENSG00000206287" "ENSG00000231115" "ENSG00000070444" "ENSG00000166503" "ENSG00000063244" "ENSG00000123908" "ENSG00000153147"
[365] "ENSG00000124613" "ENSG00000115942" "ENSG00000178764" "ENSG00000136603" "ENSG00000138378" "ENSG00000173275" "ENSG00000165891"
[372] "ENSG00000174720" "ENSG00000170581" "ENSG00000165494" "ENSG00000104824" "ENSG00000282947" "ENSG00000118007" "ENSG00000157933"
[379] "ENSG00000100084" "ENSG00000136451" "ENSG00000120616" "ENSG00000125740" "ENSG00000166716" "ENSG00000136367" "ENSG00000010244"
[386] "ENSG00000101216" "ENSG00000140382" "ENSG00000164916" "ENSG00000064313" "ENSG00000162702" "ENSG00000143842" "ENSG00000204231"
[393] "ENSG00000231321" "ENSG00000235712" "ENSG00000227322" "ENSG00000228333" "ENSG00000206289" "ENSG00000049618" "ENSG00000159692"
[400] "ENSG00000168036" "ENSG00000005339" "ENSG00000186350" "ENSG00000159140" "ENSG00000124795" "ENSG00000058673" "ENSG00000089280"
[407] "ENSG00000109381" "ENSG00000110713" "ENSG00000166508" "ENSG00000108654" "ENSG00000135100" "ENSG00000119707" "ENSG00000137309"
[414] "ENSG00000186660" "ENSG00000163930" "ENSG00000101115" "ENSG00000073614" "ENSG00000181722" "ENSG00000057657" "ENSG00000124151"
[421] "ENSG00000170325" "ENSG00000101849" "ENSG00000143799" "ENSG00000105497" "ENSG00000204209" "ENSG00000231617" "ENSG00000229396"
[428] "ENSG00000227046" "ENSG00000206206" "ENSG00000206279" "ENSG00000181449" "ENSG00000140332" "ENSG00000180806" "ENSG00000130940"
[435] "ENSG00000060709" "ENSG00000090447" "ENSG00000139613" "ENSG00000175029" "ENSG00000185049" "ENSG00000104447" "ENSG00000128908"
[442] "ENSG00000143321" "ENSG00000171634" "ENSG00000125618" "ENSG00000106571" "ENSG00000162419" "ENSG00000135111" "ENSG00000189079"
[449] "ENSG00000187325" "ENSG00000197157" "ENSG00000182568" "ENSG00000077097" "ENSG00000116478" "ENSG00000174652" "ENSG00000170322"
[456] "ENSG00000147130" "ENSG00000174197" "ENSG00000189403" "ENSG00000160094" "ENSG00000136875" "ENSG00000144554" "ENSG00000149308"
[463] "ENSG00000175727" "ENSG00000281178" "ENSG00000060138" "ENSG00000173276" "ENSG00000100410" "ENSG00000118900" "ENSG00000112182"
[470] "ENSG00000165119" "ENSG00000141644" "ENSG00000102908" "ENSG00000131848" "ENSG00000173894" "ENSG00000160633" "ENSG00000180787"
[477] "ENSG00000158321" "ENSG00000171940" "ENSG00000134371" "ENSG00000143390" "ENSG00000166199" "ENSG00000187605" "ENSG00000162599"
[484] "ENSG00000080603" "ENSG00000113368" "ENSG00000113810" "ENSG00000143373" "ENSG00000204304" "ENSG00000236353" "ENSG00000225987"
[491] "ENSG00000232005" "ENSG00000224952" "ENSG00000206315" "ENSG00000237344" "ENSG00000106004" "ENSG00000118412" "ENSG00000288475"
[498] "ENSG00000110435" "ENSG00000085224" "ENSG00000157540" "ENSG00000159259" "ENSG00000165819" "ENSG00000118217" "ENSG00000068305"
[505] "ENSG00000140836" "ENSG00000138336" "ENSG00000182944" "ENSG00000186448" "ENSG00000281709" "ENSG00000129245" "ENSG00000054598"
[512] "ENSG00000124789" "ENSG00000166333" "ENSG00000083093" "ENSG00000093072" "ENSG00000071655" "ENSG00000101126" "ENSG00000120798"
[519] "ENSG00000171467" "ENSG00000112118" "ENSG00000243678" "ENSG00000124535" "ENSG00000147862" "ENSG00000124216" "ENSG00000116350"
[526] "ENSG00000142235" "ENSG00000117000" "ENSG00000143970" "ENSG00000277258" "ENSG00000278644" "ENSG00000116030" "ENSG00000198783"
[533] "ENSG00000168661" "ENSG00000197757" "ENSG00000163946" "ENSG00000100625" "ENSG00000140632" "ENSG00000145734" "ENSG00000274803"
[540] "ENSG00000273873" "ENSG00000087903" "ENSG00000179583" "ENSG00000109685" "ENSG00000198646" "ENSG00000178338" "ENSG00000167548"
[547] "ENSG00000069011" "ENSG00000181827" "ENSG00000163346" "ENSG00000079432" "ENSG00000116604" "ENSG00000126778" "ENSG00000115875"
[554] "ENSG00000095951" "ENSG00000114416" "ENSG00000094916" "ENSG00000172977" "ENSG00000104885" "ENSG00000176165" "ENSG00000133392"
[561] "ENSG00000276480" "ENSG00000245680" "ENSG00000104221" "ENSG00000266412" "ENSG00000164104" "ENSG00000169297" "ENSG00000197265"
[568] "ENSG00000092201" "ENSG00000100426" "ENSG00000197111" "ENSG00000147789" "ENSG00000117395" "ENSG00000096401" "ENSG00000121741"
[575] "ENSG00000068024" "ENSG00000182141" "ENSG00000170448" "ENSG00000158941" "ENSG00000198824" "ENSG00000135365" "ENSG00000243943"
[582] "ENSG00000104852" "ENSG00000164151" "ENSG00000177469" "ENSG00000126218" "ENSG00000152217" "ENSG00000130856" "ENSG00000101596"
[589] "ENSG00000162775" "ENSG00000108773" "ENSG00000198554" "ENSG00000092203" "ENSG00000164684" "ENSG00000196700" "ENSG00000175792"
[596] "ENSG00000284901" "ENSG00000161021" "ENSG00000283780" "ENSG00000215021" "ENSG00000132670" "ENSG00000146648" "ENSG00000183207"
[603] "ENSG00000135250" "ENSG00000111786" "ENSG00000187140" "ENSG00000052850" "ENSG00000176619" "ENSG00000179981" "ENSG00000169032"
[610] "ENSG00000100714" "ENSG00000132383" "ENSG00000071539" "ENSG00000073111" "ENSG00000165097" "ENSG00000149050" "ENSG00000011451"
[617] "ENSG00000074657" "ENSG00000188994"
[1] "\"Osteoblasts\"[Mesh]"
[1] "Hsa"
[1] "hsapiens_gene_ensembl"
[1] "9606"
[1] "getBM"
[1] "Check EntrezGeneID again"
[1] "end"
[1] "mmusculus_homolog_ensembl_gene"
[1] "Check EntrezGeneID again"
[1] "end"
[1] "rnorvegicus_homolog_ensembl_gene"
[1] "Check EntrezGeneID again"
|======================================================================================================================================| 100%[1] "end"
[1] "sscrofa_homolog_ensembl_gene"
[1] "Check EntrezGeneID again"
|======================================================================================================================================| 100%[1] "end"
[1] "StartLoop"
Calcuate relativity of the Mitochondrial/Cytoskeletal genes to the Mesh term “Osteoblasts”[Mesh] using a literature mining method.
Mesh_term4Interesting <- '"Osteoblasts"[Mesh]'
GenesLs <- Anno_GeneLs$EnsemblID[!is.na(Anno_GeneLs$EnsemblID)]
Species <- "Hsa" # Hsa Mus Rno Ssc
MitoCySk.Mesh<-WZY_literatureMining(GenesLs=GenesLs, Mesh_term4Interesting=Mesh_term4Interesting, Species=Species, updateProgress = NULL)
[1] "ENSG00000105327" "ENSG00000001084" "ENSG00000002330" "ENSG00000004142" "ENSG00000005059" "ENSG00000006530" "ENSG00000007168"
[8] "ENSG00000011295" "ENSG00000011426" "ENSG00000015475" "ENSG00000019144" "ENSG00000021574" "ENSG00000027001" "ENSG00000030110"
[15] "ENSG00000032742" "ENSG00000041880" "ENSG00000043514" "ENSG00000044090" "ENSG00000044524" "ENSG00000047410" "ENSG00000055950"
[22] "ENSG00000060762" "ENSG00000061794" "ENSG00000062582" "ENSG00000065000" "ENSG00000067606" "ENSG00000067704" "ENSG00000067900"
[29] "ENSG00000068028" "ENSG00000069329" "ENSG00000070718" "ENSG00000070831" "ENSG00000072506" "ENSG00000072756" "ENSG00000072803"
[36] "ENSG00000074071" "ENSG00000074695" "ENSG00000075336" "ENSG00000075945" "ENSG00000076826" "ENSG00000077380" "ENSG00000078674"
[43] "ENSG00000080371" "ENSG00000080815" "ENSG00000080824" "ENSG00000083799" "ENSG00000083937" "ENSG00000084731" "ENSG00000084764"
[50] "ENSG00000085491" "ENSG00000085760" "ENSG00000086065" "ENSG00000086758" "ENSG00000087088" "ENSG00000089060" "ENSG00000095066"
[57] "ENSG00000096080" "ENSG00000096092" "ENSG00000096093" "ENSG00000096872" "ENSG00000097033" "ENSG00000099624" "ENSG00000099800"
[64] "ENSG00000099821" "ENSG00000099849" "ENSG00000100075" "ENSG00000100242" "ENSG00000100300" "ENSG00000100360" "ENSG00000100503"
[71] "ENSG00000100644" "ENSG00000100890" "ENSG00000101052" "ENSG00000101181" "ENSG00000101222" "ENSG00000101367" "ENSG00000101574"
[78] "ENSG00000103254" "ENSG00000103723" "ENSG00000104164" "ENSG00000104381" "ENSG00000104765" "ENSG00000104833" "ENSG00000104892"
[85] "ENSG00000105197" "ENSG00000105255" "ENSG00000105723" "ENSG00000105819" "ENSG00000105948" "ENSG00000105976" "ENSG00000106211"
[92] "ENSG00000106299" "ENSG00000107186" "ENSG00000107282" "ENSG00000107643" "ENSG00000107816" "ENSG00000107819" "ENSG00000107863"
[99] "ENSG00000108064" "ENSG00000108262" "ENSG00000108387" "ENSG00000108561" "ENSG00000108961" "ENSG00000109083" "ENSG00000109103"
[106] "ENSG00000109171" "ENSG00000109332" "ENSG00000109670" "ENSG00000109971" "ENSG00000110711" "ENSG00000111199" "ENSG00000111276"
[113] "ENSG00000111837" "ENSG00000111843" "ENSG00000112031" "ENSG00000112110" "ENSG00000112144" "ENSG00000112651" "ENSG00000112701"
[120] "ENSG00000114107" "ENSG00000114120" "ENSG00000114346" "ENSG00000114446" "ENSG00000114993" "ENSG00000115091" "ENSG00000115317"
[127] "ENSG00000115355" "ENSG00000115840" "ENSG00000115966" "ENSG00000115993" "ENSG00000116459" "ENSG00000116584" "ENSG00000116874"
[134] "ENSG00000117155" "ENSG00000117245" "ENSG00000117593" "ENSG00000118046" "ENSG00000118200" "ENSG00000118242" "ENSG00000118246"
[141] "ENSG00000118965" "ENSG00000119333" "ENSG00000119541" "ENSG00000120662" "ENSG00000120832" "ENSG00000121621" "ENSG00000121957"
[148] "ENSG00000122140" "ENSG00000122386" "ENSG00000122545" "ENSG00000122912" "ENSG00000122970" "ENSG00000123416" "ENSG00000123607"
[155] "ENSG00000124172" "ENSG00000124279" "ENSG00000124333" "ENSG00000124356" "ENSG00000124532" "ENSG00000125354" "ENSG00000125648"
[162] "ENSG00000125652" "ENSG00000125995" "ENSG00000126756" "ENSG00000126768" "ENSG00000126814" "ENSG00000126858" "ENSG00000127914"
[169] "ENSG00000127955" "ENSG00000127989" "ENSG00000128311" "ENSG00000128626" "ENSG00000128641" "ENSG00000128654" "ENSG00000128833"
[176] "ENSG00000128881" "ENSG00000129250" "ENSG00000129682" "ENSG00000130294" "ENSG00000130340" "ENSG00000130348" "ENSG00000130479"
[183] "ENSG00000130643" "ENSG00000130724" "ENSG00000130779" "ENSG00000131165" "ENSG00000131269" "ENSG00000131437" "ENSG00000131966"
[190] "ENSG00000132300" "ENSG00000132356" "ENSG00000132589" "ENSG00000132612" "ENSG00000132842" "ENSG00000132849" "ENSG00000134108"
[197] "ENSG00000134278" "ENSG00000134318" "ENSG00000134375" "ENSG00000134709" "ENSG00000134809" "ENSG00000134982" "ENSG00000135049"
[204] "ENSG00000135127" "ENSG00000135297" "ENSG00000135338" "ENSG00000135441" "ENSG00000135776" "ENSG00000135900" "ENSG00000136108"
[211] "ENSG00000136122" "ENSG00000136270" "ENSG00000136463" "ENSG00000136522" "ENSG00000137100" "ENSG00000137210" "ENSG00000137288"
[218] "ENSG00000137804" "ENSG00000137807" "ENSG00000137942" "ENSG00000138035" "ENSG00000138036" "ENSG00000138069" "ENSG00000138071"
[225] "ENSG00000138095" "ENSG00000138175" "ENSG00000138180" "ENSG00000138182" "ENSG00000138399" "ENSG00000138592" "ENSG00000138758"
[232] "ENSG00000138821" "ENSG00000139131" "ENSG00000139220" "ENSG00000139737" "ENSG00000140463" "ENSG00000140854" "ENSG00000141279"
[239] "ENSG00000141367" "ENSG00000141556" "ENSG00000141577" "ENSG00000141682" "ENSG00000142192" "ENSG00000142208" "ENSG00000142347"
[246] "ENSG00000142444" "ENSG00000143158" "ENSG00000143575" "ENSG00000143761" "ENSG00000143801" "ENSG00000143862" "ENSG00000144040"
[253] "ENSG00000144381" "ENSG00000144824" "ENSG00000144868" "ENSG00000145715" "ENSG00000146282" "ENSG00000146425" "ENSG00000147586"
[260] "ENSG00000149499" "ENSG00000149557" "ENSG00000150756" "ENSG00000150764" "ENSG00000151150" "ENSG00000151929" "ENSG00000152503"
[267] "ENSG00000154174" "ENSG00000154342" "ENSG00000154429" "ENSG00000154839" "ENSG00000155366" "ENSG00000155906" "ENSG00000155970"
[274] "ENSG00000156026" "ENSG00000156313" "ENSG00000156735" "ENSG00000156990" "ENSG00000157540" "ENSG00000157796" "ENSG00000158411"
[281] "ENSG00000158623" "ENSG00000158828" "ENSG00000158882" "ENSG00000160551" "ENSG00000160993" "ENSG00000161800" "ENSG00000162409"
[288] "ENSG00000162769" "ENSG00000162851" "ENSG00000162972" "ENSG00000163462" "ENSG00000163539" "ENSG00000163626" "ENSG00000163635"
[295] "ENSG00000163808" "ENSG00000163930" "ENSG00000164114" "ENSG00000164284" "ENSG00000164347" "ENSG00000164466" "ENSG00000164695"
[302] "ENSG00000164877" "ENSG00000164896" "ENSG00000164933" "ENSG00000164961" "ENSG00000165480" "ENSG00000165487" "ENSG00000165629"
[309] "ENSG00000165813" "ENSG00000166503" "ENSG00000166780" "ENSG00000166963" "ENSG00000167306" "ENSG00000167552" "ENSG00000167553"
[316] "ENSG00000167797" "ENSG00000167862" "ENSG00000168071" "ENSG00000168172" "ENSG00000168385" "ENSG00000168827" "ENSG00000169057"
[323] "ENSG00000169100" "ENSG00000170248" "ENSG00000170606" "ENSG00000170759" "ENSG00000170959" "ENSG00000171103" "ENSG00000171169"
[330] "ENSG00000171533" "ENSG00000171552" "ENSG00000171612" "ENSG00000172171" "ENSG00000172575" "ENSG00000172590" "ENSG00000173548"
[337] "ENSG00000173726" "ENSG00000173786" "ENSG00000173805" "ENSG00000174032" "ENSG00000174173" "ENSG00000174871" "ENSG00000175216"
[344] "ENSG00000175564" "ENSG00000175567" "ENSG00000175582" "ENSG00000175768" "ENSG00000176101" "ENSG00000176108" "ENSG00000176171"
[351] "ENSG00000176410" "ENSG00000176720" "ENSG00000177192" "ENSG00000177370" "ENSG00000177542" "ENSG00000177879" "ENSG00000178209"
[358] "ENSG00000178694" "ENSG00000178952" "ENSG00000178996" "ENSG00000180096" "ENSG00000180834" "ENSG00000181004" "ENSG00000181284"
[365] "ENSG00000181991" "ENSG00000182504" "ENSG00000182628" "ENSG00000183032" "ENSG00000183048" "ENSG00000183172" "ENSG00000183978"
[372] "ENSG00000184640" "ENSG00000184702" "ENSG00000185009" "ENSG00000185043" "ENSG00000185129" "ENSG00000185133" "ENSG00000185340"
[379] "ENSG00000185721" "ENSG00000185825" "ENSG00000185909" "ENSG00000186010" "ENSG00000186522" "ENSG00000187240" "ENSG00000188428"
[386] "ENSG00000188763" "ENSG00000188807" "ENSG00000188906" "ENSG00000189114" "ENSG00000196072" "ENSG00000196230" "ENSG00000196586"
[393] "ENSG00000196659" "ENSG00000197119" "ENSG00000197457" "ENSG00000197535" "ENSG00000197557" "ENSG00000197822" "ENSG00000197912"
[400] "ENSG00000198718" "ENSG00000198836" "ENSG00000198899" "ENSG00000198954" "ENSG00000204152" "ENSG00000204388" "ENSG00000204389"
[407] "ENSG00000204843" "ENSG00000205981" "ENSG00000213123" "ENSG00000213221" "ENSG00000213465" "ENSG00000213625" "ENSG00000214026"
[414] "ENSG00000214253" "ENSG00000215251" "ENSG00000217930" "ENSG00000230989" "ENSG00000238227" "ENSG00000241837" "ENSG00000250479"
[421] "ENSG00000254858" "ENSG00000255112" "ENSG00000262814" "ENSG00000267855" "ENSG00000274523" "ENSG00000288709" "ENSG00000288722"
[1] "\"Osteoblasts\"[Mesh]"
[1] "Hsa"
[1] "hsapiens_gene_ensembl"
[1] "9606"
[1] "getBM"
[1] "Check EntrezGeneID again"
[1] "end"
gp <- literatureMiningPlot(
literature.mining=TFs.Mesh$literature.mining, Thresh.R=0.002,
use.Padj = FALSE, Thresh.P=0.05, label=TRUE, label_n = 10
)
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
Please use `linewidth` instead.
svg(filename = file.path(".", "01.PlotOut", "LiteratureMiningRes_01.svg"), width = 12, height = 8, pointsize = 12)
print(gp)
dev.off()
null device
1
graphics.off()
knitr::opts_knit$set(global.device = TRUE)
print(gp)
knitr::opts_knit$set(global.device = FALSE)
gp <- literatureMiningPlot(
literature.mining=MitoCySk.Mesh$literature.mining, Thresh.R=0.001,
use.Padj = FALSE, Thresh.P=0.15, label=TRUE, label_n = 10
)
svg(filename = file.path(".", "01.PlotOut", "LiteratureMiningRes_02.svg"), width = 12, height = 8, pointsize = 12)
print(gp)
dev.off()
null device
1
graphics.off()
knitr::opts_knit$set(global.device = TRUE)
print(gp)
knitr::opts_knit$set(global.device = FALSE)
Retrieve SNP records on the O-GlcNAc sites of TFs. If the a O-GlcNAc site of a TF really have a profound influences on the function of this TF, the misssense SNP on this site should be associated with some clincial pathogenic phenotype.
# Setup BioMart for SNP
Ann_Mart.Loc<-NULL
attempt<-1
attempt_max<-12
while(is.null(Ann_Mart.Loc)&&attempt<13){
if(attempt%in%seq(from=1,to=attempt_max,3)){
url<-"https://useast.ensembl.org"
}else if(attempt%in%seq(from=2,to=attempt_max,3)){
url<-"https://www.ensembl.org"
}else if(attempt%in%seq(from=3,to=attempt_max,3)){
url<-"https://useast.ensembl.org/"
}else if(attempt%in%seq(from=4,to=attempt_max,3)){
url<-"https://asia.ensembl.org/"
}
try({
Ann_Mart.Loc <- useDataset(
"hsapiens_gene_ensembl",
useMart("ENSEMBL_MART_ENSEMBL", host = url)
)
}, silent = TRUE)
attempt<-attempt+1
if(is.null(Ann_Mart.Loc)){Sys.sleep(10)}
}
# Setup BioMart for Gene ID
Ann_Mart.snp<-NULL
attempt<-1
attempt_max<-12
while(is.null(Ann_Mart.snp)&&attempt<13){
if(attempt%in%seq(from=1,to=attempt_max,3)){
url<-"https://useast.ensembl.org"
}else if(attempt%in%seq(from=2,to=attempt_max,3)){
url<-"https://www.ensembl.org"
}else if(attempt%in%seq(from=3,to=attempt_max,3)){
url<-"https://useast.ensembl.org/"
}else if(attempt%in%seq(from=4,to=attempt_max,3)){
url<-"https://asia.ensembl.org/"
}
try({
Ann_Mart.snp <- useDataset(
"hsapiens_snp",
useMart("ENSEMBL_MART_SNP", host = url)
)
}, silent = TRUE)
attempt<-attempt+1
if(is.null(Ann_Mart.snp)){Sys.sleep(10)}
}
# Get SNP results
UniPort.rs.OGlcNAc <- data.frame()
TFs.UniPort <- nodes$title[nodes$group=="TF"]
Par_pb <- txtProgressBar(min = 1, max = length(TFs.UniPort), style = 3)
n <- 1
for(i in TFs.UniPort){
n<-n+0.49
setTxtProgressBar(Par_pb, n)
UniPort.Loc <- getBM(
mart=Ann_Mart.Loc,
attributes=c(
"uniprotswissprot", "ensembl_gene_id", "ensembl_transcript_id", "strand",
"chromosome_name", "transcript_start", "transcript_end", "cds_length"
),
filter=c(
"uniprotswissprot"
),
values=i,
uniqueRows = T
)
n<-n+0.49
setTxtProgressBar(Par_pb, n)
UniPort.Loc <- UniPort.Loc[order(UniPort.Loc$cds_length, decreasing = TRUE),][1,]
UniPort.rs<-NULL
rm(UniPort.rs)
try({
UniPort.rs <- UniPort.Loc %>%
{
paste(
.[]$chromosome_name,
min(c(.[]$transcript_start, .[]$transcript_end)),
max(c(.[]$transcript_start, .[]$transcript_end)),
sep = ":"
)
} %>%
getBM(
mart=Ann_Mart.snp,
attributes=c(
"refsnp_id", "consequence_type_tv", "consequence_allele_string",
"ensembl_peptide_allele", "translation_start",
"translation_end", "ensembl_transcript_stable_id",
"chr_name", "chrom_start", "chrom_end"
),
filter="chromosomal_region",
values=.,
uniqueRows = TRUE,
verbose = FALSE
)
}, silent = TRUE)
if(!exists("UniPort.rs")) next
UniPort.rs <- UniPort.rs[UniPort.rs$ensembl_transcript_stable_id == UniPort.Loc$ensembl_transcript_id,]
UniPort.rs <- UniPort.rs[UniPort.rs$consequence_type_tv == "missense_variant",]
UniPort.rs <- UniPort.rs[UniPort.rs$translation_start == UniPort.rs$translation_end,]
temp <- OGlcNAc_Anno[OGlcNAc_Anno$Accession==i,]$Position_in_Protein %>%
{UniPort.rs[UniPort.rs$translation_start%in%.[],]}
if(nrow(temp)){
temp <- cbind(
UniPort_ID = i, temp
)
UniPort.rs.OGlcNAc <- rbind(
UniPort.rs.OGlcNAc, temp
)
}
n<-n+0.02
setTxtProgressBar(Par_pb, n)
}
|======================================================================================================================================| 100%
cat("\n")
close(Par_pb)
SNP.Phenotype <- getBM(
mart=Ann_Mart.snp,
attributes=c(
"refsnp_id", "clinical_significance", "p_value",
"phenotype_name", "phenotype_description", "variation_names"
),
filter="snp_filter",
values=UniPort.rs.OGlcNAc$refsnp_id%>%unique(),
uniqueRows = TRUE
)
SNP.Phenotype<-SNP.Phenotype[SNP.Phenotype$phenotype_description!="",]
UniPort.rs.OGlcNAc.Clinc <- UniPort.rs.OGlcNAc[UniPort.rs.OGlcNAc$refsnp_id%in%unique(SNP.Phenotype$refsnp_id),]
UniPort.rs.OGlcNAc.Clinc <- merge(UniPort.rs.OGlcNAc.Clinc, SNP.Phenotype, by="refsnp_id")
Filter out the TFs that doesn’t have pathogenic SNP on O-GlcNAc sites.
temp <- nodes[nodes$group=="TF",]
temp <- temp[temp$title%in%unique(UniPort.rs.OGlcNAc.Clinc$UniPort_ID),]
nodes <- rbind(
nodes[nodes$group!="TF",],
temp
)
edges <- edges[edges$from%in%nodes$id & edges$to%in%nodes$id,]
nodes<-nodes[nodes$id%in%unique(c(edges$from,edges$to)),]
Add url link to nodes.
for(i in nodes$title[nodes$group=="TF"]){
OGlcNAc.site <- OGlcNAc_Anno[OGlcNAc_Anno$Query==i,]$Position_in_Protein %>%
unique() %>% paste(., collapse = ";")
temp <- UniPort.rs.OGlcNAc.Clinc[UniPort.rs.OGlcNAc.Clinc$UniPort_ID==i, ]
temp <- temp[!duplicated(temp$refsnp_id), ]
SNP.list <- c()
for (k in temp$refsnp_id) {
SNP.list <- c(
SNP.list,
paste0(
"<br><a href='https://www.ensembl.org/Homo_sapiens/Variation/Phenotype?db=core;r=",
paste0(temp$chr_name[temp$refsnp_id==k],":",temp$chrom_start[temp$refsnp_id==k],"-",temp$chrom_end[temp$refsnp_id==k]),
";v=", k, ";vdb=variation' target='_blank'>SNP: ",
k, "; on AA site: ", temp$translation_start[temp$refsnp_id==k],
" of ", temp$ensembl_transcript_stable_id[temp$refsnp_id==k], "</a>"
)
)
}
nodes$title[nodes$title==i] <- HTML(
paste0(
'<a href="https://www.uniprot.org/uniprotkb/',i,'/entry" target="_blank">', i, '</a><br>',
'<a href="https://oglcnac.org/atlas/detail/', i, '" target="_blank">O-GlcNAc Site: ', OGlcNAc.site, '</a><br>',
'O-GlcNAc site with pathogenic SNP:'
), HTML(SNP.list)
)
}
for(i in nodes$title[nodes$group!="TF"]){
nodes$title[nodes$title==i] <- HTML(
paste0(
'<a href="https://www.uniprot.org/uniprotkb/',i,'/entry" target="_blank">', i, '</a>'
)
)
}
TFs.Mesh.keep <- TFs.Mesh$literature.mining[TFs.Mesh$literature.mining$GeneSymbol%in%nodes$label[nodes$group=="TF"],]
MitoCySk.Mesh.keep <- MitoCySk.Mesh$literature.mining[MitoCySk.Mesh$literature.mining$GeneSymbol%in%nodes$label[nodes$group!="TF"],]
Mesh.keep <- rbind(
TFs.Mesh.keep, MitoCySk.Mesh.keep
)
R.max <- max(Mesh.keep$R)
R.min <- min(Mesh.keep$R)
for(i in nodes$label){
nodes$value[nodes$label==i]<-1+(((Mesh.keep$R[Mesh.keep$GeneSymbol==i]-R.min)/R.max)*100)
}
TF_ls <- TFLink$Name.TF %>% unique()
for(i in nodes$label[nodes$group!="TF"]){
if(i %in% TF_ls){
nodes$shape[nodes$label==i] <- "dot"
}
}
Add url link to edge.
edges <- cbind(
edges, title = 1, width = 1, color="lightblue", highlight = "red"
)
for(i in 1:nrow(edges)){
from <- edges[i,]$from
to <- edges[i,]$to
from <- nodes$label[nodes$id==from]
to <- nodes$label[nodes$id==to]
temp <- TFLink_related_filter[TFLink_related_filter$Name.TF==from & TFLink_related_filter$Name.Target==to,]
PubMed.ls <- temp$PubmedID %>%
str_split(.,";") %>% unlist() %>%
paste0(
"<a href='https://pubmed.ncbi.nlm.nih.gov/", ., "/' target='_blank'>",
., "</a><br>"
)
edges$title[i] <- paste0(
"Has Small-scale Evidence: ", temp$`Small-scale.evidence`, "<br>", HTML(PubMed.ls)
) %>% HTML()
edges$width[i] <- length(PubMed.ls)
if(temp$`Small-scale.evidence`=="Yes"){
edges$color[i] <- "#9999FF"
}
}
e.Max <- max(edges$width)
e.Min <- min(edges$width)
edges$width <- round(log(edges$width - e.Min + 1, base = 2) + 1)
Add legends
# nodes data.frame for legend
lnodes <- data.frame(
label = c(
"TFs related to cytoskeleton\nwithout O-GlcNAc site records\nand is not DEGs in GSE138783",
"TFs related to cytoskeleton\nwith O-GlcNAc site records\nand is DEGs in GSE138783",
"TFs with O-GlcNAc site records\nand is not DEGs in GSE138783",
"TFs with O-GlcNAc site records\nand is DEGs in GSE138783",
"Targets related to cytoskeleton", "Targets related to mitochondria",
"Targets related to\nboth of cytoskeleton and mitochondria",
"Size of node indicates the amount\nof articles related to osteoblast",
"Width of edge indicates\nthe amount of articles"
),
shape = c(
"dot", "star", "square", "star", "triangle", "triangle", "diamond", "text", "text"
),
color = c(
"red", "red", "blue", "blue", "red", "orange", "black", "#FFFFFF", "#FFFFFF"
),
title = "Legends",
id = 1:9
)
# edges data.frame for legend
ledges <- data.frame(
color = c("#9999FF", "lightblue"),
label = c("Has small-scale\nevidence", "No small-scale\nevidence"),
arrows =c("to", "to"), width = 10
)
Make interactive network
network <- visNetwork(nodes, edges, height = "800px", width = "100%") %>%
visNodes(physics = TRUE) %>%
visPhysics(stabilization = TRUE, solver = "repulsion", maxVelocity = 1) %>%
visEdges(
smooth = TRUE, shadow = FALSE,
arrows =list(to = list(enabled = TRUE, scaleFactor = 0.5))
) %>%
visOptions(
nodesIdSelection = TRUE, selectedBy = "group", manipulation = FALSE,
highlightNearest = list(enabled = TRUE, degree = 9999, algorithm = "hierarchical")
) %>%
visLegend(addEdges = ledges, addNodes = lnodes, useGroups = FALSE)
network %>% visSave(file = "network.html")
print(network)
NULL
save.image("./CheckPoint.RData")