diff --git a/BibliographicStudies.Rmd b/BibliographicStudies.Rmd index 30fa7f9..eac1b2a 100644 --- a/BibliographicStudies.Rmd +++ b/BibliographicStudies.Rmd @@ -1,5 +1,5 @@ --- -title: "Bibliographic Studies" +title: "Bibliometric Studies" subtitle: "Reproducible Bibliometric Analysis of Pathology Articles Using PubMed, E-direct, WoS, Google Scholar" author: "Serdar Balcı, MD, Pathologist" date: '`r format(Sys.Date())`' @@ -25,6 +25,14 @@ output: toc_float: yes --- +Follow @serdarbalci +[](https://github.com/sbalci/PubMed/issues) +[](https://saythanks.io/to/sbalci) +[](http://hits.dwyl.io/sbalci/PubMed) + + + # Introduction It is a very common bibliometric study type to retrospectively analyse the number of peer reviewed articles written from a country to view the amount of contribution made in a specific scientific discipline. diff --git a/BibliographicStudies.nb.html b/BibliographicStudies.nb.html index ab0371a..cbb6ef9 100644 --- a/BibliographicStudies.nb.html +++ b/BibliographicStudies.nb.html @@ -11,9 +11,9 @@ - + -
It is a very common bibliometric study type to retrospectively analyse the number of peer reviewed articles written from a country to view the amount of contribution made in a specific scientific discipline.
@@ -2916,7 +2927,7 @@If you want to see the code used in the analysis please click the code button on the right upper corner or throughout the page.
I would like to hear your feedback: https://goo.gl/forms/YjGZ5DHgtPlR1RnB3
-This document will be continiously updated and the last update was on 2019-04-13.
+This document will be continuously updated and the last update was on 2019-06-02.
Serdar Balcı, MD, Pathologist would like to hear your feedback: https://goo.gl/forms/YjGZ5DHgtPlR1RnB3
-This document will be continiously updated and the last update was on 2019-04-13.
+This document will be continuously updated and the last update was on 2019-06-02.
https://www.semanticscholar.org/
+Semantic Scholar Open Research Corpus
https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/open-corpus/index.html
@@ -3009,6 +3013,20 @@ 2.3 Semantic Scholar
caffeinate
https://labs.cognitive.microsoft.com/en-us/project-academic-knowledge
@@ -3080,6 +3098,21 @@my_data_frame <- readr::read_delim("~/downloads/pubmed_result.txt", delim = "\t", col_names = FALSE)
+
+# Split the PubMed export into pieces of at most `chunk` rows and write each
+# piece to ~/downloads/<index>.txt using readr::write_csv().
+chunk <- 5000
+
+# seq_len() yields an empty index for a zero-row data frame, whereas
+# 1:ceiling(0 / chunk) would count down as c(1, 0).
+chunk_id <- rep(
+  seq_len(ceiling(nrow(my_data_frame) / chunk)),
+  each = chunk,
+  length.out = nrow(my_data_frame)
+)
+mylist <- split(my_data_frame, chunk_id)
+
+# Write every chunk that exists rather than assuming there are exactly four:
+# fewer chunks used to fail on a NULL element, more chunks were silently
+# dropped. Individual chunks remain available as mylist[["1"]], mylist[["2"]], ...
+for (chunk_name in names(mylist)) {
+  readr::write_csv(
+    mylist[[chunk_name]],
+    file.path("~/downloads", paste0(chunk_name, ".txt"))
+  )
+}
+
https://elixir-europe.org/platforms/data/core-data-resources
Ülkemizdeki araştırma altyapısına katkı sağlamak amacıyla, Araştırma Destek Programları Başkanlığı (ARDEB) bünyesinde, 1965 yılından günümüze kadar sonuçlanmış olan 17.808 adet projenin sonuç raporunun tam metinleri, TÜBİTAK Ulusal Akademik Ağ ve Bilgi Merkezi (ULAKBİM) “TÜBİTAK Destekli Projeler Veri Tabanı”nda yayımlanmaktadır.
+Söz konusu veri tabanına https://trdizin.gov.tr/search/projectSearch.xhtml linkinden erişim sağlanabilmekte ve sonuç raporlarına ilişkin proje no, başlık, yürütücü/araştırmacı/danışman adı, yıl ve anahtar kelime bazında tarama yapılabilmektedir.
+https://github.com/schochastics/graphlayouts
+https://github.com/ropensci/rentrez/issues/134#event-2313355730
+
+library(rentrez)
+library(XML)
+
+# Fetch one PubMed record and return its MeSH descriptors.
+#
+# pmid: PubMed identifier passed to rentrez::entrez_fetch() as `id`.
+# Returns a data.frame with one row per DescriptorName node found under
+# MeshHeadingList/MeshHeading: `mesh_ui` (the node's UI attribute) and
+# `descriptor` (the node's text).
+MeSH_from_pmid <- function(pmid){
+    rec <- entrez_fetch(db="pubmed", id=pmid, rettype = "xml", parsed=TRUE)
+    descriptor_path <- "//MeshHeadingList/MeshHeading/DescriptorName"
+    m_names <- xpathSApply(rec, descriptor_path, xmlValue)
+    # Query the record fetched above (the original referenced an undefined
+    # `eg_rec`). The UI attribute is read by name so the result does not depend
+    # on attribute order or on every node carrying the same number of attributes.
+    m_ui <- xpathSApply(rec, descriptor_path, xmlGetAttr, "UI")
+    data.frame(mesh_ui = m_ui, descriptor = m_names)
+}
+
+MeSH_from_pmid(27591765)
+
+
+
+Serdar Balcı, MD, Pathologist would like to hear your feedback: https://goo.gl/forms/YjGZ5DHgtPlR1RnB3
-This document will be continiously updated and the last update was on 2019-04-13.
+This document will be continuously updated and the last update was on 2019-05-27.
endometriosis_articlesPerTotalArticles <- europepmc::epmc_hits_trend(query = "endometriosis", period = 1980:2018)
+
+endometriosis_articlesPerTotalArticles
+
+# View(endometriosis_articlesPerTotalArticles)
+
+xlsx::write.xlsx(endometriosis_articlesPerTotalArticles, here::here("data/endometriosis_articlesPerTotalArticles.xlsx")
+)
+
+
+
+
+
+
+
+library(ggplot2)
+# Yearly share of Europe PMC records matching the "endometriosis" query:
+# query_hits / all_hits per publication year, drawn as points joined by a line.
+ggplot(endometriosis_articlesPerTotalArticles, aes(year, query_hits / all_hits)) +
+    geom_point() +
+    geom_line() +
+    xlab("Year published") +
+    # Axis label spelling corrected ("Endometriois" -> "Endometriosis").
+    ylab("Proportion of Endometriosis \n articles in Europe PMC")
+
+
+
+("endometriosis" AND "inflammation") AND (SRC:"MED")
+
+
+
+dvcs <- c('("endometriosis" AND "inflammation") AND (SRC:"MED")', '("endometriosis" AND "infertility") AND (SRC:"MED")', '("endometriosis" AND "fertility") AND (SRC:"MED")' , '("endometriosis") AND (SRC:"MED")'
+ )
+
+
+
+
+
+
+# Build one long data frame with the yearly hit counts (1980-2018) of every
+# query string in `dvcs`; `query_id` records which query a row belongs to.
+my_df <- purrr::map_df(dvcs, function(x) {
+  # A single request per query is enough: the original fetched the identical
+  # trend twice and copied query_hits from the first response into refs_hits.
+  # refs_hits is kept (equal to query_hits) so the column layout is unchanged.
+  europepmc::epmc_hits_trend(x, period = 1980:2018, synonym = FALSE) %>%
+    dplyr::mutate(query_id = x) %>%
+    dplyr::mutate(refs_hits = query_hits) %>%
+    dplyr::select(year, all_hits, refs_hits, query_hits, query_id)
+})
+my_df
+
+
+
+
+
+
+## Recoding my_df$query_id into my_df$Query
+# Map each raw Europe PMC query string to a short legend label. The call is
+# namespaced (dplyr::recode) like the surrounding dplyr calls, so it does not
+# depend on dplyr being attached at this point of the notebook.
+my_df$Query <- dplyr::recode(my_df$query_id,
+  "(\"endometriosis\" AND \"inflammation\") AND (SRC:\"MED\")" = "endometriosis AND inflammation",
+  "(\"endometriosis\" AND \"infertility\") AND (SRC:\"MED\")" = "endometriosis AND infertility",
+  "(\"endometriosis\" AND \"fertility\") AND (SRC:\"MED\")" = "endometriosis AND fertility",
+  "(\"endometriosis\") AND (SRC:\"MED\")" = "endometriosis")
+# Convert the labels to a factor for use as the plotting group/colour.
+my_df$Query <- factor(my_df$Query)
+
+
+
+
+
+
+
+library(ggplot2)
+# Share of all records matched by each query, per publication year; one
+# coloured series per Query label, legend stacked vertically under the panel.
+ggplot(
+  data = my_df,
+  mapping = aes(
+    x = year,
+    y = query_hits / all_hits,
+    group = Query,
+    color = Query
+  )
+) +
+  geom_point() +
+  geom_line() +
+  labs(
+    x = "Year published",
+    y = "Proportion of articles in PubMed \n Data from: Europe PMC"
+  ) +
+  theme(
+    legend.position = "bottom",
+    legend.direction = "vertical"
+  )
+
+
+
+
+
+
+library(ggplot2)
+# Same series as the proportion plot, with the y axis labelled in percent.
+ggplot(my_df, aes(x = year,
+                  y = query_hits / all_hits,
+                  group = Query,
+                  color = Query)) +
+    geom_point() +
+    geom_line() +
+    # Keep y numeric and format only the axis labels. Calling scales::percent()
+    # inside aes() converted the values to character strings, which produced a
+    # discrete axis ordered alphabetically instead of by value.
+    scale_y_continuous(labels = scales::percent_format(accuracy = 0.02)) +
+    xlab("Year published") +
+    ylab("Proportion of articles in PubMed \n Data from: Europe PMC") +
+    theme(legend.position = "bottom",
+          legend.direction = "vertical")
+
+
+
+
+
+
+library(ggplot2)
+# Ratio of query_hits to refs_hits per year (year treated as a discrete
+# factor), one Set1-coloured series per raw query string.
+ggplot(
+  data = my_df,
+  mapping = aes(
+    x = factor(year),
+    y = query_hits / refs_hits,
+    group = query_id,
+    color = query_id
+  )
+) +
+  geom_line(size = 1, alpha = 0.8) +
+  geom_point(size = 2) +
+  scale_color_brewer(name = "Query", palette = "Set1") +
+  labs(
+    x = "Year published",
+    y = "Proportion of articles in PubMed \n Data from: Europe PMC"
+  )
+
+
+
+library("handlr")
+deneme <- handlr::bibtex_reader("data/europepmc_endometriosisinflammation.bib")
+
+# handlr::citeproc_writer(deneme)
+
+# handlr::codemeta_writer(deneme)
+
+
+jsonlite::write_json(handlr::codemeta_writer(deneme, pretty = FALSE), path = "data/europepmc_endometriosisinflammation.json")
+
+
+
+
+
+
+
+# Read the project's BibTeX export with the handlr R6 client and convert it
+# to citeproc.
+# system.file(..., package = "handlr") only searches inside the installed
+# handlr package and returns "" for a file that lives in this project, so the
+# project-relative path (the same one passed to handlr::bibtex_reader()
+# earlier in this notebook) is used instead.
+z <- "data/europepmc_endometriosisinflammation.bib"
+x <- HandlrClient$new(x = z)
+x$read("bibtex")
+x$write("citeproc")
+
+
+
+# Yearly Europe PMC hit counts (1980-2018) for "endometriosis AND fertility".
+endometriosis_articles1 <- europepmc::epmc_hits_trend(query = "endometriosis AND fertility", period = 1980:2018)
+
+endometriosis_articles1
+
+# View(endometriosis_articles1)
+
+# Export the result of THIS query. The original exported
+# endometriosis_articlesPerTotalArticles again and so overwrote the earlier
+# workbook with the same data; a separate file name keeps both exports.
+# NOTE(review): the new file name is a suggestion - confirm the intended name.
+xlsx::write.xlsx(endometriosis_articles1, here::here("data/endometriosis_articles1.xlsx")
+)
+
+
@@ -1790,26 +1938,261 @@ https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5031306/
knitcitations::citep("10.1186/s12905-016-0336-0")
-
-[1] "(Brüggmann, Elizabeth-Martinez, Klingelhöfer, Quarcoo, Jaque, and Groneberg, 2016)"
-
vosviewer
+"endometriosis"[MeSH Major Topic] AND "english and humans"[Filter] AND ("2009/03/10"[PDat] : "2019/03/07"[PDat])
+
+
+
+
+
+# Open a hidden RStudio terminal and send it the xtract command, which writes
+# its output to data/Last10YearEndometriosis.csv.
+myTerm <- rstudioapi::terminalCreate(show = FALSE)
+rstudioapi::terminalSend(
+  myTerm,
+  "xtract -input data/Last10YearEndometriosis.xml -pattern PubmedArticle -tab \"|\" -sep \";\" -def \"NA\" -element MedlineCitation/PMID -block ArticleId -if ArticleId@IdType -equals doi -element ArticleId > data/Last10YearEndometriosis.csv \n"
+)
+
+# Wait one second, then poll every 0.1 s until the terminal reports it is no
+# longer busy.
+Sys.sleep(1)
+while (TRUE) {
+  Sys.sleep(0.1)
+  if (!rstudioapi::terminalBusy(myTerm)) {
+    print("Code Executed")
+    break
+  }
+}
+
+
+[1] "Code Executed"
+
+
+
+
+
+
+library(readr)
+Last10YearEndometriosis <- read_delim(here::here("data/Last10YearEndometriosis.csv"),
+ "|",
+ escape_double = FALSE,
+ col_names = FALSE,
+ trim_ws = TRUE)
+
+
+Parsed with column specification:
+cols(
+ X1 = [32mcol_double()[39m,
+ X2 = [31mcol_character()[39m
+)
+538 parsing failures.
+row col expected actual file
+ 19 -- 2 columns 1 columns '/Users/serdarbalciold/RepTemplates/pubmed/data/Last10YearEndometriosis.csv'
+165 -- 2 columns 1 columns '/Users/serdarbalciold/RepTemplates/pubmed/data/Last10YearEndometriosis.csv'
+218 -- 2 columns 1 columns '/Users/serdarbalciold/RepTemplates/pubmed/data/Last10YearEndometriosis.csv'
+262 -- 2 columns 1 columns '/Users/serdarbalciold/RepTemplates/pubmed/data/Last10YearEndometriosis.csv'
+320 -- 2 columns 1 columns '/Users/serdarbalciold/RepTemplates/pubmed/data/Last10YearEndometriosis.csv'
+... ... ......... ......... ............................................................................
+See problems(...) for more details.
+
+
+# View(Last10YearEndometriosis)
+
+names(Last10YearEndometriosis) <- c("PMID", "DOI")
+
+
+
+
+
+
+
+
+PMID_List <- paste0("PMID=(", Last10YearEndometriosis$PMID[!is.na(Last10YearEndometriosis$PMID)], ") OR")
+# DOI_List <- paste0("DO=(", Last10YearEndometriosis$DOI[!is.na(Last10YearEndometriosis$DOI)], ") OR")
+
+
+write(PMID_List,
+ here::here("data/endometriosis/Last10YearEndometriosis_pmid_ListforWOS.txt")
+)
+
+# write(DOI_List,
+# here::here("data/NeurosurgeryFromTurkey_doi_ListforWOS.txt")
+# )
+
+
+
+
+
+
+
+library(tidyverse)
+
+
+[37m── [1mAttaching packages[22m ──────────────────────────────────────── tidyverse 1.2.1 ──[39m
+[37m[32m✔[37m [34mggplot2[37m 3.1.0 [32m✔[37m [34mpurrr [37m 0.3.1
+[32m✔[37m [34mtibble [37m 2.0.1 [32m✔[37m [34mdplyr [37m 0.8.0.[31m1[37m
+[32m✔[37m [34mtidyr [37m 0.8.3 [32m✔[37m [34mstringr[37m 1.4.0
+[32m✔[37m [34mreadr [37m 1.3.1 [32m✔[37m [34mforcats[37m 0.4.0 [39m
+[37m── [1mConflicts[22m ─────────────────────────────────────────── tidyverse_conflicts() ──
+[31m✖[37m [34mdplyr[37m::[32mfilter()[37m masks [34mstats[37m::filter()
+[31m✖[37m [34mdplyr[37m::[32mlag()[37m masks [34mstats[37m::lag()[39m
+
+
+library(bibliometrix)
+
+
+To cite bibliometrix in publications, please use:
+
+Aria, M. & Cuccurullo, C. (2017) bibliometrix: An R-tool for comprehensive science mapping analysis, Journal of Informetrics, 11(4), pp 959-975, Elsevier.
+
+
+http:\\www.bibliometrix.org
+
+
+To start with the shiny web-interface, please digit:
+biblioshiny()
+
+
+bibliometrix::biblioshiny()
+
+
+Loading required package: shiny
+
+Listening on http://127.0.0.1:7866
+Loading required package: rio
+Loading required package: DT
+
+Attaching package: ‘DT’
+
+The following objects are masked from ‘package:shiny’:
+
+ dataTableOutput, renderDataTable
+
+Loading required package: shinycssloaders
+Loading required package: shinythemes
+Loading required package: wordcloud2
+Loading required package: colourpicker
+
+Attaching package: ‘colourpicker’
+
+The following object is masked from ‘package:shiny’:
+
+ runExample
+
+Loading required package: treemap
+Loading required package: ggmap
+Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
+Please cite ggmap if you use it! See citation("ggmap") for details.
+Loading required package: visNetwork
+Loading required package: plotly
+
+Attaching package: ‘plotly’
+
+The following object is masked from ‘package:ggmap’:
+
+ wind
+
+The following object is masked from ‘package:rio’:
+
+ export
+
+The following object is masked from ‘package:ggplot2’:
+
+ last_plot
+
+The following object is masked from ‘package:stats’:
+
+ filter
+
+The following object is masked from ‘package:graphics’:
+
+ layout
+
+
+Attaching package: ‘Matrix’
+
+The following object is masked from ‘package:tidyr’:
+
+ expand
+
+
+
+Converting your isi collection into a bibliographic dataframe
+
+Articles extracted 100
+Articles extracted 200
+Articles extracted 300
+Articles extracted 400
+Articles extracted 500
+Articles extracted 600
+Articles extracted 700
+Articles extracted 800
+Articles extracted 900
+Articles extracted 1000
+Articles extracted 1100
+Articles extracted 1200
+Articles extracted 1300
+Articles extracted 1400
+Articles extracted 1500
+Articles extracted 1600
+Articles extracted 1700
+Articles extracted 1800
+Articles extracted 1900
+Articles extracted 2000
+Articles extracted 2100
+Articles extracted 2200
+Articles extracted 2300
+Articles extracted 2400
+Articles extracted 2500
+Articles extracted 2600
+Articles extracted 2700
+Articles extracted 2800
+Articles extracted 2900
+Articles extracted 3000
+Articles extracted 3100
+Articles extracted 3200
+Articles extracted 3300
+Articles extracted 3400
+Articles extracted 3500
+Articles extracted 3600
+Articles extracted 3700
+Articles extracted 3800
+Articles extracted 3900
+Articles extracted 4000
+Articles extracted 4100
+Articles extracted 4200
+Articles extracted 4300
+Articles extracted 4400
+Articles extracted 4500
+Articles extracted 4600
+Articles extracted 4700
+Articles extracted 4800
+Articles extracted 4900
+Articles extracted 5000
+Articles extracted 5061
+Done!
+
+
+Generating affiliation field tag AU_UN from C1: Done!
+
+
+
+library("tidyverse")
+library("rjson")
+library("magicfor")
+
+
+
+
+
+
+myDOI <- readr::read_csv(here::here("data/BalciSdoi.txt"), col_names = "DOI", col_types = "c")
+
+
+
+
+
+
+myDOI <- myDOI %>%
+ mutate(
+ apitallies = paste0("https://api.scite.ai/tallies/", DOI)
+ ) %>%
+ rownames_to_column()
+
+
+
+
+
+
+# Collect the scite.ai tallies JSON for each DOI in myDOI into one data frame.
+# magicfor is switched on first; presumably this makes the following for loop
+# record every value passed to put() - confirm against the magicfor docs.
+magicfor::magic_for(silent = TRUE)
+# NOTE(review): the loop stops at nrow(myDOI) - 1, so the last row is never
+# requested; if myDOI has a single row, 1:0 iterates over 1 and 0. Confirm
+# whether skipping the last row is intentional (e.g. a trailing empty line).
+# NOTE(review): the loop is also assigned to json_data, a name that is
+# reassigned inside the body; a plain R for loop evaluates to NULL - verify
+# what magicfor's loop returns here.
+json_data <- for (i in 1:(dim(myDOI)[1]-1)) {
+  # Label for this article, built from the row name column of myDOI.
+  json_name <- paste0("Article", myDOI$rowname[i])
+  # Read and parse the JSON found at this row's tallies URL.
+  json_data <- rjson::fromJSON(file = myDOI$apitallies[i])
+  put(json_name, json_data)
+}
+# Retrieve what the loop recorded as a data frame, then switch magicfor off.
+jsonDF <- magicfor::magic_result_as_dataframe()
+magicfor::magic_free()
+
+# Row-bind the per-article entries of the json_data column; the
+# `meta_information` column identifies which entry each row came from.
+jsonDF <- dplyr::bind_rows(jsonDF$json_data, .id = "meta_information")
+
+
+
+
+
+
+
+