As an example of a use case of the Articles API citations module, let's look at the distribution of citations over time.
library(tidyverse) # for general data wrangling
library(RJDBC) # for CDB access
library(plotly) # for interactive plots
library(scales) # for x-axis configuration
library(rmarkdown) # for paged_table
Query the CDB for all PubMed articles first published between 1990 and 2018 (inclusive) and get the corresponding citation counts.
# Fetch one row per PubMed article (source 'MED') first published in
# 1990-2018 (inclusive): its PMID, the size of its cited-by array (the
# citation count), and its publication year.
# NOTE(review): `cdb` is assumed to be an open database connection created
# outside this chunk -- confirm.
input <- dbGetQuery(
  cdb,
  '
  SELECT c.external_id AS "pmid",
         CARDINALITY(w.cited_by_array) AS "citedCount",
         EXTRACT(YEAR FROM c.first_publish_date) AS "pubYear"
  FROM citations c
  JOIN cn_complete_wordarray w
    ON c.id = w.citation_id
  WHERE c.source = \'MED\'
    AND EXTRACT(YEAR FROM c.first_publish_date) BETWEEN 1990 AND 2018
  '
)

# The result set is now held in `input`, so the connection can be released.
dbDisconnect(cdb)
## [1] TRUE
Bin the citation counts into suitable bins and calculate proportions:
# Per publication year, count the articles falling into each citation-count
# bin, reshape to one row per (year, bin) pair, and add each bin's share of
# that year's articles.
bins <- input %>%
  group_by(pubYear) %>%
  summarise(
    `0`     = sum(citedCount == 0),
    `1-10`  = sum(citedCount >= 1 & citedCount <= 10),
    `11-20` = sum(citedCount >= 11 & citedCount <= 20),
    `21-30` = sum(citedCount >= 21 & citedCount <= 30),
    `31-40` = sum(citedCount >= 31 & citedCount <= 40),
    `41-50` = sum(citedCount >= 41 & citedCount <= 50),
    `>50`   = sum(citedCount > 50)
  ) %>%
  # Wide (one column per bin) -> long (one row per year/bin pair).
  pivot_longer(cols = -pubYear, names_to = "bin", values_to = "count") %>%
  # Regroup by year so that sum(count) below is the per-year total.
  group_by(pubYear) %>%
  mutate(prop = count / sum(count))

# Display the binned counts and proportions as a paged table.
paged_table(bins)
Plot as a stacked area plot: