-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathstudycomparisonanalysis.Rmd
155 lines (130 loc) · 7.88 KB
/
studycomparisonanalysis.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
---
title: "Study Comparison"
author: "Julian Gautier"
date: "November 13, 2019"
output:
  html_document: default
  pdf_document: default
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(readr)
library(sqldf)
library(here)
```
# Reading in and preparing the data
```{r data}
# Read the tab-separated study comparison table that sits at the path
# resolved by here(). Doubled quotes are not treated as escapes, and
# leading/trailing whitespace is trimmed from every field.
alljournals <- read_delim(
  file = here("studycomparison.tsv"),
  delim = "\t",
  escape_double = FALSE,
  trim_ws = TRUE
)
```
# Overlapping journals: Identify the 127 journals in Crosas et al. 2018 that are also in one or more of five previous studies (the sixth study, Höffler 2017, is excluded because we could not obtain journal-level data)
```{r query for overlapping journals}
# Journals that were assessed by Crosas et al. and by at least one of the five
# earlier studies.
#
# read_delim() (default `na` setting) reads "NA" cells as R NA, and sqldf
# passes NA to SQLite as NULL. "This study assessed the journal" is therefore
# expressed as IS NOT NULL. Comparing against the string 'NA' only appeared to
# work because any comparison with NULL is itself NULL, which a WHERE clause
# treats as false.
overlappingjournals <- sqldf("
  select journaltitle, crosasdiscipline, gleditschhaspolicy, gherghinahaspolicy,
         zenkmoltgenhaspolicy, sturgeshaspolicy, vlaeminckhaspolicy, crosashaspolicy
  from alljournals
  where crosashaspolicy is not null
    and (gleditschhaspolicy is not null
         or gherghinahaspolicy is not null
         or zenkmoltgenhaspolicy is not null
         or sturgeshaspolicy is not null
         or vlaeminckhaspolicy is not null)
")
```
# Ignored journals: There are 24 cases in which one or more other studies reported that a journal had a data policy, whereas our study found that the journal does not have one. We remove these cases from the rest of the analysis.
```{r query for reports of journals having policies where we report none}
# Rows of alljournals where crosashaspolicy is 0 while at least one of the
# five other studies' *haspolicy columns is 1. The query text is kept in its
# own variable so the call to sqldf() stays short.
ignored_sql <- "
select journaltitle, crosasdiscipline, gleditschhaspolicy, gherghinahaspolicy, zenkmoltgenhaspolicy, sturgeshaspolicy, vlaeminckhaspolicy, crosashaspolicy
from alljournals
where crosashaspolicy ='0' and (gleditschhaspolicy = '1' or gherghinahaspolicy = '1' or zenkmoltgenhaspolicy = '1' or sturgeshaspolicy = '1' or vlaeminckhaspolicy = '1')
"
ignoredjournals <- sqldf(ignored_sql)
```
# Remaining journals: Now we're interested only in the remaining 103 journals
```{r query for journals overlapping journals, excluding ignored journals}
# Overlapping journals minus the ignored ones: keep every row of
# overlappingjournals whose journaltitle does not occur in ignoredjournals.
remaining_sql <- "
select *
from overlappingjournals
where journaltitle not in(
select journaltitle
from ignoredjournals
)
"
remainingjournals <- sqldf(remaining_sql)
```
# Data policies found: Of the 103 remaining journals, we and one or more of the five other studies found that 56 journals did have data policies
```{r query for journals we and at least another study found a policy}
# Agreement on "has a policy": rows of remainingjournals where crosashaspolicy
# is 1 and at least one of the five other studies' *haspolicy columns is 1.
found_sql <- "
select journaltitle, crosasdiscipline, gleditschhaspolicy, gherghinahaspolicy, zenkmoltgenhaspolicy, sturgeshaspolicy, vlaeminckhaspolicy, crosashaspolicy
from remainingjournals
where crosashaspolicy ='1'
and (gleditschhaspolicy = '1' or gherghinahaspolicy = '1' or zenkmoltgenhaspolicy = '1' or sturgeshaspolicy = '1' or vlaeminckhaspolicy = '1')
"
datapoliciesfound <- sqldf(found_sql)
```
# Data policies not found: Of those 103 journals, there are 20 journals for which none of the studies, including ours, found data policies
```{r query for journals where no study found a policy}
# Journals for which no study, including Crosas et al., found a data policy:
# crosashaspolicy is 0 and every other study either did not assess the journal
# (NULL) or did not report a policy (anything other than 1).
#
# The earlier form `col != ('NA' or '1')` is not a set-membership test. SQLite
# evaluates `('NA' or '1')` to the boolean 1, so each test was just `col != 1`,
# and joining them with OR only required that *some* study did not report a
# policy. The tests are now explicit and joined with AND so the query states
# "none of the studies found a policy" directly. For 0/1/missing data this
# should select the same rows of remainingjournals, because the ignored
# journals (Crosas 0, another study 1) have already been removed from it.
datapoliciesnotfound <- sqldf("
  select journaltitle, crosasdiscipline, gleditschhaspolicy, gherghinahaspolicy,
         zenkmoltgenhaspolicy, sturgeshaspolicy, vlaeminckhaspolicy, crosashaspolicy
  from remainingjournals
  where crosashaspolicy = '0'
    and (gleditschhaspolicy is null or gleditschhaspolicy != '1')
    and (gherghinahaspolicy is null or gherghinahaspolicy != '1')
    and (zenkmoltgenhaspolicy is null or zenkmoltgenhaspolicy != '1')
    and (sturgeshaspolicy is null or sturgeshaspolicy != '1')
    and (vlaeminckhaspolicy is null or vlaeminckhaspolicy != '1')
")
```
# New data policies found: Of those 103 journals, we found that 27 journals did have data policies while none of the other five studies found policies
```{r query for journals where we found a policy and no other study did}
# Derived by exclusion: the rows of remainingjournals whose journaltitle is in
# neither datapoliciesfound nor datapoliciesnotfound.
new_found_sql <- "
select journaltitle, crosasdiscipline, gleditschhaspolicy, gherghinahaspolicy, zenkmoltgenhaspolicy, sturgeshaspolicy, vlaeminckhaspolicy, crosashaspolicy
from remainingjournals
where journaltitle not in (select journaltitle from datapoliciesfound)
and journaltitle not in (select journaltitle from datapoliciesnotfound)
"
newdatapoliciesfound <- sqldf(new_found_sql)
```
# Recode categorical data
```{r recode data for codebook}
# Has-policy indicators: relabel 1 as "Yes" and 0 as "No", then store the
# result as a factor. Columns are processed in the same order as before.
yes_no_rule <- "1 = 'Yes'; 0 = 'No'"
haspolicy_cols <- c(
  "gleditschhaspolicy", "gherghinahaspolicy", "zenkmoltgenhaspolicy",
  "sturgeshaspolicy", "vlaeminckhaspolicy", "crosashaspolicy"
)
for (policy_col in haspolicy_cols) {
  alljournals[[policy_col]] <- as.factor(
    car::recode(alljournals[[policy_col]], yes_no_rule)
  )
}

# Strictness scores: relabel 2/1/0 and fix the factor level order explicitly
# (No Policy, Encourage, Required) instead of relying on alphabetical order.
strictness_rule <- "2 = 'Required'; 1 = 'Encourage';0 = 'No Policy'"
strictness_levels <- c("No Policy", "Encourage", "Required")
for (strictness_col in c("sturgesstrictness", "crosasstrictness")) {
  relabelled <- car::recode(alljournals[[strictness_col]], strictness_rule)
  alljournals[[strictness_col]] <- factor(relabelled, levels = strictness_levels)
}

## Store the title and ISSN columns as character vectors
for (text_col in c("journaltitle", "crosasissn")) {
  alljournals[[text_col]] <- as.character(alljournals[[text_col]])
}
```
# Generate Codebook
```{r generate codebook}
library(memisc)
library(lattice)
library(MASS)
# Convert the recoded table into a memisc data.set so that each variable can
# carry a short description and the full question wording for the codebook.
codebook_data <- as.data.set(alljournals)
codebook_data <- within(codebook_data, {
  # Short labels, one per variable
  description(journaltitle) <- "Journal title"
  description(gleditschhaspolicy) <- "Gleditsch has policy"
  description(gherghinahaspolicy) <- "Gherghina has policy"
  description(zenkmoltgenhaspolicy) <- "Zenkmoltgen has policy"
  description(sturgeshaspolicy) <- "Sturges has policy"
  description(sturgesstrictness) <- "Sturges strictness"
  description(vlaeminckhaspolicy) <- "Vlaeminck has policy"
  description(vlaemincksubject) <- "Vlaeminck subject"
  description(crosashaspolicy) <- "Crosas has policy"
  description(crosasdiscipline) <- "Crosas discipline"
  description(crosasstrictness) <- "Crosas strictness"
  description(crosasimpactfactor) <- "Crosas impact factor"
  description(crosaspublisher) <- "Crosas publisher"
  description(crosasissn) <- "Crosas issn"

  # Full wording describing what each variable records
  wording(journaltitle) <- "Title of the journal"
  wording(gleditschhaspolicy) <- "Does Gleditsch, N., & Metelits, C. 2003 report that the journal has a data policy?"
  wording(gherghinahaspolicy) <- "Does Gherghina, S., & Katsanidou, A. 2013 report that the journal has a data policy?"
  wording(zenkmoltgenhaspolicy) <- "Does Zenk-Möltgen, W., & Lepthien, G. 2014 report that the journal has a data policy?"
  wording(sturgeshaspolicy) <- "Does Sturges et al. 2015 report that the journal has a data policy?"
  # Verb forms aligned with the crosasstrictness wording below
  wording(sturgesstrictness) <- "Does Sturges et al. 2015 report that the journal requires data sharing or just mentions/encourages it?"
  wording(vlaeminckhaspolicy) <- "Does Vlaeminck & Herrmann 2015 report that the journal has a data policy?"
  wording(vlaemincksubject) <- "The discipline that Vlaeminck & Herrmann 2015 assigns to the journal"
  wording(crosashaspolicy) <- "Does Crosas et al 2018 report that the journal has a data policy?"
  wording(crosasdiscipline) <- "Discipline the journal is listed under in the Journal Citation Report"
  wording(crosasstrictness) <- "Does Crosas et al 2018 report that the journal requires data sharing or just mentions/encourages it?"
  # Company name spelling corrected: "Clarivate"
  wording(crosasimpactfactor) <- "The journal's 2016 Thomson Reuters (now Clarivate) Impact Factor"
  wording(crosaspublisher) <- "The journal's publisher. Where a journal is published by a publisher 'on behalf of' a professional organization, we list the publisher, not the association"
  wording(crosasissn) <- "The journal's International Standard Serial Number (ISSN)"
})

# Date-stamped output name, e.g. study_comparison_codebook_<YYYY-MM-DD>.txt
filename_codebook <- paste0("study_comparison_codebook_", Sys.Date(), ".txt")

# Build the codebook and write it as plain text next to the project files
policy_codebook <- codebook(codebook_data)
Write(policy_codebook, file = here(filename_codebook))
```