This repository is for collection, cleaning, and analysis scripts used with data from the reporting database for Northwestern University's instance of Symplectic Elements, a research information management system. The R script reshape_pub_source_ids.r is used to tidy data extracted from the database to make it easier to process.
Elements uses SQL Server so these are T-SQL queries...
use [Elements-reporting2]
SELECT g.name, pr.[Publication ID], doi, [Data Source], [Data Source Proprietary ID]
FROM [dbo].[Publication Record] as pr
join [dbo].[Publication User Relationship] as pu on pr.[Publication ID] = pu.[Publication ID]
join [dbo].[Group User Membership] as gu on gu.[User ID] = pu.[User ID]
join [dbo].[Group] as g on g.[ID] = gu.[Group ID]
WHERE [publication-date] > YYYYMMDD AND [publication-date] <= YYYYMMDD AND g.name = 'group_name'
ORDER BY pr.[Publication ID]
-- NOTE: Pubs may have duplicate publication dates, DOIs, and proprietary IDs (Scopus, ORCiD, WOS, etc.)
use [Elements-reporting2]
SELECT u.[Last Name], u.[First Name], u.[Username], u.[Department], g.[name], pr.[Publication ID], pr.[publication-date], pr.[authors], pr.[title], pr.[journal], pr.[publication-status], pr.[types], pr.[external-identifiers], pr.[doi], pr.[Data Source Proprietary ID], pr.[Data Source]
FROM [dbo].[Publication Record] as pr
join [dbo].[Publication User Relationship] as pu on pr.[Publication ID] = pu.[Publication ID]
join [dbo].[Group User Membership] as gu on gu.[User ID] = pu.[User ID]
join [dbo].[User] as u on u.[ID] = pu.[User ID]
join [dbo].[Group] as g on g.[ID] = gu.[Group ID]
WHERE [publication-date] > YYYYMMDD AND [publication-date] <= YYYYMMDD AND g.name = 'group_name'
ORDER BY u.[Last Name]
-- NOTE: Pubs may have duplicate publication dates, DOIs, and proprietary IDs (Scopus, ORCiD, WOS, etc.)
use [Elements-reporting2]
SELECT g.name as "Group Name", u.[Last Name], u.[First Name], u.[Department], u.Username, uia.[Identifier Value] as "Scopus AU-ID", pr.[Publication ID], pr.[publication-date], doi, pr.[Data Source], pr.[Data Source Proprietary ID]
FROM [dbo].[Publication Record] as pr
join [dbo].[Publication User Relationship] as pu on pr.[Publication ID] = pu.[Publication ID]
join [dbo].[Group User Membership] as gu on gu.[User ID] = pu.[User ID]
join [dbo].[User] as u on u.[ID] = pu.[User ID]
join [dbo].[User Identifier] as ui on ui.[User ID] = u.[ID]
join [dbo].[Identifier Scheme] as idsch on idsch.ID = uia.[Identifier Scheme ID]
join [dbo].[Group] as g on g.[ID] = gu.[Group ID]
WHERE pr.[publication-date] > YYYYMMDD AND pr.[publication-date] <= YYYYMMDD AND g.name = 'group_name_1'
OR pr.[publication-date] > YYYYMMDD AND pr.[publication-date] <= YYYYMMDD AND g.name = 'group_name_2'
OR pr.[publication-date] > YYYYMMDD AND pr.[publication-date] <= YYYYMMDD AND g.name = 'group_name_3'
ORDER BY u.[Last Name]