Skip to content

Commit

Permalink
add scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
phfaist committed Dec 29, 2023
1 parent d8fee07 commit 9cc295c
Show file tree
Hide file tree
Showing 8 changed files with 297 additions and 3 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

.parcel-cache


.yarn/*
!.yarn/patches
!.yarn/plugins
Expand All @@ -20,3 +19,5 @@ node_modules

previewtool/dist
_zoodb_citations_cache

scripts/output.txt
2 changes: 1 addition & 1 deletion _zoodb_citations_cache/cache_compiled_citations.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion _zoodb_citations_cache/cache_downloaded_info.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"eczoodb",
"jscomponents",
"site",
"scripts",
"previewtool",
"helpers/parcel-namer-own-folder-hashes",
"helpers/parcel-transformer-ogimage-phf"
Expand Down
71 changes: 71 additions & 0 deletions scripts/helperEcZooLoader.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import debugm from 'debug';
const debug = debugm('eczoo_sitegen.scripts.helperEcZooLoader');

import fs from 'fs';
import path from 'path';

import _ from 'lodash';

import { ZooDbDataLoaderHandler } from '@phfaist/zoodb';
import { createEcZooDb } from '@errorcorrectionzoo/eczoodb/eczoodb.js';
import { createEcZooYamlDbDataLoader } from '@errorcorrectionzoo/eczoodb/load_yamldb.js';

import { zoo_permalinks } from '@errorcorrectionzoo/eczoodb/permalinks.js';

import { get_eczoo_full_options } from '@errorcorrectionzoo/eczoodb/fullopts.js';

import { fileURLToPath } from 'url';
// ES modules do not provide the CommonJS `__filename` / `__dirname` globals,
// so derive equivalents from this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);



//
// Create an EC Zoo database object, attach a YAML data loader to it, load the
// data, and return the loaded database object.
//
// Options:
//
//   - dataDir: folder containing the zoo's data.  When null or undefined,
//     `../../eczoo_data` relative to this script's folder is used.
//
export async function loadEcZoo({ dataDir, } = {})
{
    debug(`loadEcZoo(), dataDir=${dataDir}`);

    const zooDataDir = dataDir ?? path.join(__dirname, '..', '..', 'eczoo_data');

    debug(`Using dataDir=‘${zooDataDir}’`);

    // Filesystem access and the location of the data.
    const baseOptions = {
        fs,
        fs_data_dir: zooDataDir,
    };

    // The full set of zoo options, with the citations cache placed next to
    // the `scripts` folder.
    const citationsCacheDir = path.join(__dirname, '..', '_zoodb_citations_cache');
    const fullOptions = get_eczoo_full_options({
        citationsinfo_cache_dir: citationsCacheDir,
    });

    // Script-specific overrides, merged last so that they take precedence.
    const scriptOverrides = {
        flm_options: {
            resources: {
                rename_figure_template: (f) => {
                    return `fig-${f.b32hash(24)}.pdf`;
                },
            },
        },
        zoo_permalinks: {
            // Prefix object permalinks with the public site's address.
            object: (object_type, object_id) => {
                const sitePath = zoo_permalinks.object(object_type, object_id);
                return 'https://errorcorrectionzoo.org' + sitePath;
            },
            graphics_resource: (graphics_resource) => {
                return `__abstract_fig_reference__/${graphics_resource.src_url}`;
            },
        },
    };

    const zoo = await createEcZooDb(
        _.merge(baseOptions, fullOptions, scriptOverrides)
    );

    const yamlLoader = await createEcZooYamlDbDataLoader(zoo);
    const handlerOptions = {
        throw_reload_errors: false, // for when in devel mode with eleventy
    };
    const loaderHandler = new ZooDbDataLoaderHandler(yamlLoader, handlerOptions);

    await zoo.install_zoo_loader_handler(loaderHandler);
    await zoo.load();

    return zoo;
}
18 changes: 18 additions & 0 deletions scripts/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"name": "@errorcorrectionzoo/scripts",
"version": "0.0.1",
"packageManager": "yarn@3.3.0",
"private": true,
"type": "module",
"dependencies": {
"@errorcorrectionzoo/eczoodb": ">=0.0.1",
"@phfaist/zoodb": "https://github.com/phfaist/zoodb.git#main",
"debug": "^4.3.4",
"lodash": "^4.17.21",
"yargs": "^17.7.2"
},
"peerDependencies": {
"@errorcorrectionzoo/eczoodb": "*",
"@phfaist/zoodb": "*"
}
}
155 changes: 155 additions & 0 deletions scripts/query_bib_references.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import debugm from 'debug';
const debug = debugm('eczoo_sitegen.scripts.query_bib_references');

import fs from 'fs';

import yargs from 'yargs';
import { hideBin } from 'yargs/helpers';

import { loadEcZoo } from './helperEcZooLoader.js';

//
// Core script function. Loads the zoo and performs the desired analysis.
//
// Fields of `args` that are read here:
//
//   - dataDir: passed on to loadEcZoo()
//   - citePrefix: if set, only keep citations with exactly this cite_prefix
//   - domain: if set, only keep citations encountered in `code` objects for
//     which eczoodb.code_parent_domains() reports exactly this domain
//   - format: 'txt' or 'json'
//   - includeSourcePath: whether to list the files where each citation was
//     encountered
//   - output: file name to write to; the result goes to stdout if not set
//
// Throws an Error if `args.format` is not one of the supported formats.
//
async function runmain(args)
{
    // Reject an unsupported format right away, rather than after the slow zoo
    // load (and rather than silently producing empty output).
    if (args.format !== 'json' && args.format !== 'txt') {
        throw new Error(
            `Unsupported output format '${args.format}' (expected 'txt' or 'json')`
        );
    }

    process.stderr.write('runmain(): loading zoo... (might take a couple minutes)\n');
    const eczoodb = await loadEcZoo({ dataDir: args.dataDir });

    process.stderr.write('runmain(): zoo is now loaded!\n');

    //
    // Zoo is loaded (eczoodb). Query anything we need from it at this point.
    //

    const encountered_citations = eczoodb.zoo_flm_processor.scanner.get_encountered('citations');

    // data[cite_prefix][cite_key] = [ { source_path }, ... ]
    //
    // Null-prototype objects are used so that a prefix or key that happens to
    // be named like an Object.prototype member (e.g. 'constructor') is stored
    // like any other key.
    const data = Object.create(null);

    for (const encountered_citation of encountered_citations) {
        //debug(`encountered citation, `, encountered_citation);
        const { cite_prefix, cite_key } = encountered_citation;
        const { object_type, object_id, source_path } =
              encountered_citation.encountered_in.resource_info;
        if (args.citePrefix && args.citePrefix !== cite_prefix) {
            continue;
        }
        if (args.domain) {
            if (object_type !== 'code') {
                continue;
            }
            const code = eczoodb.objects.code[object_id];
            // NOTE(review): assumes code_parent_domains() returns a list of
            // domain ids when called with `find_domain_id` -- confirm against
            // eczoodb.
            const domains = eczoodb.code_parent_domains(code, { find_domain_id: args.domain });
            if (domains.length !== 1 || domains[0] !== args.domain) {
                continue;
            }
        }
        data[cite_prefix] ??= Object.create(null);
        data[cite_prefix][cite_key] ??= [];
        data[cite_prefix][cite_key].push({ source_path });
    }

    //
    // Our relevant data is stored in `data`. Now we output the information the
    // way the user requested it.
    //

    let outputData = '';

    if (args.format === 'json') {

        // Build the structure to serialize.  Without source paths, each
        // citation key simply maps to `true`.
        const jsonData = Object.create(null);
        for (const [cite_prefix, db] of Object.entries(data)) {
            const jsonDb = Object.create(null);
            for (const [cite_key, occurrences] of Object.entries(db)) {
                jsonDb[cite_key] = args.includeSourcePath ? occurrences : true;
            }
            jsonData[cite_prefix] = jsonDb;
        }
        outputData = JSON.stringify(jsonData, null, 4);

    } else {

        // args.format === 'txt': one `prefix:key` entry per citation, sorted
        // by prefix and then by key.
        const cite_prefix_list = Object.keys(data);
        cite_prefix_list.sort();
        for (const cite_prefix of cite_prefix_list) {
            const db = data[cite_prefix];
            const cite_key_list = Object.keys(db);
            cite_key_list.sort();
            for (const cite_key of cite_key_list) {
                const occurrences = db[cite_key];
                outputData += `${cite_prefix}:${cite_key}`;
                if (args.includeSourcePath) {
                    outputData += ` :\n`
                        + occurrences.map( (x) => `\t\t${x.source_path}\n` ).join('');
                } else {
                    outputData += `\n`;
                }
            }
        }

    }

    if ( ! args.output ) {
        process.stdout.write(outputData);
    } else {
        process.stderr.write(`Writing output to ${args.output}\n`);
        fs.writeFileSync( args.output, outputData );
    }

    return;
}


//
// Main function. Parse command-line arguments and call runmain().
//
// String-valued options are declared with `type: 'string'` so that
// numeric-looking values (e.g. `--cite-prefix 1234` or `--output 2024`) are
// not converted to numbers by the argument parser.  The output format is
// restricted to the supported values via `choices`.
//
async function main()
{
    const args = yargs(hideBin(process.argv))
        .scriptName('query_bib_references')
        .usage('Usage: $0 [options]')
        .options({
            'data-dir': {
                alias: 'd',
                type: 'string',
                default: null,
                describe: "Data repository folder (defaults to sibling `eczoo_data` folder)",
            },
            'cite-prefix': {
                alias: 'p',
                type: 'string',
                default: null,
                describe: "Only include citations with the given cite_prefix (e.g. 'doi' or 'arxiv')",
            },
            'domain': {
                alias: 'D',
                type: 'string',
                default: null,
                describe: "Only include citations in codes that belong to the given domain_id",
            },
            'format': {
                alias: 'f',
                type: 'string',
                choices: ['txt', 'json'],
                default: 'txt',
                describe: "Output format ('txt' or 'json')",
            },
            'output': {
                alias: 'o',
                type: 'string',
                default: null,
                describe: "Output file to write to (default stdout)",
            },
            'include-source-path': {
                alias: 's',
                type: 'boolean',
                default: true,
                describe: "Whether or not to include the files where the citation was encountered",
            },
        })
        .strictOptions()
        .argv
        ;

    await runmain(args);
}

// Script entry point: run the command-line tool (uses top-level await).
await main();
48 changes: 48 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,21 @@ __metadata:
languageName: unknown
linkType: soft

"@errorcorrectionzoo/scripts@workspace:scripts":
version: 0.0.0-use.local
resolution: "@errorcorrectionzoo/scripts@workspace:scripts"
dependencies:
"@errorcorrectionzoo/eczoodb": ">=0.0.1"
"@phfaist/zoodb": "https://github.com/phfaist/zoodb.git#main"
debug: ^4.3.4
lodash: ^4.17.21
yargs: ^17.7.2
peerDependencies:
"@errorcorrectionzoo/eczoodb": "*"
"@phfaist/zoodb": "*"
languageName: unknown
linkType: soft

"@errorcorrectionzoo/site@workspace:site":
version: 0.0.0-use.local
resolution: "@errorcorrectionzoo/site@workspace:site"
Expand Down Expand Up @@ -3362,6 +3377,17 @@ __metadata:
languageName: node
linkType: hard

"cliui@npm:^8.0.1":
version: 8.0.1
resolution: "cliui@npm:8.0.1"
dependencies:
string-width: ^4.2.0
strip-ansi: ^6.0.1
wrap-ansi: ^7.0.0
checksum: 79648b3b0045f2e285b76fb2e24e207c6db44323581e421c3acbd0e86454cba1b37aea976ab50195a49e7384b871e6dfb2247ad7dec53c02454ac6497394cb56
languageName: node
linkType: hard

"clone@npm:^2.1.1":
version: 2.1.2
resolution: "clone@npm:2.1.2"
Expand Down Expand Up @@ -11104,6 +11130,13 @@ __metadata:
languageName: node
linkType: hard

"yargs-parser@npm:^21.1.1":
version: 21.1.1
resolution: "yargs-parser@npm:21.1.1"
checksum: ed2d96a616a9e3e1cc7d204c62ecc61f7aaab633dcbfab2c6df50f7f87b393993fe6640d017759fe112d0cb1e0119f2b4150a87305cc873fd90831c6a58ccf1c
languageName: node
linkType: hard

"yargs-unparser@npm:2.0.0":
version: 2.0.0
resolution: "yargs-unparser@npm:2.0.0"
Expand Down Expand Up @@ -11131,6 +11164,21 @@ __metadata:
languageName: node
linkType: hard

"yargs@npm:^17.7.2":
version: 17.7.2
resolution: "yargs@npm:17.7.2"
dependencies:
cliui: ^8.0.1
escalade: ^3.1.1
get-caller-file: ^2.0.5
require-directory: ^2.1.1
string-width: ^4.2.3
y18n: ^5.0.5
yargs-parser: ^21.1.1
checksum: 73b572e863aa4a8cbef323dd911d79d193b772defd5a51aab0aca2d446655216f5002c42c5306033968193bdbf892a7a4c110b0d77954a7fdf563e653967b56a
languageName: node
linkType: hard

"yauzl@npm:^2.10.0":
version: 2.10.0
resolution: "yauzl@npm:2.10.0"
Expand Down

0 comments on commit 9cc295c

Please sign in to comment.