-
Notifications
You must be signed in to change notification settings - Fork 82
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Visualize manuscript/project growth #1019
Changes from 16 commits
f930773
051cf81
feb6b92
c0afb29
701bb87
d1d9869
4097f43
6c90ab6
4e18224
16b65e3
1f8e23d
0157828
826ec62
100a617
94a2025
1c9f60f
d01b81b
199b8ca
37c9270
f966e34
ded441f
461c872
8485a01
472267e
427b1d7
5067163
8fe0db9
fc53320
3fbea6b
2ce888a
1b404b8
23e6888
995433d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
import json | ||
import subprocess | ||
import pandas as pd | ||
import matplotlib | ||
import argparse | ||
import multiprocessing | ||
|
||
def _read_json_at_commit(commit, filename):
    """Return the parsed JSON content of ``filename`` as stored in ``commit``.

    Runs ``git show <commit>:./<filename>`` without a shell, so the commit
    string is never interpreted by a shell, and reads only stdout, so
    warnings git prints on stderr cannot corrupt the JSON text.

    Raises ValueError if git cannot be run, if ``git show`` exits with a
    non-zero status (e.g. unknown commit or the file is missing from it),
    or if the output is not valid JSON.
    """
    spec = f"{commit}:./{filename}"
    try:
        result = subprocess.run(
            ["git", "show", spec],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
    except OSError as err:
        raise ValueError(f"Could not run 'git show {spec}': {err}") from err

    if result.returncode != 0:
        raise ValueError(
            f"'git show {spec}' failed with exit code {result.returncode}: "
            f"{result.stderr.strip()}")

    try:
        return json.loads(result.stdout)
    except json.JSONDecodeError as err:
        # JSONDecodeError is itself a ValueError; re-raise with the commit
        # and file named so the failing entry can be identified.
        raise ValueError(
            f"Output of 'git show {spec}' is not valid JSON: {err}") from err


def analyze_commit(commit):
    """Collect manuscript statistics recorded in one commit.

    Accepts the commit ID as a string and reads variables.json and
    references.json as they exist in that commit.

    Returns a list of 5 statistics:
    [date, clean_date, number of authors, word count / 1000,
    number of references].

    Raises ValueError if either file cannot be retrieved or parsed, and
    KeyError if variables.json lacks one of the expected keys.
    """
    variables = _read_json_at_commit(commit, "variables.json")

    date = variables['pandoc']['date-meta']
    clean_date = variables['manubot']['date']
    num_authors = len(variables['manubot']['authors'])
    word_count = variables['manubot']['manuscript_stats']['word_count']

    # Each entry in references.json counts as one reference
    references = _read_json_at_commit(commit, "references.json")
    num_ref = len(references)

    # Word count is reported in thousands of words
    return [date, clean_date, num_authors, word_count / 1000, num_ref]
|
||
def main(args):
    """Extract statistics from the output branch log.

    ``args`` must provide three attributes:
    commit_list   -- path of a text file with one commit ID per line
    output_json   -- path of the JSON file to write
    output_figure -- path of the figure to write, without extension;
                     both a .png and a .svg are saved

    The statistics of the first commit in the list are written to the JSON
    file, and the per-commit statistics are plotted in reverse list order.

    Raises ValueError if the commit list contains no commit IDs.
    """
    # Imported explicitly: "import matplotlib" alone does not guarantee that
    # the matplotlib.ticker submodule is loaded.
    from matplotlib import ticker

    # Read the commit IDs, skipping blank lines so that an empty string is
    # never passed to git as a commit
    with open(args.commit_list, "r") as commit_file:
        commits = [line.strip() for line in commit_file.read().splitlines()
                   if line.strip()]
    if not commits:
        raise ValueError(f'No commits found in {args.commit_list}')

    # Access the variables.json and references.json files associated with
    # each commit and store in dictionary. The context manager shuts the
    # worker processes down even if a worker raises.
    with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
        commit_data = dict(zip(commits, pool.map(analyze_commit, commits)))

    # Convert dictionary to dataframe
    growthdata = pd.DataFrame.from_dict(
        commit_data, orient="index",
        columns=["Date", "clean_date", "Authors", "Word Count", "References"])

    # Statistics of the first listed commit; numeric values are stored as
    # strings in the JSON output
    manuscript_stats = growthdata.iloc[0].to_dict()
    for item in ["Authors", "Word Count", "References"]:
        manuscript_stats[item] = str(manuscript_stats[item])

    # Reverse the row order for plotting and index the rows by date
    growthdata = growthdata[::-1]
    growthdata = growthdata.set_index("Date")

    # Plot the data, one subplot per column
    # NOTE(review): analyze_commit returns the word count divided by 1000,
    # so the 'Count' label on the Word Count axis is in thousands.
    axes = growthdata.plot(kind='line', linewidth=2, subplots=True)
    for ax in axes:
        ax.set_ylabel('Count')
        ax.get_yaxis().set_major_formatter(ticker.FuncFormatter(
            lambda x, p: format(int(x), ',')))
        ax.set_ylim(bottom=0)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.minorticks_off()
        ax.grid(color="lightgray")

    # All subplots share one figure
    figure = axes[0].figure
    figure.savefig(args.output_figure + '.png', dpi=300, bbox_inches="tight")
    figure.savefig(args.output_figure + '.svg', bbox_inches="tight")

    print(f'Wrote {args.output_figure}.png and {args.output_figure}.svg')

    # Write json output file
    with open(args.output_json, 'w') as out_file:
        json.dump(manuscript_stats, out_file, indent=2, sort_keys=True)
    print(f'Wrote {args.output_json}')
|
||
if __name__ == '__main__':
    # Command-line entry point: three required positional arguments,
    # all plain strings, handed to main() as a parsed namespace.
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)

    positional_arguments = (
        ('commit_list',
         'List of all commits on output branch, one per line'),
        ('output_json',
         'Path of the JSON file with extracted statistics'),
        ('output_figure',
         'Path of the output figure for manuscript '
         'statistics without file type extension. Will be saved '
         'as .png and .svg.'),
    )
    for argument_name, argument_help in positional_arguments:
        cli.add_argument(argument_name, help=argument_help, type=str)

    args = cli.parse_args()
    main(args)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Calculate growth statistics for manuscript based on Manubot files

# Define input and output files up front so every step below uses the
# same paths
COMMIT_LIST=analyze-ms-stats/output-commits.txt
OUTPUT_JSON=analyze-ms-stats/manuscript_stats.json
OUTPUT_FIG=content/images/manuscript_stats

# Generate list of all commits in history of output branch
# (abbreviated hashes, first-parent history only)
echo "Generate log for output branch"
git log --pretty=format:"%h" --first-parent output > "$COMMIT_LIST"

# Run python script
echo "Run python script"
python analyze-ms-stats/calc-manuscript-stats.py "$COMMIT_LIST" "$OUTPUT_JSON" "$OUTPUT_FIG"

# Clean up temporary files
# -f keeps the cleanup from failing when the *_tmp.json files do not exist
echo "Clean up temporary files"
rm -f ./references_tmp.json ./variables_tmp.json "$COMMIT_LIST"
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{ | ||
"Authors": "52", | ||
"Date": "2021-08-27", | ||
"References": "1585", | ||
"Word Count": "134.698", | ||
"clean_date": "August 27, 2021" | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What happens here if the command (`git show...`) fails for some reason? Is that something that could potentially happen or not?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good call! It shouldn't, but certainly if the wrong branch was somehow checked out, I can imagine it causing issues.