-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathstat.py
31 lines (23 loc) · 831 Bytes
/
stat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import sqlite3
import argparse
def count_labels(conn):
    """Count how many files carry each label in an open dataset database.

    Args:
        conn: An open ``sqlite3`` connection. The database must contain a
            ``Labels`` table with ``label_id`` and ``label_desc`` columns and
            a ``Files`` table whose ``label_ids`` column holds a
            comma-separated string of integer label ids.

    Returns:
        A ``(counts, descriptions)`` tuple of dicts keyed by label id.
        ``counts`` maps each label to the number of files that list it
        (labels no file uses stay at 0); ``descriptions`` maps each label
        to its ``label_desc``.

    Raises:
        KeyError: If a file lists a label id that is absent from ``Labels``.
        ValueError: If a ``label_ids`` token is not an integer.
    """
    counts = {}
    descriptions = {}
    for label_id, label_desc in conn.execute(
            'SELECT label_id, label_desc from Labels'):
        counts[label_id] = 0
        descriptions[label_id] = label_desc

    for (label_ids,) in conn.execute('SELECT label_ids FROM Files'):
        # A file without labels (NULL or empty string) contributes nothing.
        if not label_ids:
            continue
        for token in label_ids.split(','):
            # Tolerate stray separators such as a trailing comma.
            if not token.strip():
                continue
            counts[int(token)] += 1
    return counts, descriptions


def main():
    """Print the per-label file counts and their sum for one dataset.

    The dataset is chosen with the mandatory ``--dataset`` command-line
    option and read from ``data/<dataset>.sqlite``.
    """
    parser = argparse.ArgumentParser()
    # Required: without it the path would silently become data/None.sqlite.
    parser.add_argument('--dataset', choices=['mouse', 'trecgen', '20ng'],
                        required=True)
    args = parser.parse_args()

    # Read-only URI mode: a missing database raises an error instead of
    # being created as an empty file.
    conn = sqlite3.connect(f'file:data/{args.dataset}.sqlite?mode=ro',
                           uri=True)
    try:
        counts, descriptions = count_labels(conn)
    finally:
        conn.close()

    # A file with several labels is counted once per label, so the total is
    # the number of label assignments, not the number of files.
    total = 0
    for label_id, count in counts.items():
        total += count
        print(f'{descriptions[label_id]}: {count}')
    print('-' * 10)
    print(f'total = {total}')


if __name__ == '__main__':
    main()