-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscript.py
26 lines (22 loc) · 1.07 KB
/
script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import os
from pymongo import MongoClient
import pandas as pd
# Load the documents of a MongoDB collection that match two `$in` filters
# into a pandas DataFrame (no CSV is written here).
def convert_collection_to_df(mongo_cli, collection, field1, match1,
                             field2, match2, db_name='scrader'):
    """Return the matching documents of ``collection`` as a DataFrame.

    A document is selected only when the value of ``field1`` is one of
    ``match1`` AND the value of ``field2`` is one of ``match2``. The
    ``_id`` field is projected out, so it never becomes a column.

    Args:
        mongo_cli: Client object indexable by database name (for example
            a ``pymongo.MongoClient``).
        collection: Name of the collection to query.
        field1: Name of the first field to filter on.
        match1: List of accepted values for ``field1``.
        field2: Name of the second field to filter on.
        match2: List of accepted values for ``field2``.
        db_name: Name of the database that holds ``collection``. Defaults
            to ``'scrader'``, the database this function used to
            hard-code.

    Returns:
        A ``pandas.DataFrame`` with one row per matching document; the
        frame is empty when nothing matches.
    """
    # An explicit $and keeps both conditions even when field1 == field2,
    # which a single dict with a duplicated key could not express.
    query = {'$and': [{field1: {'$in': match1}},
                      {field2: {'$in': match2}}]}
    cursor = mongo_cli[db_name][collection].find(query, {'_id': False})
    return pd.DataFrame(list(cursor))
def append_dataframe_to_csv(dataframe, path):
    """Append ``dataframe`` to the CSV file at ``path``.

    The header row is written only when ``path`` does not exist yet, so
    repeated runs accumulate rows under a single header. The same path is
    used for the existence check and for the write.

    Args:
        dataframe: ``pandas.DataFrame`` holding the rows to store.
        path: Destination CSV file; created when missing.

    Returns:
        ``True`` when rows were written, ``False`` when ``dataframe`` was
        empty and nothing was written.
    """
    # Writing an empty frame to a new file would leave a header-only file
    # without column names, and later header-less appends would then be
    # unreadable.
    if dataframe.empty:
        return False
    write_header = not os.path.isfile(path)
    # mode='a' creates the file when it is missing, so one call covers
    # both the "new file" and the "append" case.
    dataframe.to_csv(path, mode='a', header=write_header, encoding='utf-8')
    return True


if __name__ == '__main__':
    dataframe = convert_collection_to_df(MongoClient(), 'dev_articles',
                                         'checked', [True],
                                         'appended', [False])
    # NOTE(review): nothing here sets 'appended' to True afterwards, so
    # the same articles are presumably exported again on the next run --
    # confirm whether another process updates that flag.
    append_dataframe_to_csv(dataframe, 'TrainingData.csv')