Skip to content

Commit

Permalink
append mode
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelKatsoulis committed Dec 17, 2017
1 parent 8e22960 commit 8720eb9
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 4 deletions.
1 change: 1 addition & 0 deletions algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def store_to_database(data):
new_article['website'] = article.get('Website')
new_article['website_url'] = article.get('Website url')
new_article['checked'] = False
new_article['appended'] = False
exists = development_articles.find_one(
{"item_url": article.get('Article')})
if exists is None:
Expand Down
18 changes: 14 additions & 4 deletions script.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
import os
from pymongo import MongoClient
import pandas as pd
# converts mongodb collection to csv


def convert_collection_to_df(mongo_cli, collection, field1, match1,
                             field2, match2):
    """Load matching documents of a 'scrader' collection into a DataFrame.

    A document is selected when the value of ``field1`` is one of
    ``match1`` AND the value of ``field2`` is one of ``match2``.

    Args:
        mongo_cli: client object indexable by database name
            (the script passes a ``pymongo.MongoClient``).
        collection: name of the collection inside the 'scrader' database.
        field1: name of the first document field to filter on.
        match1: list of accepted values for ``field1``.
        field2: name of the second document field to filter on.
        match2: list of accepted values for ``field2``.

    Returns:
        pandas.DataFrame with one row per matching document; the ``_id``
        field is excluded by the projection. Empty when nothing matches.
    """
    scrader_db = mongo_cli['scrader']
    # Both conditions must hold; each one accepts any value from its list.
    query = {'$and': [{field1: {'$in': match1}},
                      {field2: {'$in': match2}}]}
    # Exclude '_id' so it does not end up as a column in the frame.
    cursor = scrader_db[collection].find(query, {'_id': False})
    return pd.DataFrame(list(cursor))


if __name__ == '__main__':
    # Single source of truth for the output path: the existence check and
    # both writes must target the same file, otherwise appended rows are
    # written somewhere other than the file that was checked.
    output_path = 'TrainingData.csv'

    # Select articles with checked == True and appended == False.
    dataframe = convert_collection_to_df(MongoClient(), 'dev_articles',
                                         'checked', [True],
                                         'appended', [False])
    # NOTE(review): nothing in this script sets 'appended' to True after
    # the export; presumably another component does - confirm, otherwise
    # re-running will append the same rows again.

    # Skip the write when there is nothing to export: writing an empty
    # frame on the first run would create the file without real column
    # headers, and later appends never write a header.
    if not dataframe.empty:
        if not os.path.isfile(output_path):
            # File does not exist yet: create it and write the header.
            dataframe.to_csv(output_path, encoding='utf-8')
        else:
            # File exists: append the rows without repeating the header.
            dataframe.to_csv(output_path, mode='a', header=False,
                             encoding='utf-8')

0 comments on commit 8720eb9

Please sign in to comment.