-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathanalyst.py
27 lines (22 loc) · 807 Bytes
/
analyst.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import os
from collections import Counter
from datetime import datetime

import pandas as pd
from dotenv import load_dotenv
# Let python-dotenv populate the environment before CSV_FILE is looked up.
load_dotenv()
csv_path = os.getenv("CSV_FILE")
if csv_path is None:
    # Fail with an actionable message instead of handing None to read_csv.
    raise ValueError("CSV_FILE is not set; define it in the environment or .env")
# Rows with missing values are not dropped here; the title and timestamp
# steps below skip NaN cells themselves.
# Each element of lst is one CSV row, addressed by column position.
lst = pd.read_csv(csv_path).values
# Get number of posts for each tag, ordered from least to most frequent.
# Counter tallies every tag in a single pass instead of rescanning the whole
# list once per distinct tag. sorted() is stable, so tags with equal counts
# keep the order in which they first appear in the data.
tag = [doc[4] for doc in lst]
val1 = dict(sorted(Counter(tag).items(), key=lambda item: item[1]))
print(val1)
# Collect the distinct title lengths, measured as the number of pieces
# obtained by splitting each title on single spaces, in ascending order.
title = [doc[1] for doc in lst]
val2 = list({
    len(text.split(" "))
    for text in title
    if not str(text) == "nan"  # skip cells whose string form is "nan"
})
val2.sort()
print(val2)
# Get the sorted list of unique dates (as YYYY-MM-DD) found in the timestamps.
def _to_iso_date(stamp):
    """Return the date part of a "DD/MM/YYYY[ time]" string as "YYYY-MM-DD".

    Only the text before the first space is parsed. partition() returns the
    whole string when there is no space, whereas slicing with the -1 that
    find() returns would drop the last character of a date-only value.

    Raises:
        ValueError: if the date part does not match "%d/%m/%Y".
    """
    date_part = stamp.partition(" ")[0]
    return datetime.strptime(date_part, "%d/%m/%Y").strftime("%Y-%m-%d")


timestamp = [doc[7] for doc in lst]
# Skip the "Unknown" placeholder and cells whose string form is "nan".
# Sorting the YYYY-MM-DD strings lexicographically also orders them by date.
val3 = sorted({
    _to_iso_date(stamp)
    for stamp in timestamp
    if stamp != "Unknown" and str(stamp) != "nan"
})
print(val3)