You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
# Cluster a small set of short documents with TF-IDF features and K-Means,
# print the highest-weighted terms of each cluster centroid, then assign two
# unseen documents to the learned clusters.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans

documents = [
    "the young french men crowned world champions",
    "Google Translate app is getting more intelligent everyday",
    "Facebook face recognition is driving me crazy",
    "who is going to win the Golden Ball title this year",
    "these camera apps are funny",
    "Croacian team made a brilliant world cup campaign reaching the final match",
    "Google Chrome extensions are useful.",
    "Social Media apps leveraging AI incredibly",
    "Qatar 2022 FIFA world cup is played in winter",
]

# Turn the documents into a TF-IDF matrix, dropping English stop words.
vectorizer = TfidfVectorizer(stop_words='english')
data = vectorizer.fit_transform(documents)

true_k = 2
clustering_model = KMeans(
    n_clusters=true_k,
    init='k-means++',
    max_iter=300,
    n_init=10,
)
clustering_model.fit(data)

# terms per cluster
# argsort() is ascending, so reverse every row to get the feature indices of
# each centroid ordered from highest to lowest weight.
sorted_centroids = clustering_model.cluster_centers_.argsort()[:, ::-1]

# get_feature_names() was removed in scikit-learn 1.2; prefer its replacement
# and fall back to the old name only on releases that predate it (< 1.0).
if hasattr(vectorizer, "get_feature_names_out"):
    terms = vectorizer.get_feature_names_out()
else:
    terms = vectorizer.get_feature_names()

for i in range(true_k):
    # "Cluster N:" followed by the ten strongest terms, all on a single line.
    print("Cluster %d:" % i, end='')
    for ind in sorted_centroids[i, :10]:
        print(' %s' % terms[ind], end='')
    print()  # terminate the line of terms
    print()  # blank line between clusters
print()

# Example output from one run. KMeans is fitted without a fixed random_state,
# so which group ends up as cluster 0 or 1 may differ between runs:
# Cluster 0: apps google funny camera extensions useful chrome driving face facebook
# Cluster 1: world cup young champions crowned french men qatar fifa played

# predicting the cluster of new docs
# New text must go through the already-fitted vectorizer (transform, not
# fit_transform) so it is projected onto the vocabulary the model was fit on.
new_doc = ["how to install Chrome"]
Y = vectorizer.transform(new_doc)
prediction = clustering_model.predict(Y)
print(prediction)
# [0]  (in the example run above)

new_doc = ["UCL Final match is played in Madrid this year"]
Y = vectorizer.transform(new_doc)
prediction = clustering_model.predict(Y)
print(prediction)
# [1]  (in the example run above)