-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathload_recipes.py
More file actions
33 lines (23 loc) · 800 Bytes
/
load_recipes.py
File metadata and controls
33 lines (23 loc) · 800 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import json
from LDA_text_model import LDAtext
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from text_utils import striphtml, remove_words
# Load the recipe collection and keep the list stored under the top-level
# 'recipes' key. JSON text is UTF-8 by specification, so decode it explicitly
# rather than relying on the platform's default locale encoding.
with open('recipes.json', 'r', encoding='utf-8') as fp:
    recipes = json.load(fp)['recipes']

# Build one cleaned text per recipe: the 'summary' field is passed through
# striphtml (presumably strips HTML markup -- confirm in text_utils) and the
# literal 'spoonacular' is removed wherever it occurs.
summary = [
    striphtml(recipe['summary']).replace('spoonacular', '')
    for recipe in recipes
]
# NOTE(review): remove_words is assumed to drop the listed words from every
# summary and return the filtered collection -- confirm in text_utils.
summary = remove_words(
    ['score', 'recipe', 'take', 'things', 'serving'],
    summary,
)

# LDA
# Train the project's LDAtext wrapper on the cleaned summaries with two
# topics, then print whatever topics() reports.
a = LDAtext()
a.train(summary, number_of_topics=2, number_of_passes=30, number_of_workers=4)
print(a.topics())

# KMeans clustering with TF-IDF weights
tfidf_vectorizer = TfidfVectorizer()
tfidf = tfidf_vectorizer.fit_transform(summary)
# NOTE(review): no random_state is passed to KMeans, so the fitted clusters
# may differ from run to run; the fitted model is not used further in this
# script.
kmeans = KMeans(n_clusters=2)
kmeans.fit(tfidf)