"""Cluster short text documents: TF-IDF -> TruncatedSVD (LSA) -> Gaussian mixture.

Vectorizes a handful of sample sentences, projects them to 2-D with latent
semantic analysis, fits a 2-component GMM, and prints each document's cluster.
"""
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.mixture import GaussianMixture

# 1. Sample text data
documents = [
    "Apple is looking at buying U.K. startup for $1 billion",
    "Autonomous cars shift insurance liability toward manufacturers",
    "San Francisco considers banning sidewalk delivery robots",
    "London is a big financial center",
    "Startup companies are everywhere in the Valley",
]

# 2. Vectorize text (English stop words dropped; rows are TF-IDF weights)
vectorizer = TfidfVectorizer(stop_words='english')
X_tfidf = vectorizer.fit_transform(documents)

# 3. Reduce dimensionality to a dense 2-D embedding (LSA).
#    TruncatedSVD's default randomized solver is stochastic, so pin
#    random_state for reproducible output (GMM below already does).
svd = TruncatedSVD(n_components=2, random_state=0)
X_reduced = svd.fit_transform(X_tfidf)

# 4. Fit a 2-component Gaussian mixture on the reduced vectors
gmm = GaussianMixture(n_components=2, random_state=0)
labels = gmm.fit_predict(X_reduced)

# 5. Output clustering result
for doc, label in zip(documents, labels):
    print(f"Cluster {label}: {doc}")