-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathenhanced_classifier.py
36 lines (31 loc) · 1.17 KB
/
enhanced_classifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# enhanced_classifier.py
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
def train_classifier(texts=None, labels=None):
    """Train a TF-IDF + linear-kernel SVC text classifier.

    Called with no arguments, the model is fitted on a small built-in
    corpus of six sentences labelled ``"ML"`` or ``"Python"``.  Pass both
    ``texts`` and ``labels`` to train on other data instead.

    Args:
        texts: Iterable of raw document strings, or ``None`` to use the
            built-in sample corpus.
        labels: Iterable of class labels, one per document, or ``None`` to
            use the built-in sample labels.

    Returns:
        The fitted pipeline produced by
        ``make_pipeline(TfidfVectorizer(), SVC(kernel="linear"))``.

    Raises:
        TypeError: If ``texts`` is a single string/bytes object instead of
            a collection of documents.
        ValueError: If only one of ``texts``/``labels`` is supplied, if
            ``texts`` is empty, or if the two differ in length.
    """
    # Mixing caller-supplied texts with the built-in labels (or vice versa)
    # would silently train on mismatched data, so require both or neither.
    if (texts is None) != (labels is None):
        raise ValueError("texts and labels must be provided together")

    if texts is None:
        # Built-in sample data
        texts = [
            "This is a document about machine learning.",
            "This document is about Python programming.",
            "Machine learning and data science are related fields.",
            "Python is a popular programming language.",
            "Data science involves machine learning and statistics.",
            "Programming in Python is fun and easy.",
        ]
        labels = ["ML", "Python", "ML", "Python", "ML", "Python"]
    else:
        # A bare string would otherwise be split into single characters by
        # list() below and be treated as many one-letter documents.
        if isinstance(texts, (str, bytes)):
            raise TypeError(
                "texts must be a collection of documents, not a single string"
            )
        # Materialize so generators are accepted and lengths can be compared.
        texts = list(texts)
        labels = list(labels)
        if not texts:
            raise ValueError("texts must not be empty")
        if len(texts) != len(labels):
            raise ValueError(
                f"texts and labels differ in length: "
                f"{len(texts)} != {len(labels)}"
            )

    # Create a pipeline that combines TF-IDF and a Support Vector Machine classifier
    model = make_pipeline(TfidfVectorizer(), SVC(kernel="linear"))
    model.fit(texts, labels)
    return model
def classify_text(model, text):
    """Return the label ``model`` predicts for a single piece of text.

    Args:
        model: Any object exposing ``predict(list_of_texts)``.
        text: The document to classify.

    Returns:
        The first element of the result of ``model.predict([text])``.
    """
    # predict() is called on a batch, so wrap the text in a one-item list
    # and unwrap the single result.
    predictions = model.predict([text])
    return predictions[0]
def main():
    """Train the demo classifier and print its prediction for two sample texts."""
    classifier = train_classifier()
    samples = (
        "This is a new document about data science.",
        "Programming with Python is great.",
    )
    for sample in samples:
        predicted = classify_text(classifier, sample)
        print(f"Text: '{sample}' Classified as: {predicted}")
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()