Naive Bayes

In machine learning, naive Bayes classifiers are a family of simple probabilistic classifiers based on applying Bayes’ theorem with strong (naive) independence assumptions between the features.


Spam Classification

spam classification

Sentiment Analysis on Movie Reviews

import numpy as np
from nltk.probability import FreqDist
from nltk.classify import SklearnClassifier
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

pipeline = Pipeline([('tfidf', TfidfTransformer()),
                     ('chi2', SelectKBest(chi2, k=1000)),
                     ('nb', MultinomialNB())])

clf = SklearnClassifier(pipeline)

from nltk.corpus import movie_reviews
pos = [FreqDist(movie_reviews.words(i)) for i in movie_reviews.fileids('pos')]
neg = [FreqDist(movie_reviews.words(i)) for i in movie_reviews.fileids('neg')]
add_label = lambda lst, lab: [(x, lab) for x in lst]
clf.train(add_label(pos[:100], 'pos') + add_label(neg[:100], 'neg'))

l_pos = np.array(clf.classify_many(pos[100:]))
l_neg = np.array(clf.classify_many(neg[100:]))
print "Confusion matrix:\n%d\t%d\n%d\t%d" % (
          (l_pos == 'pos').sum(), (l_pos == 'neg').sum(),
          (l_neg == 'pos').sum(), (l_neg == 'neg').sum())

Leave a Reply

Fill in your details below or click an icon to log in: Logo

You are commenting using your account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s