diff --git a/TEXT MINING ASSIGNMENT(elon musk).ipynb b/TEXT MINING ASSIGNMENT(elon musk).ipynb
new file mode 100644
index 0000000..aab16cf
--- /dev/null
+++ b/TEXT MINING ASSIGNMENT(elon musk).ipynb
@@ -0,0 +1,1100 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "fe72c2fa",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd \n",
+ "import numpy as np \n",
+ "import string # special operations on strings\n",
+ "import spacy # language model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "9fc118c7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from matplotlib.pyplot import imread\n",
+ "from matplotlib import pyplot as plt\n",
+ "from wordcloud import WordCloud, STOPWORDS\n",
+ "%matplotlib inline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 114,
+ "id": "2258910b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0, so it raises\n",
+ "# TypeError on current pandas. `on_bad_lines=\"warn\"` (pandas >= 1.3) keeps the old\n",
+ "# behaviour: malformed rows are skipped and a warning is emitted for each one.\n",
+ "data = pd.read_csv(\"Elon_musk.csv\", on_bad_lines=\"warn\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 115,
+ "id": "570caa3a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Unnamed: 0 | \n",
+ " Text | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " @kunalb11 I�m an alien | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " @ID_AA_Carmack Ray tracing on Cyberpunk with H... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " @joerogan @Spotify Great interview! | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " @gtera27 Doge is underestimated | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " @teslacn Congratulations Tesla China for amazi... | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1994 | \n",
+ " 1995 | \n",
+ " @flcnhvy True, it sounds so surreal, but the n... | \n",
+ "
\n",
+ " \n",
+ " 1995 | \n",
+ " 1996 | \n",
+ " @PPathole Make sure to read ur terms & con... | \n",
+ "
\n",
+ " \n",
+ " 1996 | \n",
+ " 1997 | \n",
+ " @TeslaGong @PPathole Samwise Gamgee | \n",
+ "
\n",
+ " \n",
+ " 1997 | \n",
+ " 1998 | \n",
+ " @PPathole Altho Dumb and Dumber is <U+0001F525... | \n",
+ "
\n",
+ " \n",
+ " 1998 | \n",
+ " 1999 | \n",
+ " Progress update August 28 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1999 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Unnamed: 0 Text\n",
+ "0 1 @kunalb11 I�m an alien\n",
+ "1 2 @ID_AA_Carmack Ray tracing on Cyberpunk with H...\n",
+ "2 3 @joerogan @Spotify Great interview!\n",
+ "3 4 @gtera27 Doge is underestimated\n",
+ "4 5 @teslacn Congratulations Tesla China for amazi...\n",
+ "... ... ...\n",
+ "1994 1995 @flcnhvy True, it sounds so surreal, but the n...\n",
+ "1995 1996 @PPathole Make sure to read ur terms & con...\n",
+ "1996 1997 @TeslaGong @PPathole Samwise Gamgee\n",
+ "1997 1998 @PPathole Altho Dumb and Dumber is \n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " x | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " @kunalb11 I�m an alien | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " @ID_AA_Carmack Ray tracing on Cyberpunk with H... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " @joerogan @Spotify Great interview! | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " @gtera27 Doge is underestimated | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " @teslacn Congratulations Tesla China for amazi... | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1994 | \n",
+ " @flcnhvy True, it sounds so surreal, but the n... | \n",
+ "
\n",
+ " \n",
+ " 1995 | \n",
+ " @PPathole Make sure to read ur terms & con... | \n",
+ "
\n",
+ " \n",
+ " 1996 | \n",
+ " @TeslaGong @PPathole Samwise Gamgee | \n",
+ "
\n",
+ " \n",
+ " 1997 | \n",
+ " @PPathole Altho Dumb and Dumber is <U+0001F525... | \n",
+ "
\n",
+ " \n",
+ " 1998 | \n",
+ " Progress update August 28 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "1999 rows × 1 columns
\n",
+ ""
+ ],
+ "text/plain": [
+ " x\n",
+ "0 @kunalb11 I�m an alien\n",
+ "1 @ID_AA_Carmack Ray tracing on Cyberpunk with H...\n",
+ "2 @joerogan @Spotify Great interview!\n",
+ "3 @gtera27 Doge is underestimated\n",
+ "4 @teslacn Congratulations Tesla China for amazi...\n",
+ "... ...\n",
+ "1994 @flcnhvy True, it sounds so surreal, but the n...\n",
+ "1995 @PPathole Make sure to read ur terms & con...\n",
+ "1996 @TeslaGong @PPathole Samwise Gamgee\n",
+ "1997 @PPathole Altho Dumb and Dumber is \n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " x | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " @kunalb11 I�m an alien | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " @ID_AA_Carmack Ray tracing on Cyberpunk with H... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " @joerogan @Spotify Great interview! | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " @gtera27 Doge is underestimated | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " @teslacn Congratulations Tesla China for amazi... | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1994 | \n",
+ " @flcnhvy True, it sounds so surreal, but the n... | \n",
+ "
\n",
+ " \n",
+ " 1995 | \n",
+ " @PPathole Make sure to read ur terms & con... | \n",
+ "
\n",
+ " \n",
+ " 1996 | \n",
+ " @TeslaGong @PPathole Samwise Gamgee | \n",
+ "
\n",
+ " \n",
+ " 1997 | \n",
+ " @PPathole Altho Dumb and Dumber is <U+0001F525... | \n",
+ "
\n",
+ " \n",
+ " 1998 | \n",
+ " Progress update August 28 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "1999 rows × 1 columns
\n",
+ ""
+ ],
+ "text/plain": [
+ " x\n",
+ "0 @kunalb11 I�m an alien\n",
+ "1 @ID_AA_Carmack Ray tracing on Cyberpunk with H...\n",
+ "2 @joerogan @Spotify Great interview!\n",
+ "3 @gtera27 Doge is underestimated\n",
+ "4 @teslacn Congratulations Tesla China for amazi...\n",
+ "... ...\n",
+ "1994 @flcnhvy True, it sounds so surreal, but the n...\n",
+ "1995 @PPathole Make sure to read ur terms & con...\n",
+ "1996 @TeslaGong @PPathole Samwise Gamgee\n",
+ "1997 @PPathole Altho Dumb and Dumber is \n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " x | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " I�m an alien | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " _AA_Carmack Ray tracing on Cyberpunk with HDR ... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Great interview! | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Doge is underestimated | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Congratulations Tesla China for amazing execu... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " x\n",
+ "0 I�m an alien\n",
+ "1 _AA_Carmack Ray tracing on Cyberpunk with HDR ...\n",
+ "2 Great interview!\n",
+ "3 Doge is underestimated\n",
+ "4 Congratulations Tesla China for amazing execu..."
+ ]
+ },
+ "execution_count": 122,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Overwrite column \"x\" with the result of running cleantext on each entry,\n",
+ "# then preview the first rows.\n",
+ "data[\"x\"] = data[\"x\"].apply(cleantext)\n",
+ "data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 123,
+ "id": "51b38df2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " x | \n",
+ " subjectivity | \n",
+ " polarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " I�m an alien | \n",
+ " 0.750000 | \n",
+ " -0.250000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " _AA_Carmack Ray tracing on Cyberpunk with HDR ... | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Great interview! | \n",
+ " 0.750000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Doge is underestimated | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Congratulations Tesla China for amazing execu... | \n",
+ " 0.366667 | \n",
+ " 0.345313 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1994 | \n",
+ " True, it sounds so surreal, but the negative ... | \n",
+ " 0.508333 | \n",
+ " 0.111111 | \n",
+ "
\n",
+ " \n",
+ " 1995 | \n",
+ " Make sure to read ur terms & conditions b... | \n",
+ " 0.888889 | \n",
+ " 0.625000 | \n",
+ "
\n",
+ " \n",
+ " 1996 | \n",
+ " Samwise Gamgee | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " 1997 | \n",
+ " Altho Dumb and Dumber is <U+0001F525><U+0001F... | \n",
+ " 0.500000 | \n",
+ " -0.375000 | \n",
+ "
\n",
+ " \n",
+ " 1998 | \n",
+ " Progress update August 28 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1999 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " x subjectivity \\\n",
+ "0 I�m an alien 0.750000 \n",
+ "1 _AA_Carmack Ray tracing on Cyberpunk with HDR ... 0.000000 \n",
+ "2 Great interview! 0.750000 \n",
+ "3 Doge is underestimated 0.000000 \n",
+ "4 Congratulations Tesla China for amazing execu... 0.366667 \n",
+ "... ... ... \n",
+ "1994 True, it sounds so surreal, but the negative ... 0.508333 \n",
+ "1995 Make sure to read ur terms & conditions b... 0.888889 \n",
+ "1996 Samwise Gamgee 0.000000 \n",
+ "1997 Altho Dumb and Dumber is "
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "from wordcloud import WordCloud\n",
+ "\n",
+ "# Join every entry of column \"x\" into one space-separated string for the cloud.\n",
+ "allwords = \" \".join(data[\"x\"])\n",
+ "\n",
+ "wordCloud = WordCloud(\n",
+ "    width=1000,\n",
+ "    height=1000,\n",
+ "    random_state=21,\n",
+ "    max_font_size=119,\n",
+ ").generate(allwords)\n",
+ "\n",
+ "# Render the generated cloud as an image with the axes hidden.\n",
+ "plt.figure(figsize=(20, 20), dpi=80)\n",
+ "plt.imshow(wordCloud, interpolation=\"bilinear\")\n",
+ "plt.axis(\"off\")\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 127,
+ "id": "1b1a046c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " x | \n",
+ " subjectivity | \n",
+ " polarity | \n",
+ " analysis | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " I�m an alien | \n",
+ " 0.750000 | \n",
+ " -0.250000 | \n",
+ " Negative | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " _AA_Carmack Ray tracing on Cyberpunk with HDR ... | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " Neutral | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Great interview! | \n",
+ " 0.750000 | \n",
+ " 1.000000 | \n",
+ " Positive | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Doge is underestimated | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " Neutral | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Congratulations Tesla China for amazing execu... | \n",
+ " 0.366667 | \n",
+ " 0.345313 | \n",
+ " Positive | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1994 | \n",
+ " True, it sounds so surreal, but the negative ... | \n",
+ " 0.508333 | \n",
+ " 0.111111 | \n",
+ " Positive | \n",
+ "
\n",
+ " \n",
+ " 1995 | \n",
+ " Make sure to read ur terms & conditions b... | \n",
+ " 0.888889 | \n",
+ " 0.625000 | \n",
+ " Positive | \n",
+ "
\n",
+ " \n",
+ " 1996 | \n",
+ " Samwise Gamgee | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " Neutral | \n",
+ "
\n",
+ " \n",
+ " 1997 | \n",
+ " Altho Dumb and Dumber is <U+0001F525><U+0001F... | \n",
+ " 0.500000 | \n",
+ " -0.375000 | \n",
+ " Negative | \n",
+ "
\n",
+ " \n",
+ " 1998 | \n",
+ " Progress update August 28 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " Neutral | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1999 rows × 4 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " x subjectivity \\\n",
+ "0 I�m an alien 0.750000 \n",
+ "1 _AA_Carmack Ray tracing on Cyberpunk with HDR ... 0.000000 \n",
+ "2 Great interview! 0.750000 \n",
+ "3 Doge is underestimated 0.000000 \n",
+ "4 Congratulations Tesla China for amazing execu... 0.366667 \n",
+ "... ... ... \n",
+ "1994 True, it sounds so surreal, but the negative ... 0.508333 \n",
+ "1995 Make sure to read ur terms & conditions b... 0.888889 \n",
+ "1996 Samwise Gamgee 0.000000 \n",
+ "1997 Altho Dumb and Dumber is "
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "plt.figure(figsize=(10, 8))\n",
+ "\n",
+ "# Plot all rows with a single vectorized scatter call. The previous per-row loop\n",
+ "# created one artist per tweet (slow) and used data[col][i] label lookups, which\n",
+ "# only work while the index is the default 0..n-1.\n",
+ "plt.scatter(data[\"polarity\"], data[\"subjectivity\"], color=\"Red\")\n",
+ "\n",
+ "plt.title(\"Sentiment Analysis\")  # graph title\n",
+ "plt.xlabel(\"Polarity\")  # x-axis label\n",
+ "plt.ylabel(\"Subjectivity\")  # y-axis label\n",
+ "plt.show()  # render the figure"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 194,
+ "id": "00502eff",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.feature_extraction.text import CountVectorizer\n",
+ "\n",
+ "# Fit a default CountVectorizer on column \"x\" and keep the resulting\n",
+ "# document-term count matrix in X.\n",
+ "vectorizer = CountVectorizer()\n",
+ "X = vectorizer.fit_transform(data[\"x\"])\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 195,
+ "id": "569273d4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "an 474\n",
+ "alien 438\n",
+ "_aa_carmack 215\n",
+ "ray 2969\n",
+ "tracing 3750\n",
+ " ... \n",
+ "clicking 921\n",
+ "accept 363\n",
+ "samwise 3149\n",
+ "gamgee 1663\n",
+ "altho 456\n",
+ "Length: 4117, dtype: int64"
+ ]
+ },
+ "execution_count": 195,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# vocabulary_ maps each term to its feature (column) index in the fitted matrix;\n",
+ "# the numbers shown are indices, not occurrence counts.\n",
+ "pd.Series(vectorizer.vocabulary_)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 197,
+ "id": "dbff8b5b",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "the 403\n",
+ "to 390\n",
+ "is 347\n",
+ "a 324\n",
+ "of 301\n",
+ "& 216\n",
+ "in 203\n",
+ "for 177\n",
+ "be 154\n",
+ "will 136\n",
+ "on 124\n",
+ ": 117\n",
+ "I 114\n",
+ "but 109\n",
+ "that 104\n",
+ "with 99\n",
+ "are 95\n",
+ "it 89\n",
+ "at 85\n",
+ "Tesla 85\n",
+ "The 79\n",
+ "we 77\n",
+ "� 76\n",
+ "and 72\n",
+ "this 72\n",
+ "from 70\n",
+ "have 69\n",
+ "was 68\n",
+ "as 63\n",
+ "This 60\n",
+ "you 58\n",
+ "We 58\n",
+ "has 55\n",
+ "not 54\n",
+ "Yes 50\n",
+ "so 48\n",
+ "more 48\n",
+ "just 47\n",
+ "than 44\n",
+ "should 43\n",
+ "an 39\n",
+ "all 39\n",
+ "can 39\n",
+ "or 37\n",
+ "do 37\n",
+ "It�s 37\n",
+ "like 37\n",
+ "great 36\n",
+ "would 36\n",
+ "launch 36\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 197,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Word frequency: join column \"x\" into one string, split on whitespace, and keep\n",
+ "# the 50 most frequent tokens (value_counts sorts by count, descending).\n",
+ "freq = pd.Series(' '.join(data[\"x\"]).split()).value_counts()[:50]\n",
+ "freq"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7753767a",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.8"
+ },
+ "varInspector": {
+ "cols": {
+ "lenName": 16,
+ "lenType": 16,
+ "lenVar": 40
+ },
+ "kernels_config": {
+ "python": {
+ "delete_cmd_postfix": "",
+ "delete_cmd_prefix": "del ",
+ "library": "var_list.py",
+ "varRefreshCmd": "print(var_dic_list())"
+ },
+ "r": {
+ "delete_cmd_postfix": ") ",
+ "delete_cmd_prefix": "rm(",
+ "library": "var_list.r",
+ "varRefreshCmd": "cat(var_dic_list()) "
+ }
+ },
+ "types_to_exclude": [
+ "module",
+ "function",
+ "builtin_function_or_method",
+ "instance",
+ "_Feature"
+ ],
+ "window_display": false
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}