-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
90123a7
commit a313367
Showing
6 changed files
with
6 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Ensemble_SVM_LR_Malayalam.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"NmxNxMi5TECW","executionInfo":{"status":"ok","timestamp":1624685064818,"user_tz":-330,"elapsed":20322,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"73bc084c-c68a-44c8-e1da-4730b454edbd"},"source":["from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Mounted at /content/drive\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"7akUl9KLBilu"},"source":["import pandas as pd\n","SVM = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/SVM_char_1-6_gram_Malayalam_74.csv')\n","LR = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/LR_char_1-6_gram_Malayalam_73.csv')"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"giZvKKReCGFd"},"source":["predictions_test_submission=(SVM+LR)/2"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"pwxeTE5ZC6y3","executionInfo":{"status":"ok","timestamp":1624685124940,"user_tz":-330,"elapsed":9,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"c606c89a-6a0f-47dc-bb4c-2f2f6127e003"},"source":["predictions_test_submission"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>Mixed_feelings</th>\n"," <th>Negative</th>\n"," <th>Positive</th>\n"," <th>not-malayalam</th>\n"," <th>unknown_state</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>0.382389</td>\n"," <td>0.103209</td>\n"," <td>0.186992</td>\n"," <td>0.080896</td>\n"," <td>0.246514</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>0.063125</td>\n"," <td>0.149519</td>\n"," <td>0.307331</td>\n"," <td>0.008070</td>\n"," <td>0.471956</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>0.062019</td>\n"," <td>0.145768</td>\n"," <td>0.394981</td>\n"," <td>0.036823</td>\n"," <td>0.360409</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>0.002211</td>\n"," <td>0.003588</td>\n"," <td>0.015616</td>\n"," <td>0.891508</td>\n"," <td>0.087077</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>0.005525</td>\n"," <td>0.006591</td>\n"," <td>0.957715</td>\n"," <td>0.006389</td>\n"," <td>0.023780</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>1957</th>\n"," <td>0.015721</td>\n"," <td>0.281389</td>\n"," <td>0.389391</td>\n"," <td>0.000267</td>\n"," <td>0.313233</td>\n"," </tr>\n"," <tr>\n"," <th>1958</th>\n"," <td>0.008293</td>\n"," <td>0.015068</td>\n"," <td>0.034317</td>\n"," <td>0.011154</td>\n"," <td>0.931168</td>\n"," </tr>\n"," <tr>\n"," <th>1959</th>\n"," <td>0.102701</td>\n"," <td>0.167398</td>\n"," <td>0.432741</td>\n"," <td>0.003643</td>\n"," <td>0.293517</td>\n"," </tr>\n"," <tr>\n"," <th>1960</th>\n"," <td>0.101695</td>\n"," <td>0.344980</td>\n"," <td>0.318964</td>\n"," <td>0.002100</td>\n"," <td>0.232261</td>\n"," </tr>\n"," <tr>\n"," <th>1961</th>\n"," <td>0.191514</td>\n"," <td>0.262473</td>\n"," <td>0.400789</td>\n"," <td>0.003091</td>\n"," <td>0.142132</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>1962 rows × 5 columns</p>\n","</div>"],"text/plain":[" Mixed_feelings Negative Positive not-malayalam unknown_state\n","0 0.382389 0.103209 0.186992 0.080896 0.246514\n","1 0.063125 0.149519 0.307331 0.008070 0.471956\n","2 0.062019 0.145768 0.394981 0.036823 0.360409\n","3 0.002211 0.003588 0.015616 0.891508 0.087077\n","4 0.005525 0.006591 0.957715 0.006389 0.023780\n","... ... ... ... ... ...\n","1957 0.015721 0.281389 0.389391 0.000267 0.313233\n","1958 0.008293 0.015068 0.034317 0.011154 0.931168\n","1959 0.102701 0.167398 0.432741 0.003643 0.293517\n","1960 0.101695 0.344980 0.318964 0.002100 0.232261\n","1961 0.191514 0.262473 0.400789 0.003091 0.142132\n","\n","[1962 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"code","metadata":{"id":"yx6tspZ0CJsv"},"source":["import numpy as np\n","predictions_test_submission = np.asarray(predictions_test_submission)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"yhvtEGcuCMKO"},"source":["import numpy as np\n","predictions_test_submission1 = np.zeros_like(predictions_test_submission)\n","predictions_test_submission1[np.arange(len(predictions_test_submission)), predictions_test_submission.argmax(1)] = 1"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UFz5rdWVDil_","executionInfo":{"status":"ok","timestamp":1624685131170,"user_tz":-330,"elapsed":3,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"64d670ec-cb71-45a5-aa4b-4973f4ec7dc9"},"source":["predictions_test_submission1"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[1., 0., 0., 0., 0.],\n"," [0., 0., 0., 0., 1.],\n"," [0., 0., 1., 0., 0.],\n"," ...,\n"," [0., 0., 1., 0., 0.],\n"," [0., 1., 0., 0., 0.],\n"," [0., 0., 1., 0., 0.]])"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"id":"EcEP5VhyDktU"},"source":["predictions_test_submission1_final = np.argmax(predictions_test_submission1, axis=1)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"z_-UdPnAFOfo","executionInfo":{"status":"ok","timestamp":1624685133667,"user_tz":-330,"elapsed":7,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"02518122-bb6f-4902-fb96-ee8a39d1f3e1"},"source":["predictions_test_submission1_final"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([0, 4, 2, ..., 2, 1, 2])"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"code","metadata":{"id":"yj7AcwqbmTKP"},"source":["import pandas as pd \n","data_train = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/Mal_sentiment_full_train.tsv', sep = '\\t')\n","data_val = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/Mal_sentiment_full_dev.tsv', sep = '\\t')\n","data_test = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/Mal_sentiment_full_test_withoutlabels.tsv', sep = '\\t')"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ZnuYD4aqTkWk","executionInfo":{"status":"ok","timestamp":1624685178312,"user_tz":-330,"elapsed":658,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"7dd716ae-503c-42c0-8096-0691658b4f7b"},"source":["from sklearn import preprocessing\n","import numpy as np\n","\n","le = preprocessing.LabelEncoder()\n","\n","le.fit(data_train['category'])\n","print(le.classes_)\n","\n","y_train_index = le.transform(data_train['category'])\n","y_val_index = le.transform(data_val['category'])"],"execution_count":null,"outputs":[{"output_type":"stream","text":["['Mixed_feelings' 'Negative' 'Positive' 'not-malayalam' 'unknown_state']\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"o5T7yAcFFpfm"},"source":["predictions_test_submission1_final1 = le.inverse_transform(predictions_test_submission1_final)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"PSSBXvkTFze-","executionInfo":{"status":"ok","timestamp":1624685181535,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"58cfe4a7-98fc-453d-afe6-a23c63270ad4"},"source":["predictions_test_submission1_final1"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array(['Mixed_feelings', 'unknown_state', 'Positive', ..., 'Positive',\n"," 'Negative', 'Positive'], dtype=object)"]},"metadata":{"tags":[]},"execution_count":13}]},{"cell_type":"code","metadata":{"id":"otag6te-F1It"},"source":["data_test['category'] = predictions_test_submission1_final1"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"5ozZTPDwGiSv","executionInfo":{"status":"ok","timestamp":1624685186909,"user_tz":-330,"elapsed":6,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"4f1f70e7-af9b-4645-9a2c-ff4336200777"},"source":["data_test"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>id</th>\n"," <th>text</th>\n"," <th>category</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>Mal_1</td>\n"," <td>Teaserinu kurach samayamkoodi mathram. Cant wa...</td>\n"," <td>Mixed_feelings</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>Mal_2</td>\n"," <td>അപ്പോൾ കഥയുടെ റൂട്ട് മാറിയല്ലോ...</td>\n"," <td>unknown_state</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>Mal_3</td>\n"," <td>മൂത്തോൻ ട്രൈലെർ Trending List വരാത്തത് എന്താ ?</td>\n"," <td>Positive</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>Mal_4</td>\n"," <td>Nowadays 944k views is considered as 1M views ...</td>\n"," <td>not-malayalam</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>Mal_5</td>\n"," <td>Maass.trailer ennu paranja ithaanU makkale.......</td>\n"," <td>Positive</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>1957</th>\n"," <td>Mal_1958</td>\n"," <td>Expression വച്ചു നോക്കുമ്പോൾ മൂക്കുത്തി കട്ടത്...</td>\n"," <td>Positive</td>\n"," </tr>\n"," <tr>\n"," <th>1958</th>\n"," <td>Mal_1959</td>\n"," <td>എനിക്ക് ഒരു 100 like തരുമോ</td>\n"," <td>unknown_state</td>\n"," </tr>\n"," <tr>\n"," <th>1959</th>\n"," <td>Mal_1960</td>\n"," <td>Nannayi onnu poliyunna lakshnm unde...Thrill m...</td>\n"," <td>Positive</td>\n"," </tr>\n"," <tr>\n"," <th>1960</th>\n"," <td>Mal_1961</td>\n"," <td>Athikam eduthu kayatti vekkenda....direction k...</td>\n"," <td>Negative</td>\n"," </tr>\n"," <tr>\n"," <th>1961</th>\n"," <td>Mal_1962</td>\n"," <td>Ithupole mooonchiya Oru padam jeevithathil kan...</td>\n"," <td>Positive</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>1962 rows × 3 columns</p>\n","</div>"],"text/plain":[" id ... category\n","0 Mal_1 ... Mixed_feelings\n","1 Mal_2 ... unknown_state\n","2 Mal_3 ... Positive\n","3 Mal_4 ... not-malayalam\n","4 Mal_5 ... Positive\n","... ... ... ...\n","1957 Mal_1958 ... Positive\n","1958 Mal_1959 ... unknown_state\n","1959 Mal_1960 ... Positive\n","1960 Mal_1961 ... Negative\n","1961 Mal_1962 ... Positive\n","\n","[1962 rows x 3 columns]"]},"metadata":{"tags":[]},"execution_count":15}]},{"cell_type":"code","metadata":{"id":"Jt6Z80uFGjsD"},"source":["data_test.to_csv(\"/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam//Ensemble_SVM_LR_Malalayam.tsv\", sep=\"\\t\", index=False)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"11fdDMTpG8QS"},"source":["check_submission = pd.read_csv(\"/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam//Ensemble_SVM_LR_Malalayam.tsv\", sep=\"\\t\")"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"uwyh-IqGG8SG","executionInfo":{"status":"ok","timestamp":1624685262383,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"867f7e4a-2308-4e01-b70c-9999e9dcd1be"},"source":["check_submission"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>id</th>\n"," <th>text</th>\n"," <th>category</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>Mal_1</td>\n"," <td>Teaserinu kurach samayamkoodi mathram. Cant wa...</td>\n"," <td>Mixed_feelings</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>Mal_2</td>\n"," <td>അപ്പോൾ കഥയുടെ റൂട്ട് മാറിയല്ലോ...</td>\n"," <td>unknown_state</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>Mal_3</td>\n"," <td>മൂത്തോൻ ട്രൈലെർ Trending List വരാത്തത് എന്താ ?</td>\n"," <td>Positive</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>Mal_4</td>\n"," <td>Nowadays 944k views is considered as 1M views ...</td>\n"," <td>not-malayalam</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>Mal_5</td>\n"," <td>Maass.trailer ennu paranja ithaanU makkale.......</td>\n"," <td>Positive</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>1957</th>\n"," <td>Mal_1958</td>\n"," <td>Expression വച്ചു നോക്കുമ്പോൾ മൂക്കുത്തി കട്ടത്...</td>\n"," <td>Positive</td>\n"," </tr>\n"," <tr>\n"," <th>1958</th>\n"," <td>Mal_1959</td>\n"," <td>എനിക്ക് ഒരു 100 like തരുമോ</td>\n"," <td>unknown_state</td>\n"," </tr>\n"," <tr>\n"," <th>1959</th>\n"," <td>Mal_1960</td>\n"," <td>Nannayi onnu poliyunna lakshnm unde...Thrill m...</td>\n"," <td>Positive</td>\n"," </tr>\n"," <tr>\n"," <th>1960</th>\n"," <td>Mal_1961</td>\n"," <td>Athikam eduthu kayatti vekkenda....direction k...</td>\n"," <td>Negative</td>\n"," </tr>\n"," <tr>\n"," <th>1961</th>\n"," <td>Mal_1962</td>\n"," <td>Ithupole mooonchiya Oru padam jeevithathil kan...</td>\n"," <td>Positive</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>1962 rows × 3 columns</p>\n","</div>"],"text/plain":[" id ... category\n","0 Mal_1 ... Mixed_feelings\n","1 Mal_2 ... unknown_state\n","2 Mal_3 ... Positive\n","3 Mal_4 ... not-malayalam\n","4 Mal_5 ... Positive\n","... ... ... ...\n","1957 Mal_1958 ... Positive\n","1958 Mal_1959 ... unknown_state\n","1959 Mal_1960 ... Positive\n","1960 Mal_1961 ... Negative\n","1961 Mal_1962 ... Positive\n","\n","[1962 rows x 3 columns]"]},"metadata":{"tags":[]},"execution_count":18}]},{"cell_type":"code","metadata":{"id":"MIEN8srCnBlP"},"source":[""],"execution_count":null,"outputs":[]}]} |
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.