diff --git "a/1-analise-explorat\303\263ria-basica/01-lendo-dados-e-medidas-centralidade/aula.ipynb" "b/1-analise-explorat\303\263ria-basica/01-lendo-dados-e-medidas-centralidade/aula.ipynb" index 46d1fb8..8eb6da5 100644 --- "a/1-analise-explorat\303\263ria-basica/01-lendo-dados-e-medidas-centralidade/aula.ipynb" +++ "b/1-analise-explorat\303\263ria-basica/01-lendo-dados-e-medidas-centralidade/aula.ipynb" @@ -22,7 +22,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -34,7 +34,7 @@ "" ] }, - "execution_count": 7, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -99,7 +99,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -121,7 +121,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -158,7 +158,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -167,7 +167,7 @@ "list" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -178,7 +178,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -187,7 +187,7 @@ "str" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -226,7 +226,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -264,7 +264,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 41, "metadata": { "scrolled": true }, @@ -273,7 +273,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Média da coluna Idade: 41.67\n" + "Média da coluna Idade: 37.63\n" ] } ], @@ -284,7 +284,7 @@ " lista_exemplo = []\n", " somatorio = 0\n", " for i, line in enumerate(data):\n", - " if i < 10 and i>0:\n", + " if i>0:\n", " somatorio += int(line[2])\n", " 
lista_exemplo.append(line[2])\n", " \n", @@ -303,7 +303,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -319,10 +319,10 @@ "evalue": "could not convert string to float: ", "output_type": "error", "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[0msomatorio_idade\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[0mint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[0msomatorio_estudo\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[0mfloat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m4\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 16\u001b[1;33m \u001b[0msomatorio_salario\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[0mfloat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m6\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 17\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;31mValueError\u001b[0m: could not convert string to float: " + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0msomatorio_idade\u001b[0m \u001b[0;34m+=\u001b[0m 
\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0msomatorio_estudo\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0msomatorio_salario\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: could not convert string to float: " ] } ], @@ -357,7 +357,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -367,29 +367,77 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "unexpected EOF while parsing (, line 7)", + "output_type": "error", + "traceback": [ + "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m7\u001b[0m\n\u001b[0;31m #return saida\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m unexpected EOF while parsing\n" + ] + } + ], "source": [ "def substitui_nulo_por_zero(entrada):\n", + "\n", " #\n", " #\n", - " return saida" + " #\n", + "\n", + " #return saida" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - 
"outputs": [], - "source": [] + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'substitui_nulo_por_zero' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcsv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_csv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0msubstitui_nulo_por_zero\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0mlista_exemplo\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'substitui_nulo_por_zero' is not defined" + ] + } + ], + "source": [ + "somatorio_idade = 0\n", + "somatorio_estudo = 0\n", + "somatorio_salario = 0\n", + "\n", + "\n", + "with open(r'data/data.csv', 'r') as data_csv:\n", + " data = csv.reader(data_csv)\n", + " \n", + " substitui_nulo_por_zero(data)\n", + " \n", + " lista_exemplo = []\n", + " somatorio = 0\n", + " \n", + " for i, row in enumerate(data):\n", + " \n", + " if i>0:\n", + " print(row)\n", + " somatorio_idade += int(row[2])\n", + " somatorio_estudo += float(row[4])\n", + " somatorio_salario += float(row[6])\n", + " \n", + " \n", + "media_idade = round(somatorio_idade/i,2)\n", + "media_estudo = round(somatorio_estudo/i,2)\n", + "media_salario = round(somatorio_salario/i,2)\n", + "\n", + "\n", + "print(f'Média da coluna 
Idade: {media_idade}')\n", + "print(f'Média da coluna Anos de Estudo: {media_estudo}')\n", + "print(f'Média da coluna Salario: {media_salario}')" + ] }, { "cell_type": "code", @@ -439,14 +487,114 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Função para calcular a Mediana com Dicionarios\n", + "#### Função para calcular a Mediana com Dicionarios" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Vamos criar um looping para procurar a pessoa mais velha da base" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "A pessoa mais velha na base é o id 11002710101.0, que tem 60 anos.\n" + ] + } + ], + "source": [ + "max_age = None\n", + "oldest_person = None\n", + "\n", + "input_file = csv.DictReader(open(r'data/data.csv', 'r'))\n", + "\n", + "for row in input_file:\n", + " age = int(row[\"idade\"])\n", + " if max_age == None or max_age < age:\n", + " max_age = age\n", + " oldest_person = row[\"id\"]\n", + "\n", + "if max_age != None:\n", + " print(\"A pessoa mais velha na base é o id %s, que tem %d anos.\" % (oldest_person, max_age))\n", + "else:\n", + " print(\"Não tem ninguém nesse arquivo\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Procurando a pessoa mais nova da base" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Calculando a mediana" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Moda\n", + "\n", + "\n", + "Em estatística, moda é uma das medidas de tendência central de um conjunto de dados, assim 
como a média e a mediana. Ela pode ser definida em moda amostral e populacional.\n", + "\n", + "Em relação à primeira delas, a moda amostral de um conjunto de dados trata do valor que ocorre com maior frequência ou o valor mais comum em um conjunto de dados. Moda é especialmente útil quando os valores ou as observações não são numéricos, casos em que a média e a mediana não podem ser definidas. \n", + "\n", + "Moda amostral não é necessariamente única como média ou mediana. Amostras que possuem uma moda são chamadas unimodais. Por exemplo, a amostra {1, 2, 3, 5, 5, 6, 7} tem moda 5. Amostras que possuem duas modas são chamadas bimodais. Por exemplo, a amostra {1, 2, 3, 5, 5, 6, 6} tem modas 5 e 6. Amostras que possuem várias modas são chamadas multimodais. Por exemplo, a amostra {1, 2, 3, 5, 5, 6, 6, 7, 7} tem modas 5, 6 e 7. Amostras que não possuem moda são chamadas amodais. Por exemplo, a amostra {1, 3, 2, 5, 7, 6} não tem moda. \n", + "\n", + "Por exemplo, a moda de [0,1,1,2,2,2,2,3,3,4,4,4,5] é 2.\n", + "\n", + "A moda não é necessariamente única. Pode ser que existam dois valores diferentes que sejam os mais frequentes. Por exemplo, no caso de [10, 13, 13, 20, 20], tanto 13 como 20 são a moda.\n", "\n", + "
\n", + "\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "Material complementar para dicionarios python: http://excript.com/python/funcoes-dicionarios.html" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -488,7 +636,7 @@ }, { "cell_type": "code", - "execution_count": 190, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -505,7 +653,7 @@ " ('regiao', 'centro-oeste')])" ] }, - "execution_count": 190, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -516,7 +664,7 @@ }, { "cell_type": "code", - "execution_count": 191, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -525,7 +673,7 @@ "'37'" ] }, - "execution_count": 191, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -536,7 +684,7 @@ }, { "cell_type": "code", - "execution_count": 192, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -545,7 +693,7 @@ "'centro-oeste'" ] }, - "execution_count": 192, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -563,7 +711,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -580,7 +728,7 @@ " ('regiao', 'centro-oeste')])" ] }, - "execution_count": 23, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -591,7 +739,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -608,7 +756,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -617,7 +765,7 @@ "True" ] }, - "execution_count": 69, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -628,7 +776,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -637,7 +785,7 @@ "{'idade': '53', 'sexo': 'homem', 'anos_estudo': '5.0', 'salario': 
'7128.0'}" ] }, - "execution_count": 70, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -648,7 +796,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -657,7 +805,7 @@ "'5.0'" ] }, - "execution_count": 71, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -668,7 +816,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -677,7 +825,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -686,7 +834,7 @@ "{'idade': '53', 'sexo': 'homem', 'anos_estudo': '6', 'salario': '7128.0'}" ] }, - "execution_count": 73, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -697,7 +845,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -706,7 +854,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -714,10 +862,10 @@ "evalue": "'11027211101.0'", "output_type": "error", "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdict_table\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'11027211101.0'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;31mKeyError\u001b[0m: '11027211101.0'" + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m 
\u001b[0mdict_table\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'11027211101.0'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m: '11027211101.0'" ] } ], @@ -727,7 +875,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -741,30 +889,30 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'m': 2,\n", - " 'p': 2,\n", - " 'r': 4,\n", + "{'t': 3,\n", + " 'd': 2,\n", + " 'c': 1,\n", + " 'e': 4,\n", " 'g': 1,\n", " 's': 3,\n", " 'a': 6,\n", - " 'c': 1,\n", - " 'e': 4,\n", - " 't': 3,\n", - " 'i': 2,\n", - " 'h': 1,\n", - " 'd': 2,\n", - " ' ': 5,\n", " 'o': 2,\n", - " 'f': 1}" + " 'p': 2,\n", + " 'f': 1,\n", + " 'h': 1,\n", + " 'i': 2,\n", + " 'm': 2,\n", + " 'r': 4,\n", + " ' ': 5}" ] }, - "execution_count": 78, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -775,7 +923,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -794,7 +942,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -803,7 +951,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -812,7 +960,7 @@ "dict" ] }, - "execution_count": 84, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -823,56 +971,56 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'27': 2017,\n", - " '36': 1723,\n", + "{'50': 1461,\n", + " '49': 1429,\n", + " '32': 1852,\n", + " '27': 2017,\n", + " '45': 1627,\n", " '60': 951,\n", - " '37': 1712,\n", - " '24': 1892,\n", " '31': 1796,\n", - " '45': 1627,\n", - " '43': 1699,\n", - " '41': 1552,\n", 
- " '34': 1737,\n", - " '50': 1461,\n", - " '39': 1759,\n", - " '20': 2104,\n", + " '56': 1087,\n", + " '28': 2056,\n", + " '47': 1446,\n", + " '23': 2014,\n", " '59': 999,\n", - " '42': 1673,\n", - " '52': 1244,\n", - " '32': 1852,\n", - " '25': 2014,\n", - " '38': 1727,\n", " '30': 1996,\n", + " '22': 2034,\n", + " '42': 1673,\n", + " '33': 1812,\n", + " '26': 2040,\n", + " '39': 1759,\n", " '44': 1688,\n", - " '53': 1249,\n", - " '51': 1260,\n", - " '23': 2014,\n", - " '48': 1505,\n", - " '54': 1221,\n", - " '29': 1943,\n", - " '21': 1987,\n", - " '28': 2056,\n", " '57': 1092,\n", - " '26': 2040,\n", + " '41': 1552,\n", + " '48': 1505,\n", + " '43': 1699,\n", + " '58': 969,\n", " '35': 1672,\n", - " '56': 1087,\n", " '55': 1133,\n", - " '22': 2034,\n", - " '40': 1732,\n", - " '58': 969,\n", + " '21': 1987,\n", + " '36': 1723,\n", + " '38': 1727,\n", " '46': 1566,\n", - " '47': 1446,\n", - " '49': 1429,\n", - " '33': 1812}" + " '20': 2104,\n", + " '54': 1221,\n", + " '29': 1943,\n", + " '25': 2014,\n", + " '51': 1260,\n", + " '53': 1249,\n", + " '34': 1737,\n", + " '52': 1244,\n", + " '40': 1732,\n", + " '37': 1712,\n", + " '24': 1892}" ] }, - "execution_count": 86, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -883,16 +1031,16 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "dict_keys(['27', '36', '60', '37', '24', '31', '45', '43', '41', '34', '50', '39', '20', '59', '42', '52', '32', '25', '38', '30', '44', '53', '51', '23', '48', '54', '29', '21', '28', '57', '26', '35', '56', '55', '22', '40', '58', '46', '47', '49', '33'])" + "dict_keys(['50', '49', '32', '27', '45', '60', '31', '56', '28', '47', '23', '59', '30', '22', '42', '33', '26', '39', '44', '57', '41', '48', '43', '58', '35', '55', '21', '36', '38', '46', '20', '54', '29', '25', '51', '53', '34', '52', '40', '37', '24'])" ] }, - "execution_count": 87, + "execution_count": 32, 
"metadata": {}, "output_type": "execute_result" } @@ -903,16 +1051,16 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "dict_values([2017, 1723, 951, 1712, 1892, 1796, 1627, 1699, 1552, 1737, 1461, 1759, 2104, 999, 1673, 1244, 1852, 2014, 1727, 1996, 1688, 1249, 1260, 2014, 1505, 1221, 1943, 1987, 2056, 1092, 2040, 1672, 1087, 1133, 2034, 1732, 969, 1566, 1446, 1429, 1812])" + "dict_values([1461, 1429, 1852, 2017, 1627, 951, 1796, 1087, 2056, 1446, 2014, 999, 1996, 2034, 1673, 1812, 2040, 1759, 1688, 1092, 1552, 1505, 1699, 969, 1672, 1133, 1987, 1723, 1727, 1566, 2104, 1221, 1943, 2014, 1260, 1249, 1737, 1244, 1732, 1712, 1892])" ] }, - "execution_count": 88, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -923,16 +1071,16 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "dict_items([('27', 2017), ('36', 1723), ('60', 951), ('37', 1712), ('24', 1892), ('31', 1796), ('45', 1627), ('43', 1699), ('41', 1552), ('34', 1737), ('50', 1461), ('39', 1759), ('20', 2104), ('59', 999), ('42', 1673), ('52', 1244), ('32', 1852), ('25', 2014), ('38', 1727), ('30', 1996), ('44', 1688), ('53', 1249), ('51', 1260), ('23', 2014), ('48', 1505), ('54', 1221), ('29', 1943), ('21', 1987), ('28', 2056), ('57', 1092), ('26', 2040), ('35', 1672), ('56', 1087), ('55', 1133), ('22', 2034), ('40', 1732), ('58', 969), ('46', 1566), ('47', 1446), ('49', 1429), ('33', 1812)])" + "dict_items([('50', 1461), ('49', 1429), ('32', 1852), ('27', 2017), ('45', 1627), ('60', 951), ('31', 1796), ('56', 1087), ('28', 2056), ('47', 1446), ('23', 2014), ('59', 999), ('30', 1996), ('22', 2034), ('42', 1673), ('33', 1812), ('26', 2040), ('39', 1759), ('44', 1688), ('57', 1092), ('41', 1552), ('48', 1505), ('43', 1699), ('58', 969), ('35', 1672), ('55', 1133), ('21', 1987), ('36', 1723), ('38', 1727), 
('46', 1566), ('20', 2104), ('54', 1221), ('29', 1943), ('25', 2014), ('51', 1260), ('53', 1249), ('34', 1737), ('52', 1244), ('40', 1732), ('37', 1712), ('24', 1892)])" ] }, - "execution_count": 89, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -943,7 +1091,7 @@ }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -1000,18 +1148,11 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "#### Calculando a moda" + ] }, { "cell_type": "code", @@ -1020,13 +1161,6 @@ "outputs": [], "source": [] }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": null, @@ -1045,95 +1179,73 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Moda\n", - "\n", - "\n", - "Em estatística, moda é uma das medidas de tendência central de um conjunto de dados, assim como a média e a mediana. Ela pode ser definida em moda amostral e populacional.\n", - "\n", - "Em relação à primeira delas, a moda amostral de um conjunto de dados trata do valor que ocorre com maior frequência ou o valor mais comum em um conjunto de dados. Moda é especialmente útil quando os valores ou as observações não são numéricos, casos em que a média e a mediana não podem ser definidas. \n", - "\n", - "Moda amostral não é necessariamente única como média ou mediana. Amostras que possuem uma moda são chamadas unimodais. Por exemplo, a amostra {1, 2, 3, 5, 5, 6, 7} tem moda 5. Amostras que possuem duas modas são chamadas bimodais. Por exemplo, a amostra {1, 2, 3, 5, 5, 6, 6} tem modas 5 e 6. Amostras que possuem várias modas são chamadas multimodais. Por exemplo, a amostra {1, 2 3, 5, 5, 6, 6, 7, 7} tem modas 5, 6 e 7. 
Amostras que não possuem moda são chamadas amodais. Por exemplo, a amostra {1, 3, 2, 5, 7, 6} não tem moda. \n", - "\n", - "Por exemplo, a moda de [0,1,1,2,2,2,2,3,3,4,4,4,5] é 2.\n", - "\n", - "A moda não é necessariamente única. Pode ser que existam dois valores diferentes que sejam os mais frequentes. Por exemplo, no caso de [10, 13, 13, 20, 20], tanto 13 como 20 são a moda.\n", - "\n", - "
\n", - "\n", - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Vamos criar um looping para procurar a pessoa mais velha da base" + "## Prévia da resposta para o nosso problema. Os homens ganham mais do que as mulheres? Quanto?" ] }, { "cell_type": "code", - "execution_count": 93, + "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "A pessoa mais velha na base é o id 11002710101.0, que tem 60 anos.\n" + "Média salário homem: 58358.4\n" ] } ], "source": [ - "max_age = None\n", - "oldest_person = None\n", - "\n", - "input_file = csv.DictReader(open(r'data/data.csv', 'r'))\n", + "with open(r'data/data.csv', 'r') as data_csv:\n", + " data = csv.reader(data_csv)\n", + " \n", + " lista_homem = []\n", + " somatorio = 0.0\n", + " for i, line in enumerate(data):\n", + " if i < 10 and i>0:\n", + " if line[3] == 'homem':\n", + " somatorio += float(line[6])\n", + " lista_homem.append(line[6])\n", + " \n", "\n", - "for row in input_file:\n", - " age = int(row[\"idade\"])\n", - " if max_age == None or max_age < age:\n", - " max_age = age\n", - " oldest_person = row[\"id\"]\n", + "media = round(somatorio/len(lista_homem),2)\n", "\n", - "if max_age != None:\n", - " print(\"A pessoa mais velha na base é o id %s, que tem %d anos.\" % (oldest_person, max_age))\n", - "else:\n", - " print(\"Não tem ninguém nesse arquivo\")" + "print(f'Média salário homem: {media}')" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", + "execution_count": 37, "metadata": {}, + "outputs": [ + { + "ename": "ValueError", +
"evalue": "could not convert string to float: ", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mi\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m10\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mline\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'mulher'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0msomatorio\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0mlista_mulher\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: could not convert string to float: " + ] + } + ], "source": [ - "## Prévia da resposta para o nosso problema. As homens ganham mais do que as mulheres? Quanto?" 
+ "with open(r'data/data.csv', 'r') as data_csv:\n", + " data = csv.reader(data_csv)\n", + " \n", + " lista_mulher = []\n", + " somatorio = 0.0\n", + " for i, line in enumerate(data):\n", + " if i < 10 and i>0:\n", + " if line[3] == 'mulher':\n", + " somatorio += float(line[6])\n", + " lista_mulher.append(line[6])\n", + " \n", + "\n", + "media = round(somatorio/len(lista_mulher),2)\n", + "\n", + "print(f'Média salário mulher: {media}')" ] }, { @@ -1143,13 +1255,6 @@ "outputs": [], "source": [] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": null, @@ -1175,7 +1280,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.1" + "version": "3.6.8" }, "toc": { "base_numbering": 1,