Add files via upload

pydeveloper510 · web-flow · commit 5e26049e75b6 · 2021-06-17T11:08:26.000+08:00
diff --git a/pandas/Drop the Rows with NaN Values in Pandas DataFrame.py b/pandas/Drop the Rows with NaN Values in Pandas DataFrame.py
@@ -0,0 +1,14 @@
+import pandas as pd
+
+# Build the frame from string data, coerce every column to numbers (text that
+# cannot be parsed becomes NaN), then keep only the rows without any NaN.
+df = (
+    pd.DataFrame({
+        'values_1': ['700', 'ABC', '500', 'XYZ', '1200'],
+        'values_2': ['DDD', '150', '350', '400', '5000'],
+    })
+    .apply(pd.to_numeric, errors='coerce')
+    .dropna()
+)
+
+print(df)
diff --git a/pandas/How to Convert Index to Column in Pandas DataFrame.py b/pandas/How to Convert Index to Column in Pandas DataFrame.py
@@ -0,0 +1,5 @@
+# Turn the current index into an ordinary column, modifying df in place.
+df.reset_index(inplace=True)
+# Rename the column labelled 'index' (the label an unnamed index receives from
+# reset_index) to its final name.
+df = df.rename({'index': 'new column name'}, axis='columns')
diff --git a/pandas/calculate weeks and add them into dataframe.py b/pandas/calculate weeks and add them into dataframe.py
@@ -0,0 +1,11 @@
+import pandas as pd
+
+# Label each row with its ISO-8601 week as "YYYY-WW" (week zero-padded to two digits).
+# `Series.dt.week` was deprecated in pandas 1.1 and removed in 2.0, so the week
+# number is read from `dt.isocalendar()` instead.  The year comes from the same
+# ISO calendar: combining the Gregorian year with an ISO week mislabels days
+# around New Year (2021-01-01 belongs to ISO week 53 of 2020, not of 2021).
+# NOTE(review): df is expected to exist already; it is not created in this file.
+iso_calendar = pd.to_datetime(df['date']).dt.isocalendar()
+df["Week"] = iso_calendar['year'].astype(str).str.cat(
+    iso_calendar['week'].astype(str).str.zfill(2), sep='-')
diff --git a/pandas/merge_two_csvs_with_pandas.py b/pandas/merge_two_csvs_with_pandas.py
@@ -0,0 +1,19 @@
+import pandas as pd
+
+
+# Both inputs are read with explicit column names and ISO-8859-1 decoding.
+ips = pd.read_csv(
+    'Result1.csv',
+    names=['IP', 'Domain', 'Country', 'Region', 'City', 'ISP', 'ASN'],
+    encoding='ISO-8859-1',
+)
+org_file = pd.read_csv(
+    'WA_2017_append.csv',
+    names=['First Name', 'Last Name', 'DOB', 'IP'],
+    encoding='ISO-8859-1',
+)
+
+# Left join on the shared 'IP' column: rows of org_file are always kept and
+# receive the columns of ips (NaN where no IP matches).
+merged_left = org_file.merge(ips, how='left', on='IP')
+merged_left.to_csv('WA_2017_append(result).csv', sep=',', encoding='utf-8', header=True)
diff --git a/pandas/read excel in pandas.py b/pandas/read excel in pandas.py
@@ -0,0 +1,11 @@
+import pandas as pd
+import os
+
+# Load the whole 'temp_new' table from the SQLite database file located at
+# os.path.join(working_dir, big_db_name) into a DataFrame.
+# NOTE(review): working_dir and big_db_name are not defined in this file, and
+# despite the file name this reads a SQL table rather than an Excel sheet.
+df = pd.read_sql_table(
+    table_name='temp_new',
+    con=f"sqlite:///{os.path.join(working_dir, big_db_name)}",
+)
diff --git a/pandas/save dataframe into database.py b/pandas/save dataframe into database.py
@@ -0,0 +1,13 @@
+import pandas as pd
+
+# Write df to the SQL table named by db_daily_table_name through small_db.con:
+# an existing table is replaced, the index is not written, and rows are sent
+# in batches of 100.
+# NOTE(review): df, db_daily_table_name and small_db are not defined in this file.
+df.to_sql(
+    name=db_daily_table_name,
+    con=small_db.con,
+    if_exists='replace',
+    index=False,
+    chunksize=100,
+)
diff --git a/pandas/shift.py b/pandas/shift.py
@@ -0,0 +1,17 @@
+import pandas as pd
+
+# Three integer columns over five consecutive days, 2020-01-01 to 2020-01-05.
+df = pd.DataFrame(
+    data={
+        "Col1": [10, 20, 15, 30, 45],
+        "Col2": [13, 23, 18, 33, 48],
+        "Col3": [17, 27, 22, 37, 52],
+    },
+    index=pd.date_range(start="2020-01-01", end="2020-01-05"),
+)
+print(df)
+
+# Move every value three rows down: the first three rows become NaN and the
+# last three original rows fall off the end.
+df = df.shift(3)
+print(df)
diff --git a/pandas/sort.py b/pandas/sort.py
@@ -0,0 +1,16 @@
+import pandas as pd
+import numpy as np
+
+df = pd.DataFrame({
+    'col1': ['A', 'A', 'B', np.nan, 'D', 'C'],
+    'col2': [2, 1, 9, 8, 7, 4],
+    'col3': [0, 1, 9, 4, 2, 3],
+    'col4': ['a', 'B', 'c', 'D', 'e', 'F']
+})
+
+# Sort the rows by col1; the NaN entry goes last (pandas' default na_position).
+# sort_values(inplace=True) returns None, so assigning its result would throw
+# the frame away -- use the returned, sorted copy instead.
+df = df.sort_values(by=['col1'])
+
+print(df)
diff --git a/pandas/split_apply.py b/pandas/split_apply.py
@@ -0,0 +1,44 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import pandas as pd
+
+# One entry per sale: 18 rows, the first nine 'type A', the last nine 'type B'.
+sales_dict = {
+    'colour': [
+        'Yellow', 'Black', 'Blue', 'Red', 'Yellow', 'Black', 'Blue', 'Red',
+        'Yellow', 'Black', 'Blue', 'Red', 'Yellow', 'Black', 'Blue', 'Red',
+        'Blue', 'Red',
+    ],
+    'sales': [
+        100000, 150000, 80000, 90000, 200000, 145000, 120000, 300000,
+        250000, 200000, 160000, 90000, 90100, 150000, 142000, 130000,
+        400000, 350000,
+    ],
+    'transactions': [
+        100, 150, 820, 920, 230, 120, 70, 250,
+        250, 110, 130, 860, 980, 300, 150, 170,
+        230, 280,
+    ],
+    'product': ['type A'] * 9 + ['type B'] * 9,
+}
+data_sales = pd.DataFrame(sales_dict)
+
+# Reference rows: sales and product of every 'Blue' sale; 'sales' is renamed to
+# 'sales_1' so it stays distinct from each group's own 'sales' column.
+ref_data = (
+    data_sales
+    .loc[data_sales['colour'] == 'Blue', ['sales', 'product']]
+    .rename(columns={'sales': 'sales_1'})
+)
+
+
+def make_color(d, ref_data):
+    """Return ``d`` inner-joined with ``ref_data`` on the 'product' column."""
+    return d.merge(ref_data, how='inner', on='product')
+
+
+# Split by colour, join each group against the reference rows, and let
+# groupby combine the per-group results into one frame.
+data_colour = data_sales.groupby('colour').apply(make_color, ref_data=ref_data)
+print(data_colour)

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+df.reset_index(inplace=True)`
	`2`	`+df = df.rename(columns = {'index':'new column name'})`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+import pandas as pd`
	`2`	`+`
	`3`	`+df.to_sql(db_daily_table_name, small_db.con, if_exists='replace', index=False, chunksize=100)`