From 0e17d8d18af639cdd442fca5f94a66639408e104 Mon Sep 17 00:00:00 2001
From: Saitako <68012927+Tatchakorn@users.noreply.github.com>
Date: Mon, 8 Feb 2021 16:57:37 +0800
Subject: [PATCH] Add files via upload

---
 read_xml_from_db.py | 76 +++++++
 seperate_col.py | 39 ++++
 test.py | 515 ++++++++++++++++++++++++++++++++++++++++++++
 test_02.py | 116 ++++++++++
 write_xml_to_db.py | 53 +++++
 5 files changed, 799 insertions(+)
 create mode 100644 read_xml_from_db.py
 create mode 100644 seperate_col.py
 create mode 100644 test.py
 create mode 100644 test_02.py
 create mode 100644 write_xml_to_db.py

diff --git a/read_xml_from_db.py b/read_xml_from_db.py
new file mode 100644
index 0000000..ba3b9d7
--- /dev/null
+++ b/read_xml_from_db.py
@@ -0,0 +1,76 @@
+import mysql.connector
+from mysql.connector import Error
+import xml.etree.ElementTree as ET
+from xml.dom import minidom
+
+
+def compute_gpa(score):
+    if score >= 90:    # A+
+        return 4.5
+    elif score >= 85:  # A
+        return 4
+    elif score >= 80:  # A-
+        return 3.7
+    elif score >= 77:  # B+
+        return 3.3
+    elif score >= 73:  # B
+        return 3
+    elif score >= 70:  # B-
+        return 2.7
+    elif score >= 67:  # C+
+        return 2.5
+    elif score >= 63:  # C
+        return 2.3
+    elif score >= 60:  # C-
+        return 2
+    elif score >= 50:  # D
+        return 1
+    else:
+        return 0       # E
+
+
+connection = None
+try:
+    # Connect to the MySQL "xmldb" database
+    connection = mysql.connector.connect(
+        host='localhost',    # host name
+        database='xmldb',    # database name
+        user='tata',         # account
+        password='123123')   # password
+
+    if connection.is_connected():
+        # version of the database server
+        db_Info = connection.get_server_info()
+        print("version of database:", db_Info)
+
+        # show the name of the current database
+        cursor = connection.cursor()
+        cursor.execute("SELECT DATABASE();")
+        record = cursor.fetchone()
+        print("current database:", record)
+
+        cursor = connection.cursor()
+        query = "SELECT * FROM score_data;"
+        cursor.execute(query)
+
+        root = ET.Element("score_data")
+
+        for (student_id, xml_score, data_structure_score, algorithm_score, network_score) in cursor:
+            # XML attribute values must be strings, so cast student_id explicitly
+            student = ET.SubElement(root, "student", student_id=str(student_id))
+            ET.SubElement(student, "xml_class", gpa=str(compute_gpa(xml_score))).text = str(xml_score)
+            ET.SubElement(student, "data_structure", gpa=str(compute_gpa(data_structure_score))).text = str(data_structure_score)
+            ET.SubElement(student, "algorithm", gpa=str(compute_gpa(algorithm_score))).text = str(algorithm_score)
+            ET.SubElement(student, "network", gpa=str(compute_gpa(network_score))).text = str(network_score)
+
+        # Save the xml file
+        xmlstr = minidom.parseString(ET.tostring(root)).toprettyxml(indent=" ")
+        with open("./xml/output.xml", "w") as f:
+            f.write(xmlstr)
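+
+        # With a hypothetical row (101, 88, 92, 75, 60), output.xml would
+        # contain an element like:
+        #
+        #   <student student_id="101">
+        #     <xml_class gpa="4">88</xml_class>
+        #     <data_structure gpa="4.5">92</data_structure>
+        #     <algorithm gpa="3">75</algorithm>
+        #     <network gpa="2">60</network>
+        #   </student>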
+
+except Error as e:
+    print("connection failure:", e)
+
+finally:
+    # connection may still be None if connect() itself failed
+    if connection is not None and connection.is_connected():
+        cursor.close()
+        connection.close()
+        print("database connection closed")

diff --git a/seperate_col.py b/seperate_col.py
new file mode 100644
index 0000000..cd27d0a
--- /dev/null
+++ b/seperate_col.py
@@ -0,0 +1,39 @@
+import pandas as pd
+import glob
+import re
+
+
+def split_to_groups(list_, n_group):
+    # Recursion guard: stay clear of Python's default recursion limit (~1000)
+    assert (len(list_) / n_group) < 998, "Can't split {} items into groups of {}".format(len(list_), n_group)
+
+    if not list_:
+        return []
+    else:
+        f = [list_[:n_group]]
+        f.extend(split_to_groups(list_[n_group:], n_group))
+        return f
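+
+# An equivalent iterative sketch (not used below) that avoids recursion,
+# and hence the guard above, entirely:
+#
+# def split_to_groups_iter(list_, n_group):
+#     return [list_[i:i + n_group] for i in range(0, len(list_), n_group)]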
+
+
+file_path = glob.glob(r"./kluay_csv/*.csv")
+
+for file in file_path:
+
+    rows = {}
+    row_index = 0
+    print(file[12:-4])  # file name without the directory prefix and ".csv"
+
+    df = pd.read_csv(file)
+    num_table = re.findall(r"\d+\.\d+", str(df["Length"].to_string()))
+    num_table.pop(-1)
+
+    n = split_to_groups(num_table, 3)
+
+    for n_1, n_2, n_3 in n:
+        rows[row_index] = {"latitude": n_1, "longitude": n_2, "height": n_3}
+        row_index += 1
+
+    df = pd.DataFrame.from_dict(rows, "index")
+
+    path = f'./kluay_csv/{file[12:-4]}_LLL.csv'
+    df.to_csv(path)

diff --git a/test.py b/test.py
new file mode 100644
index 0000000..dd6f778
--- /dev/null
+++ b/test.py
@@ -0,0 +1,515 @@
+# import numpy as np
+# from sklearn.preprocessing import scale
+# import pandas as pd
+# import matplotlib.pyplot as plt
+# from sklearn.decomposition import PCA
+# from sklearn import preprocessing
+
+# import torch
+
+# Coordinate conversions for a 19x19 grid: the letters 'a'..'s' map to the
+# numbers '1'..'19' and vice versa; the Input_* and Output_* variants run
+# one axis in opposite (mirrored) directions.
+
+def Input_from_opponent_char(str_):
+    # 'a'..'s' -> '1'..'19'
+    return str(ord(str_) - ord('a') + 1)
+
+
+def Input_from_opponent_num(str_):
+    # '1'..'19' -> 'S'..'A' (mirrored)
+    return chr(ord('A') + 19 - int(str_))
+
+
+def Output_from_num(str_):
+    # '1'..'19' -> 'A'..'S'
+    return chr(ord('A') + int(str_) - 1)
+
+
+def Output_from_char(str_):
+    # 'a'..'s' -> '19'..'1' (mirrored)
+    return str(ord('s') - ord(str_) + 1)
+
+
+while True:
+    try:
+        str_ = input().split()
+        print(str_)
+        print("Input:", Input_from_opponent_num(str_[1]), Input_from_opponent_char(str_[0]))
+        print("Output:", Output_from_num(str_[1]), Output_from_char(str_[0]))
+    except EOFError:
+        break
+    except Exception:
+        print('try again!!')
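+
+# Example session (hypothetical coordinate "c 5"):
+#   c 5
+#   ['c', '5']
+#   Input: O 3
+#   Output: E 17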
+
+"""
+x = [2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2, 1, 1.5, 1.1]
+y = [2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9]
+plt.scatter(x, y)
+# plt.show()
+
+df = pd.DataFrame({'x': x, 'y': y})
+
+scaled_data = preprocessing.scale(df)
+
+plt.scatter(scaled_data[:, 0], scaled_data[:, 1])
+
+pca = PCA()
+pca.fit(scaled_data)
+pca_data = pca.transform(scaled_data)
+
+print(pca.get_covariance())
+
+import sys
+sys.exit()
+path = r"C:\go_concat\new_player_table.pkl"
+df = pd.read_pickle(path)
+
+df = df.astype({"win%": "float", "win%_human": "float", "win%_bot": "float", "bot%": "float", "wb%": "float",
+                "ww%": "float", "wb_human%": "float", "wb_bot%": "float", "ww_human%": "float", "ww_bot%": "float"})
+
+
+plt.style.use("seaborn")
+df = df.dropna()
+df = df[["win%", "ww%", "wb%", "win%_human", "win%_bot", "wb_human%", "ww_human%", "ww_bot%", "wb_bot%"]]
+
+# Scale the data so that each row has mean = 0 and std = 1.
+# The scale function expects samples to be rows, so transpose first.
+scaled_data = preprocessing.scale(df.T)
+pca = PCA()
+pca.fit(scaled_data)
+pca_data = pca.transform(scaled_data)
+
+print(pca_data.shape)
+
+
+# Scree plot:
+# displays how much variation each principal component captures from the data
+per_var = np.round(pca.explained_variance_ratio_ * 100, decimals=1)
+labels = ['PC' + str(i) for i in range(1, len(per_var) + 1)]
+
+plt.bar(x=range(1, len(per_var) + 1), height=per_var, tick_label=labels)
+plt.ylabel("Variance Ratio%")
+plt.xlabel("Principal components")
+plt.title("Scree plot")
+path = r"C:\go_concat\graph\pca_scree_plot.png"
+plt.savefig(path)
+plt.show()
+index = ["win%", "ww%", "wb%", "win%_human", "win%_bot", "wb_human%", "ww_human%", "ww_bot%", "wb_bot%"]
+
+
+# Draw the PCA plot:
+# a PCA plot shows clusters of samples based on their similarity.
+pca_df = pd.DataFrame(pca_data, index=index, columns=labels)
+
+plt.scatter(pca_df['PC1'], pca_df['PC2'])
+plt.title('PCA Graph')
+
+plt.xlabel('PC1 -{0}%'.format(per_var[0]))
+plt.ylabel('PC2 -{0}%'.format(per_var[1]))
+
+for i in pca_df.index:
+    plt.annotate(i, (pca_df['PC1'].loc[i], pca_df['PC2'].loc[i]))
+
+from k_mean import cluster_df
+path = r"C:\go_concat\graph\pca_graph.png"
+cluster_df(df=pca_df[["PC1", "PC2"]], x_label="PC1", y_label="PC2", n_cluster=3, save_path=path, color_centroid=False)
+
+print(pca_df["PC1"])
+# A loading plot shows how strongly each characteristic influences a principal component.
+loading_scores = pd.Series(pca.components_[0], index=df.index)
+sorted_loading_scores = loading_scores.abs().sort_values(ascending=False)
+top_10 = sorted_loading_scores[0:10].index.values
+print(loading_scores[top_10])
+"""
+
+"""
+Principal Component Analysis (PCA)
+
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from sklearn.decomposition import PCA
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import accuracy_score
+
+# df = pd.read_pickle("./output/table/human_table.pkl")
+# df = pd.DataFrame(df)
+# df = df.dropna()
+# df = df.astype({"win%": "float", "win%_human": "float", "win%_bot": "float", "bot%": "float", "wb%": "float",
+#                 "ww%": "float", "wb_human%": "float", "wb_bot%": "float", "ww_human%": "float", "ww_bot%": "float"})
+
+url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
+names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']
+dataset = pd.read_csv(url, names=names)
+x = dataset.drop('Class', axis=1)
+y = dataset['Class']
+# # Preprocessing
+# print(df.to_string())
+# x = df[["win%", "bot%"]]
+# #
+# y = df.sort_values(by=["n_game"])["n_game"]
+
+# Divide data into training and test sets
+x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
+
+# Standard-scale the features: fit on the training set only,
+# then apply the same transformation to the test set
+sc = StandardScaler()
+x_train = sc.fit_transform(x_train)
+x_test = sc.transform(x_test)
+
+# PCA
+pca = PCA(n_components=4)
+x_train = pca.fit_transform(x_train)
+x_test = pca.transform(x_test)
+
+explained_variance = pca.explained_variance_ratio_
+print(explained_variance)
+
+# Use random forest classification for making the predictions
+classifier = RandomForestClassifier(max_depth=2, random_state=0)
+classifier.fit(x_train, y_train)
+
+y_pred = classifier.predict(x_test)
+cm = confusion_matrix(y_test, y_pred)
+print(cm)
+print('Accuracy', accuracy_score(y_test, y_pred))
+
+# ---------------------------------
+
+
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import LabelEncoder, StandardScaler
+from sklearn.decomposition import PCA
+from sklearn.model_selection import train_test_split
+import warnings
+warnings.filterwarnings("ignore")
+
+df = pd.read_csv('mushrooms.csv')
+
+# Machine learning systems work with numbers, so we need to encode the
+# string categories as ints
+encoder = LabelEncoder()
+
+# Now apply the transformation to all the columns:
+for col in df.columns:
+    df[col] = encoder.fit_transform(df[col])
+
+X_features = df.iloc[:, 1:23]
+y_label = df.iloc[:, 0]
+
+# Scale the features
+sc = StandardScaler()
+X_features = sc.fit_transform(X_features)
+
+# Visualize
+pca = PCA()
+pca.fit_transform(X_features)
+pca_variance = pca.explained_variance_
+
+plt.figure(figsize=(8, 6))
+plt.bar(range(22), pca_variance, alpha=0.5, align='center', label='individual variance')
+plt.legend()
+plt.ylabel('Variance ratio')
+plt.xlabel('Principal components')
+plt.show()
+
+pca2 = PCA(n_components=2)
+pca2.fit(X_features)
+x_2d = pca2.transform(X_features)
+
+plt.figure(figsize=(8, 6))
+plt.scatter(x_2d[:, 0], x_2d[:, 1], c=df['class'])
+plt.show()
+
+# ---------------------------------
+
+from sklearn.decomposition import PCA
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+def draw_vector(v0, v1, ax=None):
+    ax = ax or plt.gca()
+    arrowprops = dict(arrowstyle='->',
+                      linewidth=2,
+                      shrinkA=0, shrinkB=0)
+    ax.annotate('', v1, v0, arrowprops=arrowprops)
+
+
+rng = np.random.RandomState(1)
+X = np.dot(rng.rand(2, 2), rng.randn(2, 200)).T
+plt.scatter(X[:, 0], X[:, 1])
+plt.axis('equal')
+plt.show()
+
+
+pca = PCA(n_components=2)
+pca.fit(X)
+
+print(pca.components_)
+print(pca.explained_variance_)
+print(pca.explained_variance_ratio_)
+
+
+# # plot data
+# plt.scatter(X[:, 0], X[:, 1], alpha=0.2)
+# for length, vector in zip(pca.explained_variance_, pca.components_):
+#     v = vector * 3 * np.sqrt(length)
+#     draw_vector(pca.mean_, pca.mean_ + v)
+# plt.axis('equal');
+# plt.show()
+
+pca = PCA(n_components=1)
+pca.fit(X)
+X_pca = pca.transform(X)
+print("original shape: ", X.shape)
+print("transformed shape:", X_pca.shape)
+
+X_new = pca.inverse_transform(X_pca)
+plt.scatter(X[:, 0], X[:, 1], alpha=0.2)
+plt.scatter(X_new[:, 0], X_new[:, 1], alpha=0.8)
+print(X_new)
+plt.axis('equal')
+plt.show()
+
+# ---------------------------------
+
+"""
+
+"""
+K-Means Clustering
+
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.cluster import KMeans
+
+plt.style.use("seaborn")
+X = np.array([[5, 3],
+              [10, 15],
+              [15, 12],
+              [24, 10],
+              [30, 45],
+              [85, 70],
+              [71, 80],
+              [60, 78],
+              [55, 52],
+              [80, 91]])
+# plt.scatter(X[:, 0], X[:, 1], label="True Position")
+# plt.show()
+kmeans = KMeans(n_clusters=3)
+kmeans.fit(X)
+
+print(kmeans.cluster_centers_)
+print(kmeans.labels_)
+
+plt.scatter(X[:, 0], X[:, 1], c=kmeans.labels_, cmap='rainbow')
+plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], color='black')
+plt.show()
+
+"""
+
+
+"""
+Linear Regression
+
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+
+plt.style.use("seaborn")
+df = pd.read_pickle("./output/table/human_table.pkl")
+df = pd.DataFrame(df)
+
+
+print(df.head().to_string())
+df = df.sort_values(by=["n_game", "win%"])
+
+# Change the data type of every column to float
+df = df.astype({"win%": "float", "win%_human": "float", "win%_bot": "float", "bot%": "float", "wb%": "float",
+                "ww%": "float", "wb_human%": "float", "wb_bot%": "float", "ww_human%": "float", "ww_bot%": "float"})
+
+x = df["n_game"].tolist()
+y = df["win%"].tolist()
+
+x_bar = np.mean(x)
+y_bar = np.mean(y)
+
+# Closed-form least squares: m = sum((x - x_bar)(y - y_bar)) / sum((x - x_bar)^2)
+sum_residual = 0
+variance = 0
+for i in range(df.shape[0]):
+    x_diff = (x[i] - x_bar)
+    sum_residual += x_diff * (y[i] - y_bar)
+    variance += x_diff ** 2
+m = sum_residual / variance
+
+b = y_bar - m * x_bar
+print(m, b)
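+
+# Cross-check of the loop above with NumPy's closed-form fit;
+# polyfit(x, y, 1) returns [slope, intercept] and should match (m, b)
+m_np, b_np = np.polyfit(x, y, 1)
+print(m_np, b_np)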
+
+x_n = np.arange(0, 20)
+y_n = m * x_n + b
+
+plt.plot(x_n, y_n)
+plt.show()
+
+"""

diff --git a/test_02.py b/test_02.py
new file mode 100644
index 0000000..7782e16
--- /dev/null
+++ b/test_02.py
@@ -0,0 +1,116 @@
+import cv2 as cv
+import numpy as np
+import os
+import glob
+import face_recognition
+import read_write_path as rw
+import pickle
+
+# /---- ----/
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+IMAGE_DIR = os.path.join(BASE_DIR, 'images')
+DATA_DIR = os.path.join(BASE_DIR, 'DATA')
+
+temp_path = os.path.join(DATA_DIR, 'train')
+train_data = glob.glob(os.path.join(temp_path, '*.jpg'))
+
+temp_path = os.path.join(DATA_DIR, 'test', 'indatabase')
+in_database_data = glob.glob(os.path.join(temp_path, '*.jpg'))
+
+temp_path = os.path.join(DATA_DIR, 'test', 'not in database')
+not_in_database_data = glob.glob(os.path.join(temp_path, '*.jpg'))
+# /---- ----/
+
+# /---- ----/
+haar_cascades_path = r'./tar_hw/DATA/haarcascades/'
+face_cascade = cv.CascadeClassifier(haar_cascades_path + 'haarcascade_frontalface_alt.xml')
+eye_cascade = cv.CascadeClassifier(haar_cascades_path + 'Cascades/haarcascade_eye.xml')
+smile_cascade = cv.CascadeClassifier(haar_cascades_path + 'Cascades/haarcascade_smile.xml')
+# /---- ----/
+
+# /---- ----/
+'''
+Get name and ID for the training dataset
+'''
+
+train_name = [os.path.splitext(os.path.basename(file))[0] for file in
+              train_data]  # Extract the name of every person in the training data set
+id_name = list(enumerate(train_name))  # Pair (id, name)
+
+# print(id_name)
+
+# /---- ----/
+
+
+# /---- ----/
+# (1.) Create a folder for each person
+
+# (2.) Image augmentation
+
+# (3.) Save the images
+
+
+# /---- ----/
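+
+# A minimal sketch of steps (1.)-(3.) above. The folder layout ('train_aug'
+# with one subfolder per person) and the two augmentations (horizontal flip,
+# brightness shift) are illustrative assumptions, not part of the original
+# pipeline:
+#
+# for img_file, name in zip(train_data, train_name):
+#     person_dir = os.path.join(DATA_DIR, 'train_aug', name)  # (1.) one folder per person
+#     os.makedirs(person_dir, exist_ok=True)
+#     img = cv.imread(img_file)
+#     flipped = cv.flip(img, 1)                                # (2.) horizontal mirror
+#     brighter = cv.convertScaleAbs(img, alpha=1.0, beta=40)   #      brightness +40
+#     cv.imwrite(os.path.join(person_dir, name + '_flip.jpg'), flipped)  # (3.)
+#     cv.imwrite(os.path.join(person_dir, name + '_bright.jpg'), brighter)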
+
+# /---- <training> ----/
+
+# name_list = []
+# encoding_list = []
+# for img_file, name in zip(train_data, train_name):
+#     img = cv.imread(img_file)
+#     # print(img_file.split(os.path.sep)[-2])
+#     # Convert BGR to RGB:
+#     # OpenCV orders color channels as BGR, but dlib expects RGB.
+#     # The face_recognition module uses dlib, so swap the channels first.
+#     img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
+#     # find/localize the faces in the image, resulting in a list of bounding boxes
+#     boxes = face_recognition.face_locations(img, model="cnn")
+#     print(boxes)
+#     # encode each detected face into a vector of 128 numbers
+#     encodings = face_recognition.face_encodings(img, boxes)
+#     print(name)
+#
+#     for encoding in encodings:
+#         name_list.append(name)
+#         encoding_list.append(encoding)
+#
+#
+# data = {'name': name_list, 'encoding': encoding_list}
+#
+# print(rw.save_table_path('encoding_data.pkl'))
+#
+# with open(rw.save_table_path('encoding_data.pkl'), "wb") as pickle_file:
+#     pickle.dump(data, pickle_file)
+
+
+# /---- <\training> ----/
+
+# /---- <test> ----/
+with open(rw.save_table_path('encoding_data.pkl'), "rb") as pickle_file:
+    data = pickle.load(pickle_file)
+
+for img_file in not_in_database_data:
+    img = cv.imread(img_file)
+    img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
+
+    input_img_name = os.path.splitext(os.path.basename(img_file))[0]  # name of the input file
+
+    boxes = face_recognition.face_locations(img, model="cnn")
+    encodings = face_recognition.face_encodings(img, boxes)
+    for encoding in encodings:
+        # boolean match list (unused below; the raw distances are printed instead)
+        matches = face_recognition.compare_faces(data["encoding"], encoding)
+
+        face_distances = face_recognition.face_distance(data["encoding"], encoding)
+        face_distances = face_distances.tolist()
+        print('-' * 100)
+        print('input name:', input_img_name)
+        print('-' * 100)
+        for name, dist in zip(train_name, face_distances):
+            print(name + ":\t", dist)
+        print('-' * 100)
+        print('output name:', train_name[np.argmin(face_distances)])
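+
+        # A possible refinement (not in the original): face_distance is the
+        # Euclidean distance between 128-d encodings, and face_recognition
+        # treats ~0.6 as its default match tolerance, so argmin alone will
+        # always name someone even for a face that is not in the database.
+        if min(face_distances) > 0.6:
+            print('-> closest distance above 0.6; probably not in the database')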
"VALUES (%s, %s, %s, %s, %s);" + + # Read xml file + tree = ET.parse("./xml/score_data.xml") + root = tree.getroot() + + for student in root.iter('student'): + temp = [] + for subject in student.iter(): + + # Get grades + if subject.text.isdigit(): + temp.append(int(subject.text)) + + # Attributes stored in a dictionary + new_data = (student.attrib["student_id"], temp[0], temp[1], temp[2], temp[3]) + cursor = connection.cursor() + cursor.execute(sql, new_data) + + # confirm data is stored into database + connection.commit() + +except Error as e: + print("connection failure:", e) + +finally: + if (connection.is_connected()): + cursor.close() + connection.close() + print("database connection closed")