diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..844d537
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,20 @@
+.DS_Store
+*/.DS_Store
+Thumbs.db
+node_modules/*
+*.idea
+*~
+package-lock.json
+.vscode
+.idea/
+.idea/*
+.venv
+venv
+house*/*
+log/
+data/
+*.ipynb
+*/*.ipynb
+.ipynb_checkpoints
+*/.ipynb_checkpoints
+output*/
\ No newline at end of file
diff --git a/README.md b/README.md
index 9c48476..8b4275c 100644
--- a/README.md
+++ b/README.md
@@ -4,5 +4,14 @@ Smart cities, utilities, third-parties, and government agencies are having press
To address the problem, we design a new system---"SolarFinder" that can automatically detect distributed solar photovoltaic arrays in a given geospatial region without any extra cost. SolarFinder first automatically fetches low/regular resolution satellite images within the region using publicly-available imagery APIs. Then, SolarFinder leverages multi-dimensional K-means algorithm to automatically segment solar arrays on rooftop images. Eventually, SolarFinder employs hybrid linear regression approach that integrates support vectors machine (SVMs-RBF) modeling with a deep convolutional neural network (CNNs) approach to accurately identify solar arrays and characterize each solar deployment simultaneously. We evaluate SolarFinder using 269,632 public satellite images that include 1,143,636 contours from 13 geospatial regions in U.S. We find that pre-trained SolarFinder yields a MCC of 0.17, which is 3 times better than the most recent pre-trained CNNs approach and is the same as a re-trained CNNs approach.
+## Prerequisite Environment
+
+* python3
+* pip (latest version)
+* Install dependencies:
+```sh
+$ pip install -r requirements.txt
+```
+
SolarFinder work is published at the 19th ACM/IEEE Conference on Information Processing in Sensor Networks (IPSN 2020). If you use our code or datasets in your research, please consider cite our work:
diff --git a/data_collection/osm.py b/data_collection/osm.py
new file mode 100644
index 0000000..3625d8e
--- /dev/null
+++ b/data_collection/osm.py
@@ -0,0 +1,82 @@
+# This file generates the URLs used to download images from the Google Static Maps API.
+# For every house, we store the URL used to download the original house image and the house mask.
+# The input is an OSM file; the output is a JSON file storing the download URLs and a CSV file storing the id and location of each house.
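+#
+# For illustration only (made-up node id and coordinates, URLs abbreviated), each JSON
+# entry and CSV row produced by this script look roughly like:
+#   JSON: {"id": "123456", "mask": "https://maps.googleapis.com/...fillcolor:0xff0000ff%7C42.1%2C-72.5...",
+#          "image": "https://maps.googleapis.com/...fillcolor:0x00000000%7C42.1%2C-72.5..."}
+#   CSV:  123456,"42.1,-72.5"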
+import csv +import datetime +import glob as gb +import json +import os +import xml.dom.minidom +import xml.dom.minidom + +# used to calculate the download images time +start = datetime.datetime.now() +# osm_path which is the osm file location +osm_path = gb.glob("/*.osm") +for osm in osm_path: + dom = xml.dom.minidom.parse(osm) + num = osm.split("/")[-1] + num = os.path.splitext(num)[0] + # dom = xml.dom.minidom.parse('./0.osm') + root = dom.documentElement + nodelist = root.getElementsByTagName('node') + waylist = root.getElementsByTagName('way') + node_dic = {} + + url_prefix1 = 'https://maps.googleapis.com/maps/api/staticmap?zoom=20&size=400x400&scale=4&maptype=hybrid&path=color:0xff0000ff%7Cweight:5%7Cfillcolor:0xff0000ff' + url_prefix2 = 'https://maps.googleapis.com/maps/api/staticmap?zoom=20&size=400x400&scale=4&maptype=hybrid&path=color:0x00000000%7Cweight:5%7Cfillcolor:0x00000000' + url_suffix = '&key=AIzaSyA7UVGBz0YP8OPQnQ9Suz69_u1TUSDukt8' + + for node in nodelist: + node_id = node.getAttribute('id') + node_lat = float(node.getAttribute('lat')) + node_lon = float(node.getAttribute('lon')) + node_dic[node_id] = (node_lat, node_lon) + url = [] + location = {} + csv_lat = 0 + csv_lon = 0 + num_img = 0 + # json used to store the url of images downloading + with open(os.path.join('./10house/house1/', format(str(num)) + '.json'), 'w') as json_file: + for way in waylist: + taglist = way.getElementsByTagName('tag') + build_flag = False + for tag in taglist: + # choose the attribute to be building, + if tag.getAttribute('k') == 'building': + build_flag = True + if build_flag: + ndlist = way.getElementsByTagName('nd') + s = "" + for nd in ndlist: + nd_id = nd.getAttribute('ref') + if nd_id in node_dic: + node_lat = node_dic[nd_id][0] + node_lon = node_dic[nd_id][1] + g = nd_id + csv_lat = node_dic[nd_id][0] + csv_lon = node_dic[nd_id][1] + print(g) + s += '%7C' + str(node_lat) + '%2C' + str(node_lon) + # secret = 'pSRLFZI7ujDivoNjR-Vz7GR6F4Q=' + url1 = url_prefix1 + s + url_suffix + # url1 = sign_url(url1, secret) + url2 = url_prefix2 + s + url_suffix + # url2 = sign_url(url2, secret) + test_dict = {"id": g, "mask": url1, "image": url2} + url.append(test_dict) + location[g] = str(csv_lat) + ',' + str(csv_lon) + num_img = num_img + 1 + json_str = json.dumps(url) + json_file.write(json_str) + json_file.close() + # csv document used to store the house id and location( latitude and longtitude) + csv_path = "./10house/house1/house1.csv" + with open(csv_path, 'a') as csv_file: + writer = csv.writer(csv_file) + for key, value in location.items(): + writer.writerow([key, value]) + csv_file.close() +end = datetime.datetime.now() +print(end - start) diff --git a/data_collection/roof.py b/data_collection/roof.py new file mode 100644 index 0000000..cc36f6d --- /dev/null +++ b/data_collection/roof.py @@ -0,0 +1,42 @@ +# this is used to download the roof images from google static map , +# we download the original images and mask from google static map (free) then use and operation to get the roof ROI, +# so we can process and label the roof images +import glob as gb +import json +import os + +import cv2 +import numpy as np +import requests + +i = 0 +json_path = gb.glob("./10house/house1/map.json") +for file in json_path: + with open(file, 'r') as file: + urls = json.load(file) + for url in urls: + i = i + 1 + id = url['id'] + mask = url['mask'] + image = url['image'] + mask = requests.get(mask) + image = requests.get(image) + fmask = open(os.path.join('./10house/house1/image/', format(str('1')) + 
'.png'), 'ab') + fimg = open(os.path.join('./10house/house1/mask/', format(str('1')) + '.png'), 'ab') + fmask.write(mask.content) + fimg.write(image.content) + fmask.close() + fimg.close() + tag = cv2.imread(os.path.join('./10house/house1/image/', format('1') + '.png')) + real = cv2.imread(os.path.join('./10house/house1/mask/', format('1') + '.png')) + lower = np.array([0, 0, 100]) + upper = np.array([40, 40, 255]) + img = cv2.inRange(tag, lower, upper) + + # and operations with images + img = np.expand_dims(img, axis=2) + img = np.concatenate((img, img, img), axis=-1) + result = cv2.bitwise_and(real, img) + cv2.imwrite(os.path.join('./10house/house1/roof/' + format(str(id)) + '.png'), result) + os.remove("./10house/house1/image/1.png") + os.remove("./10house/house1/mask/1.png") diff --git a/data_collection/sign_url.py b/data_collection/sign_url.py new file mode 100644 index 0000000..31efdc8 --- /dev/null +++ b/data_collection/sign_url.py @@ -0,0 +1,32 @@ +import base64 +import hashlib +import hmac +from urllib.parse import urlparse + + +# sign the ulr so there is no limit to download the images from google static map, +# but it may cause extra fees. +def sign_url(input_url=None, secret=None): + if not input_url or not secret: + raise Exception("Both input_url and secret are required") + + url = urlparse(input_url) + + # We only need to sign the path+query part of the string + url_to_sign = url.path + "?" + url.query + + # Decode the private key into its binary format + # We need to decode the URL-encoded private key + decoded_key = base64.urlsafe_b64decode(secret) + + # Create a signature using the private key and the URL-encoded + # string using HMAC SHA1. This signature will be binary. + signature = hmac.new(decoded_key, url_to_sign.encode(), hashlib.sha1) + + # Encode the binary signature into base64 for use within a URL + encoded_signature = base64.urlsafe_b64encode(signature.digest()) + + original_url = url.scheme + "://" + url.netloc + url.path + "?" 
+ url.query + + # Return signed URL + return original_url + "&signature=" + encoded_signature.decode() diff --git a/data_preprocessing/add_angle70.py b/data_preprocessing/add_angle70.py new file mode 100644 index 0000000..bf8f458 --- /dev/null +++ b/data_preprocessing/add_angle70.py @@ -0,0 +1,19 @@ +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +from sklearn.preprocessing import StandardScaler +import csv + + +data = pd.read_csv(".csv") +df = pd.DataFrame(data) +data1 =pd.read_csv("/contour_all.csv") +df1 = pd.DataFrame(data1) + +angle70 = df1.iloc[:,13] +df.insert(13, "numangle70", angle70, True) + +export_csv = df.to_csv ('/location810/angle70.csv',index=None) + + + diff --git a/data_preprocessing/contour_cache.py b/data_preprocessing/contour_cache.py new file mode 100644 index 0000000..c22ca33 --- /dev/null +++ b/data_preprocessing/contour_cache.py @@ -0,0 +1,374 @@ +# OpenCV lib +import os +os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"]="0" +import tensorflow as tf +import cv2 +from skimage.segmentation import slic +from skimage import color +from skimage import data +from skimage import io +# Traverse files +import glob as gb +import tensorflow as tf +# Math lib +import numpy as np +import time +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec +import math +import csv + +from matplotlib.pyplot import imshow +import matplotlib.pyplot as plt +import matplotlib.image as mpimg + + +def kmeans(img): + # K-means + # Convert image to one dimension data + img_ori = img.copy() + img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) + Z = img.reshape((-1, 3)) + # Z = img.reshape((-1, 3)) + Z = np.float32(Z) + # define criteria, number of clusters(K) and apply kmeans() + criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0) + K =5 + # Run k-means + # ret: compactness + # labels: + # centers: array of centers of clusters + ret, label, center = cv2.kmeans(Z, K, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS) + # Now convert back into uint8, and make original image + center = np.uint8(center) + res = center[label.flatten()] + res2 = res.reshape(img.shape) + res2_gray = cv2.cvtColor(res2, cv2.COLOR_BGR2GRAY) + + hist = res2_gray.ravel() + hist = set(hist) + hist = sorted(hist) + # print(len(hist)) + threshold = [] + tag=[] + tag1 = [] + tag_dilate3 = [] + tag_dilate5 = [] + tag_dilate7 = [] + tag_close3 = [] + tag_close5 = [] + tag_close7 = [] + for i in range(len(hist)-1): + threshold.append(int(hist[i]/2 + hist[i+1]/ 2)) + # no dilate , not accurate + kernal3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3)) + kernal5 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5)) + kernal7 = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7)) + for j in range(len(hist)-1): + if j ==(len(hist)-2): + dia=cv2.inRange(res2_gray, threshold[j], 255) + tag.append(dia) + tag_dilate3.append(cv2.dilate(dia, kernal3, iterations=1)) + tag_dilate5.append(cv2.dilate(dia, kernal5, iterations=1)) + tag_dilate7.append(cv2.dilate(dia, kernal7, iterations=1)) + else: + dia = cv2.inRange(res2_gray, threshold[j], threshold[j+1]) + tag.append(dia) + tag_dilate3.append(cv2.dilate(dia, kernal3, iterations=1)) + tag_dilate5.append(cv2.dilate(dia, kernal5, iterations=1)) + tag_dilate7.append(cv2.dilate(dia, kernal7, iterations=1)) + + for j in range(len(hist) - 1): + if j == (len(hist) - 2): + dia1 = cv2.inRange(res2_gray, threshold[j], 255) + tag1.append(dia1) + + tag_close3.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal3)) + 
tag_close5.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal5)) + tag_close7.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal7)) + else: + dia1 = cv2.inRange(res2_gray, threshold[j], threshold[j + 1]) + tag1.append(dia1) + tag_close3.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal3)) + tag_close5.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal5)) + tag_close7.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal7)) + + # return(tag,tag_dilate3,tag_close3, tag_dilate5,tag_close5, tag_dilate7, tag_close7 ,hist) + return (tag, hist, tag_close3, tag_dilate5, tag_close5, tag_dilate7, tag_close7, hist) +# the kernel number is returned , use kernel 3 temporiarly. + +# find contours based on kmeans method +def find_contours(img, mask_list): + # Get the area of roof + masks_length = len(mask_list) + cont = [] + for i in range(0, masks_length): + _,c, h = cv2.findContours(mask_list[i], cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + for contour in c: + cont.append(contour) +# cv2.drawContours(img, cont, -1, (0, 0, 255), 2) + return [img,cont] + +# use size filter +def filter_size(img,contour): + image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + roof_area = cal_roofarea(image_grayscale)[0] + cont = [] + for c in contour: + area = cv2.contourArea(c) + if (area >0): + ratio = area / roof_area + if ((area >800) & (ratio < 0.5)): + cont.append(c) + areas = [] + for i, co in enumerate(cont): + areas.append((i, cv2.contourArea(co),co)) + + a2 = sorted(areas, key=lambda d: d[1], reverse=True) + # cv2.drawContours(img, cont, -1, (0, 0, 255), 2) + # cv2.imshow('img',img) + # cv2.waitKey(0) + return [img,a2] + +# calculate the roof area so we can remove a part of the contours +def cal_roofarea(image): + black = cv2.threshold(image, 0, 255, 0)[1] + _,contours, hierarchy = cv2.findContours(black, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + # cv2.drawContours(img, contours, -1, (255, 0, 0), 2) + area = [cv2.contourArea(c) for c in contours] + roof_index = np.argmax(area) + roof_cnt = contours[roof_index] + # contourArea will return the wrong value if the contours are self-intersections + roof_area = cv2.contourArea(roof_cnt) + #print('roof area = '+ str(roof_area)) + return (roof_area,roof_cnt) + +# calculate the mean pixel value in the contours +def getContourStat(img,contour): + mask = np.zeros(img.shape,dtype="uint8") + cv2.drawContours(mask, [contour], -1, 255, -1) + mean,stddev = cv2.meanStdDev(img,mask=mask) + return mean, stddev + + +# use to show the result of kmeans + +def get_mask(img,mask_list): + masks_length = len(mask_list) + mask_color = [(255,0,0),(0,255,0),(0,0,255),(255,255,255),(128,128,128),(0,0,0)] + for i in range(0, masks_length): + img[mask_list[i]!= 0] = mask_color[i] + return img + + +def pole(img, contour): + ori_img = img.copy() + image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + cont = cal_roofarea(image_grayscale)[1] + cv2.drawContours(ori_img, cont, -1, (255, 0, 0), 3) + #print(len(contour)) + contour_res =[] + back = 1 + cnt = contour + leftmost = tuple(cnt[cnt[:, :, 0].argmin()][0]) + rightmost = tuple(cnt[cnt[:, :, 0].argmax()][0]) + topmost = tuple(cnt[cnt[:, :, 1].argmin()][0]) + bottommost = tuple(cnt[cnt[:, :, 1].argmax()][0]) + pole = [leftmost,rightmost,topmost,bottommost] + for point in pole: + # check the distance with contours, biggest contour + # when it is negative, means the point is outside the contours + dist = cv2.pointPolygonTest(cont, point, True) + # print(dist) + if (dist <=0): + back = 0 + else: + pass + + return 
(ori_img,contour,back) +def rotate_rectangle(img_name,img, contour): + + shape= {} + shape['id'] = img_name +# for c in contour: + c = contour + + area = cv2.contourArea(c) + x,y,w,h = cv2.boundingRect(c) + ratiowh = min(float(w/h),float(h/w)) + shape['ratiowh'] = ratiowh + + ratioarea = float(area/(w*h)) + shape['ratioarea'] = ratioarea + + epsilon = 0.01 * cv2.arcLength(c, True) + approx = cv2.approxPolyDP(c, epsilon, True) + + approxlen = len(approx) + shape['approxlen'] = approxlen + + + # the original num set to be -1 to be different no operation + num_angle = 0 + num_angle90 = -1 + num_angle80 = -1 + num_angle70 = -1 + + mask = np.zeros(img.shape, np.uint8) + cv2.drawContours(mask, [approx], -1, (255, 255, 255), -1) + cv2.drawContours(img, [approx], -1, (255, 255, 255), 2) + # mask = np.concatenate((mask, mask, mask), axis=-1) + gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) + contour_list = [] + ret, thresh = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY) + _,contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + # get the list of contours + for points in contours[0]: + x, y = points.ravel() + contour_list.append([x, y]) + corners = cv2.goodFeaturesToTrack(gray, 50, 0.01, 10) + corners = np.int0(corners) + for i in corners: + x, y = i.ravel() + # decide whether the corner is on the contours + if (cv2.pointPolygonTest(contours[0], (x, y), True) == 0): + center_index = contour_list.index([x, y]) + length = len(contour_list) + # get the point three before, and ignore the end point + a_index = center_index - 5 + b_index = center_index + 5 + if ((a_index > 0) & (b_index > 0) & (a_index < length)& (b_index < length)): + xa, ya = contour_list[a_index] + xb, yb = contour_list[b_index] + # print(x , y) + # print(xa, ya) + a = math.sqrt((x - xa) * (x - xa) + (y - ya) * (y - ya)) + b = math.sqrt((x - xb) * (x - xb) + (y - yb) * (y - yb)) + c = math.sqrt((xa - xb) * (xa - xb) + (ya - yb) * (ya - yb)) + if ((a > 0) & (b > 0)): + if(((a * a + b * b - c * c) / (2 * a * b))<1) & (((a * a + b * b - c * c) / (2 * a * b) >-1)): + angle = math.degrees(math.acos((a * a + b * b - c * c) / (2 * a * b))) + num_angle =num_angle +1 + # print(angle) + if (angle < 90): + num_angle90 = num_angle90 + 1 + if (angle < 80): + num_angle80 = num_angle80 + 1 + if (angle < 70): + num_angle70 = num_angle70 + 1 + cv2.circle(img, (x, y), 5, 255, -1) + + shape['numangle'] = num_angle + shape['numangle90'] = num_angle90 + shape['numangle80'] = num_angle80 + shape['numangle70'] = num_angle70 +# + + return(shape) +def mean(img,contour): + cont_res = [] + ori_img= img.copy() + + img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) + image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + mean_filter = 0 + c = contour + mean = getContourStat(image_grayscale,c)[0] + hist = kmeans(img)[1] + if (mean[0][0] <= (hist[2]+5)): + # mean = 1 means panel + mean_filter= 1 + + else: + # pass + mean_filter = 0 + # print(mean) +# cv2.drawContours(ori_img, cont_res, -1, (0, 0, 255), -1) + return(ori_img,cont_res,mean_filter) + +def main(): + # num = 0 + house_num = 8 +# initialize the haeader of contour features csv file + csvpath = '/aul/homes/data/house' + str(house_num) + '/contour_features.csv' + with open(csvpath, 'a') as csvfile: + myFields = ['id', 'image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() + csvfile.close() + + img_path = 
gb.glob('/aul/homes/dataset930/house' +str(house_num) + '/roof/*.png') + # store the information of contours(the label) + for path in img_path: + contour = {} + img_name = path.split("/")[-1] + img_name = img_name.split(".")[0] + # print(img_name) + # original image + img = cv2.imread(path) + # this is to show the contours so we can label right + img_contour = img.copy() +# tag = kmeans(img.copy())[2] + tag = kmeans(img)[2] +# masks = get_mask(img, tag) + # get the contours + img_contours= find_contours(img, tag)[0] + contours = find_contours(img, tag)[1] + # filter: to remove the contours which is less than 1 block of solar panel + img_size = filter_size(img, contours)[0] + contourinfo = filter_size(img, contours)[1] + # conotur_num is to tag the contours on the image + contour_num = 0 + rank = 0 + for i, area, c in contourinfo: + contour = {} + rank = rank + 1 + contour['id'] = str(img_name) + '_' + str(rank) + print(contour['id']) + contour['image'] = str(img_name) + contour['size'] = area +# contour['cont'] = c + contour['pole'] = pole(img.copy(), c)[2] + # print(contour['pole']) + # if the value is 1, means it maybe panel + contour['mean'] = mean(img.copy(), c)[2] + # print(contour['mean']) + area = cv2.contourArea(c) + perimeter = cv2.arcLength(c, True) + sq = 4 * math.pi * area / (perimeter * perimeter) + contour['square'] = sq + # print(sq) + shape = rotate_rectangle(img_name,img.copy(), c) + contour['ratiowh'] = shape['ratiowh'] + contour['ratioarea'] = shape['ratioarea'] + contour['approxlen'] = shape['approxlen'] + contour['numangle'] = shape['numangle'] + contour['numangle90'] = shape['numangle90'] + contour['numangle70'] = shape['numangle70'] + csv_path = '/aul/homes/dataset930/house' + str(house_num) + '/contourlabel.csv' + with open(csv_path, newline='') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + if(row['id']==contour['id']): +# print(row['id'],row['label']) + contour['label'] = row['label'] +# num = num + 1 + vgg_image = img.copy() + mask = np.zeros_like(img) + img2gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) + mask = cv2.drawContours(img2gray, [c], 0, (255, 255, 255, 1), -1) + img_result = cv2.bitwise_or(vgg_image, vgg_image, mask=mask) + cv2.imwrite(os.path.join('/aul/homes/data/house' + str(house_num) + '/contour_all/' +contour['id'] + '.png'),img_result) + with open(csvpath, 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([contour['id'], contour['image'],contour['size'],contour['pole'],contour['mean'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) + csvfile.close() + print('finish') +main() + + + + + + diff --git a/data_preprocessing/contour_nopanel_extraction.py b/data_preprocessing/contour_nopanel_extraction.py new file mode 100644 index 0000000..3d6a09e --- /dev/null +++ b/data_preprocessing/contour_nopanel_extraction.py @@ -0,0 +1,25 @@ +import csv +import cv2 +csvpath = '/aul/homes/final_contour/house3' + '/nopanelcontour_features.csv' +with open(csvpath, 'a') as updatecsv: + myFields = ['id', 'image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] + writer = csv.DictWriter(updatecsv, fieldnames=myFields) + writer.writeheader() +updatecsv.close() +csv_path = '/aul/homes/data/house3/contour_features.csv' +with open(csv_path, newline='') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + if (row['label']==str(0)): + contour = 
row
+            img_path = '/aul/homes/data/house3/' + 'contour_all/' + row['id'] + '.png'
+            img = cv2.imread(img_path)
+            img_newpath = '/aul/homes/final_contour/house3/nopanel/' + row['id'] + '.png'
+            cv2.imwrite(img_newpath, img)
+            print(contour['id'])
+            with open(csvpath, 'a') as updatecsv:
+                writer = csv.writer(updatecsv)
+                writer.writerow([contour['id'], contour['image'],contour['size'],contour['pole'],contour['mean'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']])
+            updatecsv.close()
+csvfile.close()
+print('finish')
\ No newline at end of file
diff --git a/data_preprocessing/contour_panel_extraction.py b/data_preprocessing/contour_panel_extraction.py
new file mode 100644
index 0000000..cb9cc74
--- /dev/null
+++ b/data_preprocessing/contour_panel_extraction.py
@@ -0,0 +1,23 @@
+import csv
+import cv2
+
+num = 0
+csv_path = '/aul/homes/final/nosplit/train/train_nopanel.csv'
+csvpath_train_nopanel = '/aul/homes/final/nosplit/train/train_nopanel.csv'
+with open(csv_path, newline='') as csvfile:
+    reader = csv.DictReader(csvfile)
+    for row in reader:
+#        if (row['label']==str(1)):
+#            contour = row
+        img_name = row['id']
+        location = row['location'][-1]
+        if (location == '0'):
+            print(row['location'])
+            # '0' is the last digit of 'location10', so map it back to house10
+            # (matches the handling in mean_cache_split.py)
+            location = '10'
+#        print(location)
+        img_path = '/aul/homes/final_contour/house' + location + '/panel/' + img_name + '.png'
+        img = cv2.imread(img_path)
+        img_newpath = '/aul/homes/final/split/train/nopanel/' + img_name + '.png'
+        cv2.imwrite(img_newpath, img)
+csvfile.close()
+
\ No newline at end of file
diff --git a/data_preprocessing/csvupdate.py b/data_preprocessing/csvupdate.py
new file mode 100644
index 0000000..0c9bd3d
--- /dev/null
+++ b/data_preprocessing/csvupdate.py
@@ -0,0 +1,29 @@
+import csv
+import os.path
+from os import path
+
+csv_path = '/aul/homes/dataset/dataset930/house' + str(1) + '/house' + str(1) + '.csv'
+csvpath = '/aul/homes/dataset/dataset930/house' + str(1) + '/location' + str(1) + '.csv'
+
+with open(csvpath, 'a') as csvupdate:
+    myFields = ['id', 'location', 'label']
+    writer = csv.DictWriter(csvupdate, fieldnames=myFields)
+    writer.writeheader()
+csvupdate.close()
+with open(csv_path, newline='') as csvfile:
+    reader = csv.DictReader(csvfile)
+    for row in reader:
+        img = {}
+        img['id'] = row['id']
+        img['location'] = row['location']
+        img['label'] = row['label']
+        if (path.exists('/aul/homes/dataset/dataset930/house' + str(1) + '/roof/' + img['id'] + '.png') == True):
+            with open(csvpath, 'a') as csvupdate:
+                writer = csv.writer(csvupdate)
+                writer.writerow([img['id'], img['location'], img['label']])
+            csvupdate.close()
+csvfile.close()
+print('finish')
+
+
+
diff --git a/data_preprocessing/data_augment.py b/data_preprocessing/data_augment.py
new file mode 100644
index 0000000..e45c0a9
--- /dev/null
+++ b/data_preprocessing/data_augment.py
@@ -0,0 +1,20 @@
+import Augmentor
+p = Augmentor.Pipeline("/aul/homes/final/split/location17/panel/")
+# Point to a directory containing ground truth data.
+# Images with the same file names will be added as ground truth data
+# and augmented in parallel to the original data.
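+#
+# If parallel ground-truth masks are available, Augmentor can register them with
+# Pipeline.ground_truth(); the directory below is only a hypothetical example:
+# p.ground_truth("/aul/homes/final/split/location17/panel_masks/")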
+ +# Add operations to the pipeline as normal: +p.rotate90(probability=1) +p.rotate270(probability=1) +p.flip_left_right(probability=1) +p.flip_top_bottom(probability=1) +p.rotate(probability=1, max_left_rotation=5, max_right_rotation=5) +p.flip_left_right(probability=1) +p.zoom_random(probability=1, percentage_area=0.8) +p.flip_top_bottom(probability=1) +p.sample(27420) diff --git a/data_preprocessing/data_integration.py b/data_preprocessing/data_integration.py new file mode 100644 index 0000000..895786c --- /dev/null +++ b/data_preprocessing/data_integration.py @@ -0,0 +1,115 @@ +import csv +import cv2 + + +csvpath_all = '/aul/homes/final/split/location810/contour_all.csv' +with open(csvpath_all, 'a') as csvfile: + myFields = ['id', 'location','image', 'size','pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() +csvfile.close() + +csvpath_yes = '/aul/homes/final/split/location810/contour_features.csv' +with open(csvpath_yes, 'a') as csvfile: + myFields = ['id', 'location', 'image', 'size','pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() +csvfile.close() + +csvpath_no = '/aul/homes/final/split/location810/no_contour_features.csv' +with open(csvpath_no, 'a') as csvfile: + myFields = ['id', 'location','image', 'size','pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() +csvfile.close() + +for i in range(8,11): + csv_path = '/aul/homes/final_contour/house' + str(i) + '/contour_all.csv' + with open(csv_path, newline='') as csv_file: + reader = csv.DictReader(csv_file) + for row in reader: + contour = {} + contour = row + contour['location'] = 'location' + str(i) + with open(csvpath_all, 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([contour['id'], contour['location'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) + csvfile.close() +# print(contour) + if(contour['label'] == str(1)): + with open(csvpath_yes, 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([contour['id'], contour['location'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) + csvfile.close() + + if(contour['label'] == str(0)): + with open(csvpath_no, 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([contour['id'], contour['location'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) + csvfile.close() + + + csv_file.close() + print(csv_path) + + + + + +import csv +import cv2 + + +csvpath_train_nopanel = '/aul/homes/final/nosplit/train/train_nopanel.csv' +with open(csvpath_train_nopanel, 'a') as csvfile: + myFields = ['id', 'location','image', 
'size','pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() +csvfile.close() + +csvpath_test_nopanel = '/aul/homes/final/nosplit/test/test_nopanel.csv' +with open(csvpath_test_nopanel , 'a') as csvfile: + myFields = ['id', 'location', 'image', 'size','pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() +csvfile.close() + +csvpath_validation_nopanel = '/aul/homes/final/nosplit/validation/validation_nopanel.csv' +with open(csvpath_validation_nopanel, 'a') as csvfile: + myFields = ['id', 'location','image', 'size','pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() +csvfile.close() + + +csv_path = '/aul/homes/final/nosplit/no_contour_features.csv' +i = 0 +with open(csv_path, newline='') as csv_file: + reader = csv.DictReader(csv_file) + for row in reader: + contour = {} + contour = row + if ((i %10) <3): + with open(csvpath_test_nopanel, 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([contour['id'], contour['location'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) + csvfile.close() + +# print(contour) + elif ((i %10) >7): + with open(csvpath_validation_nopanel, 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([contour['id'], contour['location'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) + csvfile.close() + + else: + with open(csvpath_train_nopanel, 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([contour['id'], contour['location'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) + csvfile.close() + i = i + 1 + +csv_file.close() + + + \ No newline at end of file diff --git a/data_preprocessing/feature_extraction.py b/data_preprocessing/feature_extraction.py new file mode 100644 index 0000000..6fb22af --- /dev/null +++ b/data_preprocessing/feature_extraction.py @@ -0,0 +1,265 @@ +# OpenCV lib +import os + + +import cv2 +import glob as gb +import numpy as np +import csv +import math + +def getContourStat(img, contour): + mask = np.zeros(img.shape, dtype="uint8") + cv2.drawContours(mask, [contour], -1, 255, -1) + mean, stddev = cv2.meanStdDev(img, mask=mask) + return mean, stddev + +def cal_roofarea(image): + black = cv2.threshold(image, 0, 255, 0)[1] + _,contours, hierarchy = cv2.findContours(black, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + # cv2.drawContours(img, contours, -1, (255, 0, 0), 2) + area = [cv2.contourArea(c) for c in contours] + roof_index = np.argmax(area) + roof_cnt = contours[roof_index] + # contourArea will return the wrong value if the contours are self-intersections + roof_area = cv2.contourArea(roof_cnt) + #print('roof area 
= '+ str(roof_area)) + return (roof_area,roof_cnt) + +def pole(img, contour): + ori_img = img.copy() + image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + cont = cal_roofarea(image_grayscale)[1] + cv2.drawContours(ori_img, cont, -1, (255, 0, 0), 3) + #print(len(contour)) + contour_res =[] + back = 1 + cnt = contour + leftmost = tuple(cnt[cnt[:, :, 0].argmin()][0]) + rightmost = tuple(cnt[cnt[:, :, 0].argmax()][0]) + topmost = tuple(cnt[cnt[:, :, 1].argmin()][0]) + bottommost = tuple(cnt[cnt[:, :, 1].argmax()][0]) + pole = [leftmost,rightmost,topmost,bottommost] + for point in pole: + # check the distance with contours, biggest contour + # when it is negative, means the point is outside the contours + dist = cv2.pointPolygonTest(cont, point, True) + # print(dist) + if (dist <=0): + back = 0 + else: + pass + + return (ori_img,contour,back) +def rotate_rectangle(img_name,img, contour): + + shape= {} + shape['id'] = img_name +# for c in contour: + c = contour + + area = cv2.contourArea(c) + x,y,w,h = cv2.boundingRect(c) + ratiowh = min(float(w/h),float(h/w)) + shape['ratiowh'] = ratiowh + + ratioarea = float(area/(w*h)) + shape['ratioarea'] = ratioarea + + epsilon = 0.01 * cv2.arcLength(c, True) + approx = cv2.approxPolyDP(c, epsilon, True) + + approxlen = len(approx) + shape['approxlen'] = approxlen + + + # the original num set to be -1 to be different no operation + num_angle = 0 + num_angle90 = -1 + num_angle80 = -1 + num_angle70 = -1 + + mask = np.zeros(img.shape, np.uint8) + cv2.drawContours(mask, [approx], -1, (255, 255, 255), -1) + cv2.drawContours(img, [approx], -1, (255, 255, 255), 2) + # mask = np.concatenate((mask, mask, mask), axis=-1) + gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) + contour_list = [] + ret, thresh = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY) + _,contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + # get the list of contours + for points in contours[0]: + x, y = points.ravel() + contour_list.append([x, y]) + corners = cv2.goodFeaturesToTrack(gray, 50, 0.01, 10) + corners = np.int0(corners) + for i in corners: + x, y = i.ravel() + # decide whether the corner is on the contours + if (cv2.pointPolygonTest(contours[0], (x, y), True) == 0): + center_index = contour_list.index([x, y]) + length = len(contour_list) + # get the point three before, and ignore the end point + a_index = center_index - 5 + b_index = center_index + 5 + if ((a_index > 0) & (b_index > 0) & (a_index < length)& (b_index < length)): + xa, ya = contour_list[a_index] + xb, yb = contour_list[b_index] + # print(x , y) + # print(xa, ya) + a = math.sqrt((x - xa) * (x - xa) + (y - ya) * (y - ya)) + b = math.sqrt((x - xb) * (x - xb) + (y - yb) * (y - yb)) + c = math.sqrt((xa - xb) * (xa - xb) + (ya - yb) * (ya - yb)) + if ((a > 0) & (b > 0)): + if(((a * a + b * b - c * c) / (2 * a * b))<1) & (((a * a + b * b - c * c) / (2 * a * b) >-1)): + angle = math.degrees(math.acos((a * a + b * b - c * c) / (2 * a * b))) + num_angle =num_angle +1 + # print(angle) + if (angle < 90): + num_angle90 = num_angle90 + 1 + if (angle < 80): + num_angle80 = num_angle80 + 1 + if (angle < 70): + num_angle70 = num_angle70 + 1 + cv2.circle(img, (x, y), 5, 255, -1) + + shape['numangle'] = num_angle + shape['numangle90'] = num_angle90 + shape['numangle80'] = num_angle80 + shape['numangle70'] = num_angle70 + + return(shape) + +def main(): + # the file store the contour file + csvpath_all = '/aul/homes/final_contour/house3/contour_all.csv' + with open(csvpath_all, 'a') as csvfile: + 
myFields = ['id', 'image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() + csvfile.close() + + csvpath_yes = '/aul/homes/final_contour/house3/contour_features.csv' + with open(csvpath_yes, 'a') as csvfile: + myFields = ['id', 'image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() + csvfile.close() + + csvpath_no = '/aul/homes/final_contour/house3/no_contour_features.csv' + with open(csvpath_no, 'a') as csvfile: + myFields = ['id', 'image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() + csvfile.close() + + + img_path = gb.glob('/aul/homes/final_contour/house3/panel/*.png') + npy_path = '/aul/homes/dataset/dataset930/house3/contour/' + for path in img_path: + contour = {} + contour_name = path.split("/")[-1] + contour_name = contour_name.split(".")[0] + contour['id'] = contour_name + img_name = contour_name.split("_")[0] +# print(img_name) + c = np.load(npy_path + contour_name + '.npy') +# print(c) + # the file store images + img = cv2.imread('/aul/homes/dataset/dataset930/house3/roof/'+ img_name + '.png') + cv2.drawContours(img, c, -1, (0, 255, 0), 3) + image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + mean = getContourStat(image_grayscale, c)[0] + stddev =getContourStat(image_grayscale, c)[1] + contour['mean'] = mean[0][0] + contour['stddev'] = stddev[0][0] + + contour['image'] = str(img_name) + contour['size'] = cv2.contourArea(c) +# contour['cont'] = c + contour['pole'] = pole(img.copy(), c)[2] + area = cv2.contourArea(c) + perimeter = cv2.arcLength(c, True) + sq = 4 * math.pi * area / (perimeter * perimeter) + contour['square'] = sq + # print(sq) + shape = rotate_rectangle(img_name,img.copy(), c) + contour['ratiowh'] = shape['ratiowh'] + contour['ratioarea'] = shape['ratioarea'] + contour['approxlen'] = shape['approxlen'] + contour['numangle'] = shape['numangle'] + contour['numangle90'] = shape['numangle90'] + contour['numangle70'] = shape['numangle70'] + contour['label'] = str(1) + # the file to store the mean value and stddev + with open(csvpath_all, 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([contour['id'], contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) + csvfile.close() + with open(csvpath_yes, 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([contour['id'], contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) + csvfile.close() + print('finish') + + + img_path = gb.glob('/aul/homes/final_contour/house3/nopanel/*.png') + npy_path = '/aul/homes/dataset/dataset930/house3/contour/' + for path in img_path: + contour = {} + contour_name = path.split("/")[-1] + contour_name = contour_name.split(".")[0] + contour['id'] = contour_name + img_name = contour_name.split("_")[0] +# print(img_name) + c = np.load(npy_path + contour_name + 
'.npy') +# print(c) + # the file store images + img = cv2.imread('/aul/homes/dataset/dataset930/house3/roof/'+ img_name + '.png') + cv2.drawContours(img, c, -1, (0, 255, 0), 3) + image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + mean = getContourStat(image_grayscale, c)[0] + stddev =getContourStat(image_grayscale, c)[1] + contour['mean'] = mean[0][0] + contour['stddev'] = stddev[0][0] + + contour['image'] = str(img_name) + contour['size'] = cv2.contourArea(c) +# contour['cont'] = c + contour['pole'] = pole(img.copy(), c)[2] + area = cv2.contourArea(c) + perimeter = cv2.arcLength(c, True) + sq = 4 * math.pi * area / (perimeter * perimeter) + contour['square'] = sq + # print(sq) + shape = rotate_rectangle(img_name,img.copy(), c) + contour['ratiowh'] = shape['ratiowh'] + contour['ratioarea'] = shape['ratioarea'] + contour['approxlen'] = shape['approxlen'] + contour['numangle'] = shape['numangle'] + contour['numangle90'] = shape['numangle90'] + contour['numangle70'] = shape['numangle70'] + contour['label'] = str(0) + # the file to store the mean value and stddev + with open(csvpath_all, 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([contour['id'], contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) + csvfile.close() + with open(csvpath_no, 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([contour['id'], contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label']]) + csvfile.close() + print('finish') + + + + +main() + + + + + + + + diff --git a/data_preprocessing/mean_cache.py b/data_preprocessing/mean_cache.py new file mode 100644 index 0000000..5cf428c --- /dev/null +++ b/data_preprocessing/mean_cache.py @@ -0,0 +1,74 @@ +#openCV lib +import os + + +import cv2 +import glob as gb +import numpy as np +import csv +import math + +def getContourStat(img, contour): + mask = np.zeros((800,800), dtype="uint8") + cv2.drawContours(mask, [contour], -1, 255, -1) + mean, stddev = cv2.meanStdDev(img, mask=mask) + return mean, stddev + +def main(): + # the file store the contour file + csvpath_all = '/aul/homes/1019/split/feature_all.csv' + with open(csvpath_all, 'a') as csvfile: + myFields = ['id','location','image', 'size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label','vgg_pro','vgg_class'] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() + csvfile.close() + # image path + + img_path_panel = gb.glob('/aul/homes/final_contour/house'+ str(i) +'/panel/*.png') + img_path_nopanel = gb.glob('/aul/homes/final_contour/house'+ str(i) +'/nopanel/*.png') + npy_path = '/aul/homes/dataset/dataset930/house'+ str(i) +'/contour/' + csv_path = '/aul/homes/1019/split/feature17.csv' + with open(csv_path, newline='') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + contour = row + i = contour['location'][-1] + if (i =='0'): + i = '10' + if (contour['label']=='1'): + path = img_path_panel + if (contour['label']=='0'): + path = img_path_nopanel + img = cv2.imread(path) + c = np.load(npy_path + contour['image'] + '.npy') + image_grayscale = 
cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + mean = getContourStat(image_grayscale, c)[0] + stddev =getContourStat(image_grayscale, c)[1] + contour['mean'] = mean[0][0] + contour['stddev'] = stddev[0][0] + mean_all = getContourStat(img, c)[0] + stddev_all = getContourStat(img, c)[1] + contour['b_mean'] = mean_all[0][0] + contour['g_mean'] = mean_all[1][0] + contour['r_mean'] = mean_all[2][0] + contour['b_stddev'] = stddev_all[0][0] + contour['g_stddev'] = stddev_all[1][0] + contour['r_stddev'] = stddev_all[2][0] + + with open(csvpath_all, 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([contour['id'], contour['location'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['b_mean'],contour['g_mean'],contour['r_mean'],contour['b_stddev'],contour['g_stddev'],contour['r_stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label'],contour['vgg_pro'],contour['vgg_class']]) + csvfile.close() + +main() + + + + + + + + + + + diff --git a/data_preprocessing/mean_cache_nosplit.py b/data_preprocessing/mean_cache_nosplit.py new file mode 100644 index 0000000..1fa2156 --- /dev/null +++ b/data_preprocessing/mean_cache_nosplit.py @@ -0,0 +1,73 @@ +#openCV lib +import os + + +import cv2 +import glob as gb +import numpy as np +import csv +import math + +def getContourStat(img, contour): + mask = np.zeros((800,800), dtype="uint8") + cv2.drawContours(mask, [contour], -1, 255, -1) + mean, stddev = cv2.meanStdDev(img, mask=mask) + return mean, stddev + +def main(): +# the file store the contour file + csvpath_all = '/aul/homes/1019/nosplit/feature_train_all.csv' + with open(csvpath_all, 'a') as csvfile: + myFields = ['id','image', 'size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label','vgg_pro','vgg_class'] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() + csvfile.close() + # image path + + csv_path = '/aul/homes/1019/nosplit/feature_train.csv' + with open(csv_path, newline='') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + contour = row + img_path_panel = '/aul/homes/final/nosplit/panel/' + contour['id'] +'.png' + img_path_nopanel = '/aul/homes/final/nosplit/nopanel/' + contour['id'] +'.png' + npy_path = '/aul/homes/dataset/dataset930/npy/' + if (contour['label']=='1'): + path = img_path_panel + if (contour['label']=='0'): + path = img_path_nopanel + + img = cv2.imread(path) + c = np.load(npy_path + contour['id'] + '.npy') + image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + mean = getContourStat(image_grayscale, c)[0] + stddev =getContourStat(image_grayscale, c)[1] + contour['mean'] = mean[0][0] + contour['stddev'] = stddev[0][0] + mean_all = getContourStat(img, c)[0] + stddev_all = getContourStat(img, c)[1] + contour['b_mean'] = mean_all[0][0] + contour['g_mean'] = mean_all[1][0] + contour['r_mean'] = mean_all[2][0] + contour['b_stddev'] = stddev_all[0][0] + contour['g_stddev'] = stddev_all[1][0] + contour['r_stddev'] = stddev_all[2][0] + + with open(csvpath_all, 'a') as csvfile: + writer = csv.writer(csvfile) + 
writer.writerow([contour['id'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['b_mean'],contour['g_mean'],contour['r_mean'],contour['b_stddev'],contour['g_stddev'],contour['r_stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label'],contour['vgg_pro'],contour['vgg_class']]) + csvfile.close() + +main() + + + + + + + + + + + + diff --git a/data_preprocessing/mean_cache_split.py b/data_preprocessing/mean_cache_split.py new file mode 100644 index 0000000..a48d7c3 --- /dev/null +++ b/data_preprocessing/mean_cache_split.py @@ -0,0 +1,72 @@ +# OpenCV lib +import os + + +import cv2 +import glob as gb +import numpy as np +import csv +import math + +def getContourStat(img, contour): + mask = np.zeros((800,800), dtype="uint8") + cv2.drawContours(mask, [contour], -1, 255, -1) + mean, stddev = cv2.meanStdDev(img, mask=mask) + return mean, stddev + +def main(): +# the file store the contour file + csvpath_all = './feature_train_all.csv' + with open(csvpath_all, 'a') as csvfile: + myFields = ['id','location','image', 'size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label','vgg_pro','vgg_class'] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() + csvfile.close() + # image path + + csv_path = './feature_train.csv' + with open(csv_path, newline='') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + contour = row + i = contour['location'][-1] + if (i =='0'): + i = '10' + img_path_panel = './final_contour/house'+ str(i) +'/panel/' + contour['id'] +'.png' + img_path_nopanel = './final_contour/house'+ str(i) +'/nopanel/' + contour['id'] +'.png' + npy_path = './dataset930/house'+ str(i) +'/contour/' + if (contour['label']=='1'): + path = img_path_panel + if (contour['label']=='0'): + path = img_path_nopanel + + img = cv2.imread(path) + c = np.load(npy_path + contour['id'] + '.npy') + image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + mean = getContourStat(image_grayscale, c)[0] + stddev =getContourStat(image_grayscale, c)[1] + contour['mean'] = mean[0][0] + contour['stddev'] = stddev[0][0] + mean_all = getContourStat(img, c)[0] + stddev_all = getContourStat(img, c)[1] + contour['b_mean'] = mean_all[0][0] + contour['g_mean'] = mean_all[1][0] + contour['r_mean'] = mean_all[2][0] + contour['b_stddev'] = stddev_all[0][0] + contour['g_stddev'] = stddev_all[1][0] + contour['r_stddev'] = stddev_all[2][0] + + with open(csvpath_all, 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([contour['id'], contour['location'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['b_mean'],contour['g_mean'],contour['r_mean'],contour['b_stddev'],contour['g_stddev'],contour['r_stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['label'],contour['vgg_pro'],contour['vgg_class']]) + csvfile.close() + +main() + + + + + + + + diff --git a/data_preprocessing/read_calculatemetric.py b/data_preprocessing/read_calculatemetric.py new file mode 100644 index 0000000..0ff1158 --- /dev/null +++ b/data_preprocessing/read_calculatemetric.py @@ -0,0 +1,41 @@ +import math +import csv + + +def metric(des,panel_panel, panel_nopanel,nopanel_panel,nopanel_nopanel): + 
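+    # des is a free-text description of the experiment; the four counts are the
+    # confusion-matrix cells (panel_panel=TP, panel_nopanel=FN, nopanel_panel=FP,
+    # nopanel_nopanel=TN). For reference, the Matthews correlation coefficient
+    # computed below is:
+    #   MCC = (TP*TN - FP*FN) / sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))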
metric = {} + TP = int(panel_panel) + FN = int(panel_nopanel) + FP = int(nopanel_panel) + TN = int(nopanel_nopanel) + ACCURACY = float((TP + TN)/(TP + FP + FN + TN)) + PRECISION = float(TP/(TP + FP)) + RECALL = float(TP/(TP + FN)) + F1 = float(2*PRECISION*RECALL/(PRECISION + RECALL)) + MCC = float((TP * TN - FP * FN)/ math.sqrt((TP + FP) * (FN + TN) * (FP + TN) * (TP + FN))) + SPECIFICITY = float(TN/(TN + FP)) + metric['TP'] = float(TP/(TP + FN)) + metric['FN'] = float(FN /(TP + FN)) + metric['TN'] = float(TN /(TN + FP)) + metric['FP'] =float(FP /(TN + FP)) + metric['ACCURACY'] = ACCURACY + metric['PRECISION'] =PRECISION + metric['RECALL']= RECALL + metric['F1'] = F1 + metric['MCC'] = MCC + metric['SPECIFICITY'] = SPECIFICITY + metric['description'] = des + print(metric) + csvpath = './result1.csv' + with open(csvpath, 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([metric['description'],metric['TP'],metric['FN'],metric['TN'],metric['FP'],metric['ACCURACY'],metric['MCC'],metric['F1'],metric['SPECIFICITY'],metric['PRECISION'],metric['RECALL']]) + csvfile.close() + +def main(): + with open('./result.csv', newline='') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + metric(row['des'],row['TP'],row['FN'],row['FP'],row['TN']) + csvfile.close() +main() diff --git a/data_preprocessing/under_smaple.py b/data_preprocessing/under_smaple.py new file mode 100644 index 0000000..ca71600 --- /dev/null +++ b/data_preprocessing/under_smaple.py @@ -0,0 +1,12 @@ +import os +import cv2 +import glob as gb +num = 0 +img_path = gb.glob("./*.png") +for path in img_path: + img_name = path.split("/")[-1] + img = cv2.imread(path) + if ((num % 5) == 0): + cv2.imwrite(os.path.join('./' + img_name),img) + num = num + 1 + \ No newline at end of file diff --git a/evaluation/generate_evaluation_confusionmatrics.py b/evaluation/generate_evaluation_confusionmatrics.py new file mode 100644 index 0000000..c9f416d --- /dev/null +++ b/evaluation/generate_evaluation_confusionmatrics.py @@ -0,0 +1,40 @@ +import math +import csv + + +def metric(des,panel_panel, panel_nopanel,nopanel_panel,nopanel_nopanel): + metric = {} + TP = int(panel_panel) + FN = int(panel_nopanel) + FP = int(nopanel_panel) + TN = int(nopanel_nopanel) + ACCURACY = float((TP + TN)/(TP + FP + FN + TN)) + PRECISION = float(TP/(TP + FP)) + RECALL = float(TP/(TP + FN)) + F1 = float(2*PRECISION*RECALL/(PRECISION + RECALL)) + MCC = float((TP * TN - FP * FN)/ math.sqrt((TP + FP) * (FN + TN) * (FP + TN) * (TP + FN))) + SPECIFICITY = float(TN/(TN + FP)) + metric['TP'] = float(TP/(TP + FN)) + metric['FN'] = float(FN /(TP + FN)) + metric['TN'] = float(TN /(TN + FP)) + metric['FP'] =float(FP /(TN + FP)) + metric['ACCURACY'] = ACCURACY + metric['PRECISION'] =PRECISION + metric['RECALL']= RECALL + metric['F1'] = F1 + metric['MCC'] = MCC + metric['SPECIFICITY'] = SPECIFICITY + metric['description'] = des + print(metric) + csvpath = './resultall.csv' + with open(csvpath, 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([metric['description'],metric['TP'],metric['FN'],metric['TN'],metric['FP'],metric['ACCURACY'],metric['MCC'],metric['F1'],metric['SPECIFICITY'],metric['PRECISION'],metric['RECALL']]) + csvfile.close() +def main(): + with open('./result.csv', newline='') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + metric(row['des'],row['TP'],row['FN'],row['FP'],row['TN']) + csvfile.close() +main() \ No newline at end of file diff --git a/evaluation/iou_calculation.py b/evaluation/iou_calculation.py 
new file mode 100644 index 0000000..af6f1c5 --- /dev/null +++ b/evaluation/iou_calculation.py @@ -0,0 +1,18 @@ +import csv +import math +num = {} +for i in range(0,11): + num[i] = 0 +number = 0 +csv_path = './rooftop_iou.csv' +with open(csv_path, newline='') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + iou = float(row['iou']) + for i in range(0,11): + if (iou > i*0.1): + num[i] = num[i] +1 + number = number + 1 +csvfile.close() +print(num) +print(number) diff --git a/evaluation/iou_score_generator.py b/evaluation/iou_score_generator.py new file mode 100644 index 0000000..2bf2420 --- /dev/null +++ b/evaluation/iou_score_generator.py @@ -0,0 +1,102 @@ +import numpy as np +import cv2 +import sys +import csv + +import os.path as ospath + +def hotkey(): + global outline_list + global current_outline + + KEY_UNDO = ord('u') + KEY_CLEAN = ord('c') + KEY_NEXT = ord('n') + KEY_SAVE = ord('s') + KEY_QUIT = ord('q') + + key = cv2.waitKey(0) + if key == KEY_QUIT: + print('*** Quit') + exit() + else: + print('*** Next Image') + cv2.destroyAllWindows() + +def main(argv): + # print ('Number of arguments:', len(argv), 'arguments.') + # print ('Argument List:', str(argv)) + contours_dir = "./data/panel/" + rooftop_img_dir = "./panel/" + rooftop_csv_path = './data/rooftop_solar_array_outlines_new.csv' + rooftop_iou_csv_path = './rooftop_iou.csv' + with open(rooftop_iou_csv_path, 'a') as csvfile: + myFields = ['id', 'location_id', 'label', 'solar_list', 'contour_num','iou'] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() + with open(rooftop_csv_path, newline='') as rooftop_csv_file: + reader = csv.DictReader(rooftop_csv_file) + for row in reader: + roof = {} + roof = row + contour_mask = eval(row['contour_num']) + # print(contour_mask) + contour_img = np.zeros((800,800,3), np.uint8) + for contour in contour_mask: + contour_path = contours_dir + contour + '.png' + # print(contour_path ) + img = cv2.imread(contour_path) + # cv2.imshow('img', img) + # cv2.waitKey(0) + excluded_color = [0, 0, 0] + indices_list = np.where(np.all(img != excluded_color, axis=-1)) + contour_img[indices_list] = [255, 255, 255] + # cv2.imshow('img',contour_img) + # cv2.waitKey(0) + + solar_mask = np.zeros((800,800,3), np.uint8) + outline_list = eval(row['solar_list']) + for outline in outline_list: + # print(outline) + pts = np.asarray(outline) + cv2.fillPoly(solar_mask, np.int_([pts]), (255, 255, 255)) + # cv2.polylines(solar_mask, [pts], True, (0, 0, 255), 2) + # cv2.imshow('img', solar_mask) + # cv2.waitKey(0) + # cv2.fillPoly(img_to_show, np.int_([pts]), (198, 133, 61)) + # cv2.fillPoly(img_to_show, np.int_([pts]), (255, 255, 255)) + # + predict_gray_mask = cv2.cvtColor(contour_img, cv2.COLOR_BGR2GRAY) + label_gray_mask = cv2.cvtColor(solar_mask, cv2.COLOR_BGR2GRAY) + # + # # rooftop_mask_size = cv2.countNonZero(rooftop_gray_mask) + # # solar_mask_size = cv2.countNonZero(solar_gray_mask) + # # size_ration = solar_mask_size / rooftop_mask_size + # # print(rooftop_mask_size) + # # print(solar_mask_size) + # # print(size_ration) + # + # # IOU Score + intersection = np.logical_and(predict_gray_mask, label_gray_mask) + union = np.logical_or(predict_gray_mask, label_gray_mask) + iou_score = np.sum(intersection) / np.sum(union) + # print(iou_score) + # + # print(iou_score) + # + # # print(size_ration/iou_score) + + # cv2.imshow(row['id'], img_to_show) + + # hotkey() + roof['iou'] = iou_score + with open(rooftop_iou_csv_path, 'a') as csvfile_new: + writer = csv.writer(csvfile_new) + 
writer.writerow([roof['id'], roof['location_id'], roof['label'], + roof['solar_list'], roof['contour_num'],roof['iou']]) + csvfile_new.close() + + rooftop_csv_file.close() + +if __name__ == "__main__": + main(sys.argv[1:]) \ No newline at end of file diff --git a/evaluation/orientation_calculate.py b/evaluation/orientation_calculate.py new file mode 100644 index 0000000..22713b2 --- /dev/null +++ b/evaluation/orientation_calculate.py @@ -0,0 +1,33 @@ +import csv +import math +csv_path = './orientation_positive.csv' +num_all = 0 +num_5 = 0 +num_10 = 0 +num_15 = 0 +num_20 = 0 +with open(csv_path, newline='') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + contour_orientation =float(row['contour']) + roof_orientation = float(row['roof']) + contour_orientation_45differ = math.fabs(math.fabs(contour_orientation)- 45) + roof_orientation_45differ = math.fabs(math.fabs(roof_orientation)- 45) + differ = math.fabs(contour_orientation_45differ - roof_orientation_45differ) + if(differ < 5): + num_5 = num_5 + 1 + if (differ < 10): + num_10 = num_10 + 1 + if (differ < 15): + num_15 = num_15 + 1 + if (differ < 20): + num_20 = num_20 + 1 + num_all = num_all + 1 +csvfile.close() +percent_5 = num_5 /num_all +percent_10 = num_10 /num_all +percent_15 = num_15 /num_all +percent_20 = num_20 /num_all + +print(num_all ,num_5,num_10,num_15,num_20) +print(percent_5,percent_10,percent_15,percent_20) \ No newline at end of file diff --git a/evaluation/postive_contour_generate.py b/evaluation/postive_contour_generate.py new file mode 100644 index 0000000..31c5594 --- /dev/null +++ b/evaluation/postive_contour_generate.py @@ -0,0 +1,19 @@ + +import csv +csv_path ='./feature_test_all_vgg_svm_linear.csv' +csv_path_new = './contour_all_positive.csv' +with open(csv_path_new, 'a') as csvfile: + myFields = ['id', 'location','image', 'label','predict'] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() +csvfile.close() +with open(csv_path, newline='') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + contour = row + if(contour['linear_nosplit_class']== '1'): + with open(csv_path_new , 'a') as csvfile_new: + writer = csv.writer(csvfile_new) + writer.writerow([contour['id'], contour['location'], contour['image'],contour['label'], contour['linear_nosplit_class']]) + csvfile_new.close() +csvfile.close() diff --git a/evaluation/roof_contour_match.py b/evaluation/roof_contour_match.py new file mode 100644 index 0000000..bd9cb44 --- /dev/null +++ b/evaluation/roof_contour_match.py @@ -0,0 +1,33 @@ +import csv +csv_path = './rooftop_solar_array_outlines.csv' +csv_path_new = './rooftop_solar_array_outlines_new.csv' +csv_path_contour = './contour_all_positive.csv' +with open(csv_path_new, 'a') as csvfile: + myFields = ['id', 'location','location_id','label','solar_list','contour_num'] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() +with open(csv_path, newline='') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + contour = row + img_name = contour['id'] + contour_num = [] + with open(csv_path_contour, newline='') as csv_file: + reader = csv.DictReader(csv_file) + for row in reader: + if(row['image'] == img_name): + if (row['id'] not in contour_num): + contour_num.append(row['id']) + else: + pass + print(contour_num) + csv_file.close() + contour['contour_num'] = contour_num + with open(csv_path_new, 'a') as csvfile_new: + writer = csv.writer(csvfile_new) + writer.writerow([contour['id'], contour['location'], 
contour['location_id'], contour['label'], + contour['solar_list'],contour['contour_num']]) + csvfile_new.close() +csvfile.close() + + diff --git a/evaluation/solar_array_orientation.py b/evaluation/solar_array_orientation.py new file mode 100644 index 0000000..5f72a3e --- /dev/null +++ b/evaluation/solar_array_orientation.py @@ -0,0 +1,100 @@ +import os +import cv2 +from skimage.segmentation import slic +from skimage import color +from skimage import data +from skimage import io +# Traverse files +import glob as gb +# Math lib +import numpy as np +import time +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec +import math +import csv +import os.path as path + +from matplotlib.pyplot import imshow +import matplotlib.pyplot as plt +import matplotlib.image as mpimg +def cal_roofarea(image): + black = cv2.threshold(image, 0, 255, 0)[1] + # cv2.imshow('img', black) + # cv2.waitKey(0) + contours, hierarchy = cv2.findContours(black, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + # cv2.drawContours(img, contours, -1, (255, 0, 0), 2) + area = [cv2.contourArea(c) for c in contours] + roof_index = np.argmax(area) + roof_cnt = contours[roof_index] + # contourArea will return the wrong value if the contours are self-intersections + roof_area = cv2.contourArea(roof_cnt) + #print('roof area = '+ str(roof_area)) + return (roof_area,roof_cnt) + + + + +img_path = './panel/' +contours_path = './projects/data/panel/' +csv_path = './vggsvmlogicalregression2features.csv' +with open('./data/orientation_positive.csv', 'a') as csvfile: + myFields = ['id', 'image','contour','roof'] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() +csvfile.close() +# num_all = 0 +# num_5 =0 +# num_10 =0 +# num_15 = 0 +with open(csv_path, newline='') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + orientation = {} + if(row['label'] == '1' and row['vggsvmlogicalregression2features']=='1'): + orientation['id'] =row['id'] + orientation['image'] = row['image'] + img_name = row['image'] + contour_name = row['id'] + image_path = img_path + img_name + '.png' + contour_path = img_path + contour_name + '.png' + if path.exists(image_path): + if path.exists(contour_path ): + img_roof = cv2.imread(image_path) + img_contour = cv2.imread(contour_path) + # cal_roofarea(img) + img_contour_grayscale = cv2.cvtColor(img_contour, cv2.COLOR_BGR2GRAY) + cont_contour = cal_roofarea(img_contour_grayscale)[1] + cv2.drawContours(img_contour, cont_contour, -1, (0, 0, 255), -1) + rect_contour = cv2.minAreaRect(cont_contour) + orientation['contour'] = rect_contour[2] + # print(rect_contour[2]) + # box_contour = cv2.boxPoints(rect_contour) + # box = np.int0(box) + # print(box) + # cv2.drawContours(img_contour, [box], 0, (255, 0, 0), 1) + img_roof_grayscale = cv2.cvtColor(img_roof, cv2.COLOR_BGR2GRAY) + cont_roof = cal_roofarea(img_roof_grayscale )[1] + # cv2.drawContours(img , cont, -1, (0, 0, 255), -1) + rect_roof = cv2.minAreaRect(cont_roof) + orientation['roof'] = rect_roof[2] + # print(rect[2]) + # box = cv2.boxPoints(rect) + # box = np.int0(box) + # # print(box) + # cv2.drawContours(img, [box], 0, (255, 0, 0), 1) + # + # x, y, w, h = cv2.boundingRect(cont) + # cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2) + # print(x,y,w,h) + # print(cal_roofarea(cont)[0]) + print(orientation) + # cv2.imshow('img', img_contour) + # cv2.waitKey(0) + with open('./data/orientation_positive.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([orientation['id'], 
orientation['image'], orientation['contour'],orientation['roof']]) + csvfile.close() + + +csvfile.close() \ No newline at end of file diff --git a/models/PCA/components.py b/models/PCA/components.py new file mode 100644 index 0000000..5f78c91 --- /dev/null +++ b/models/PCA/components.py @@ -0,0 +1,20 @@ +import numpy as np +from sklearn.decomposition import PCA +from sklearn.preprocessing import MinMaxScaler + +filepath = './vgg_predict.csv' #your path here +data = np.genfromtxt(filepath, delimiter=',', dtype='float64') + +scaler = MinMaxScaler(feature_range=[0, 1]) +data_rescaled = scaler.fit_transform(data[1:, 3:13]) +#Fitting the PCA algorithm with our Data +pca = PCA().fit(data_rescaled) +#Plotting the Cumulative Summation of the Explained Variance +plt.figure() +plt.plot(np.cumsum(pca.explained_variance_ratio_)) +plt.xlabel('Number of Components') +plt.ylabel('Variance (%)') #for each component +plt.title('Pulsar Dataset Explained Variance') +plt.savefig('pca.png') +plt.show() + diff --git a/models/PCA/pca.py b/models/PCA/pca.py new file mode 100644 index 0000000..869570f --- /dev/null +++ b/models/PCA/pca.py @@ -0,0 +1,35 @@ +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +from sklearn.preprocessing import StandardScaler + +import numpy as np +from sklearn.metrics import classification_report, confusion_matrix +from sklearn.model_selection import train_test_split +col_names = ['id', 'image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction','prediction_class','label'] +# load dataset +data = pd.read_csv("./vgg_predict.csv", header=None, names=col_names) +data = data.dropna() +# feature_cols = ['pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction'] +feature_cols = ['pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +# feature_cols = ['pole','prediction'] +X = data[feature_cols] +y = data.label +scaler = StandardScaler() +X = scaler.fit_transform(X)# Features +from sklearn.decomposition import PCA + +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20) +pca = PCA(n_components=6) +X_train = pca.fit_transform(X_train) +X_test = pca.transform(X_test) + + +from sklearn.svm import SVC +svclassifier = SVC(kernel='poly',degree = 7,class_weight='balanced', random_state=0) +svclassifier.fit(X_train, y_train) +y_pred = svclassifier.predict(X_test) +from sklearn.metrics import classification_report, confusion_matrix +print(confusion_matrix(y_test,y_pred)) +print(classification_report(y_test,y_pred)) + diff --git a/models/hybrid/hybrid.py b/models/hybrid/hybrid.py new file mode 100644 index 0000000..01fa9e9 --- /dev/null +++ b/models/hybrid/hybrid.py @@ -0,0 +1,85 @@ +import pandas +import pandas as pd +import pickle +from sklearn.linear_model import LogisticRegression +from sklearn import metrics +from sklearn import datasets +from sklearn.preprocessing import StandardScaler +import numpy as np +from sklearn.metrics import classification_report, confusion_matrix +import csv +import time +start_time = time.time() + + + +data = pd.read_csv("./svmrbftrainprobility.csv") +data = data.dropna() +feature_cols = ['vgg_pro','vgg_class','svmrbf_class','svmrbfpro'] +X = data[feature_cols] + +scaler = StandardScaler() +X = scaler.fit_transform(X)# Features + +y = data.label # Target variable + +# from sklearn.model_selection import train_test_split +# 
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0)
+X_train = X
+y_train = y
+
+from sklearn.svm import SVC
+svclassifier = SVC(kernel='rbf',class_weight='balanced')
+model = svclassifier.fit(X_train, y_train)
+
+
+# instantiate the model (using the default parameters)
+
+# fit the model with data
+model.fit(X_train, y_train)
+# from sklearn.externals import joblib  # deprecated import path; joblib is imported directly below
+from joblib import dump, load
+dump(model, 'svmrbfhybrid.joblib')
+# model = load('svmrbfhybrid.joblib')
+
+from sklearn import metrics
+
+
+
+
+datatest = pd.read_csv("./svmrbftestpro.csv")
+datatest = datatest.dropna()
+feature_cols = ['vgg_pro','vgg_class','svmrbf_class','svmrbfpro']
+Xtest = datatest[feature_cols]
+scaler = StandardScaler()
+Xtest = scaler.fit_transform(Xtest)# Features
+ytest = datatest.label # Target variable
+
+y_predict= model.predict(Xtest)
+
+
+df = pd.DataFrame(datatest)
+df.insert(25, "hybrid", y_predict, True)
+
+export_csv = df.to_csv ('./svmrbftestprohybrid.csv', index = None)
+print(confusion_matrix(ytest, y_predict))
+tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel()
+print(tn,fp,fn,tp)
+with open('./result.csv', 'a') as csvfile:
+ writer = csv.writer(csvfile)
+ writer.writerow(['hybrid',tn,fp,fn,tp])
+csvfile.close()
+time = time.time() - start_time
+with open('./time.csv', 'a') as csvfile:
+ writer = csv.writer(csvfile)
+ writer.writerow(['hybrid',time])
+csvfile.close()
+
+
+
+
+
+
+
+
+
diff --git a/models/hybrid/linear_model/linearmodel.py b/models/hybrid/linear_model/linearmodel.py
new file mode 100644
index 0000000..5948d96
--- /dev/null
+++ b/models/hybrid/linear_model/linearmodel.py
@@ -0,0 +1,62 @@
+import pandas as pd
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import StandardScaler
+
+from sklearn.metrics import classification_report, confusion_matrix
+col_names = ['id', 'image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction','prediction_class','label']
+# load dataset
+data = pd.read_csv("./train/vgg_predict.csv", header=None, names=col_names)
+data = data.dropna()
+# feature_cols = ['pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction']
+feature_cols = ['pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction']
+# feature_cols = ['pole','prediction']
+X = data[feature_cols]
+
+scaler = StandardScaler()
+X = scaler.fit_transform(X)# Features
+
+y = data.label # Target variable
+
+X_train = X
+y_train = y
+from sklearn import linear_model
+
+from sklearn.linear_model import LogisticRegression
+from sklearn import metrics
+from sklearn.linear_model import LogisticRegressionCV
+from sklearn.linear_model import RidgeClassifier
+from sklearn.linear_model import RidgeClassifierCV
+from sklearn.linear_model import PassiveAggressiveClassifier
+from sklearn.datasets import make_classification
+# X, y = make_classification(n_features=4, random_state=0)  # leftover example call; the scaled features above are what the model is fit on
+model =PassiveAggressiveClassifier(max_iter=1000, random_state=0,tol=1e-3,class_weight = 'balanced')
+
+
+# fit the model with data
+model.fit(X_train, y_train)
+
+col_names = ['id', 'image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction','prediction_class','label','lrpredict','svmpredict']
+# load dataset
+data = pd.read_csv("./vgg_predict.csv", header=None, names=col_names)
+
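+# evaluation below re-scores the trained model on the full feature table from ./vgg_predict.csv,
+# assuming that file shares the training csv's column layout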
+data = data.dropna() +# feature_cols = ['pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction'] +feature_cols = ['pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction'] + +X1 = data[feature_cols] + +scaler = StandardScaler() +X1 = scaler.fit_transform(X1)# Features + +y1 = data.label # Target variable + + +y_pred1 = model.predict(X1) + + + +print(confusion_matrix(y1,y_pred1 )) +print(classification_report(y1,y_pred1 )) + diff --git a/models/hybrid/union_differentweights.py b/models/hybrid/union_differentweights.py new file mode 100644 index 0000000..c7277fd --- /dev/null +++ b/models/hybrid/union_differentweights.py @@ -0,0 +1,36 @@ +import pandas +import pandas as pd +import pickle +from sklearn.linear_model import LogisticRegression +from sklearn import metrics +from sklearn import datasets +from sklearn.preprocessing import StandardScaler +import numpy as np +from sklearn.metrics import classification_report, confusion_matrix +import pandas +import pandas as pd +import pickle +from sklearn.linear_model import LogisticRegression +from sklearn import metrics +from sklearn import datasets +from sklearn.preprocessing import StandardScaler +import numpy as np +from sklearn.metrics import classification_report, confusion_matrix +import csv +dataset1 = pd.read_csv("./non_split_test_result.csv") +dataset1 = dataset1.dropna() +df = pd.DataFrame(dataset1) + +# def f(x,y): +# # print(x,y) +# return round(0.5*x + 0.5*y) + +ytest1 = dataset1.label + + +y_predict1=dataset1.hard_pred_label +print(confusion_matrix(ytest1, y_predict1)) +tn, fp, fn, tp = confusion_matrix(ytest1, y_predict1, labels=[0,1]).ravel() +print(tn,fp,fn,tp) + + diff --git a/models/logical_regression/logical_model_data.py b/models/logical_regression/logical_model_data.py new file mode 100644 index 0000000..e352be8 --- /dev/null +++ b/models/logical_regression/logical_model_data.py @@ -0,0 +1,392 @@ +# OpenCV lib +import os +import tensorflow as tf +import cv2 +from skimage.segmentation import slic +from skimage import color +from skimage import data +from skimage import io +# Traverse files +import glob as gb +import tensorflow as tf +# Math lib +import numpy as np +import time +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec +import math +import csv + +from matplotlib.pyplot import imshow +import matplotlib.pyplot as plt +import matplotlib.image as mpimg + + +def kmeans(img): + # K-means + # Convert image to one dimension data + img_ori = img.copy() + img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) + Z = img.reshape((-1, 3)) + # Z = img.reshape((-1, 3)) + Z = np.float32(Z) + # define criteria, number of clusters(K) and apply kmeans() + criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0) + K =5 + # Run k-means + # ret: compactness + # labels: + # centers: array of centers of clusters + ret, label, center = cv2.kmeans(Z, K, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS) + # Now convert back into uint8, and make original image + center = np.uint8(center) + res = center[label.flatten()] + res2 = res.reshape(img.shape) + res2_gray = cv2.cvtColor(res2, cv2.COLOR_BGR2GRAY) + + hist = res2_gray.ravel() + hist = set(hist) + hist = sorted(hist) + # print(len(hist)) + threshold = [] + tag=[] + tag1 = [] + tag_dilate3 = [] + tag_dilate5 = [] + tag_dilate7 = [] + tag_close3 = [] + tag_close5 = [] + tag_close7 = [] + for i in range(len(hist)-1): + threshold.append(int(hist[i]/2 + hist[i+1]/ 2)) + # no 
dilate , not accurate + kernal3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3)) + kernal5 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5)) + kernal7 = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7)) + for j in range(len(hist)-1): + if j ==(len(hist)-2): + dia=cv2.inRange(res2_gray, threshold[j], 255) + tag.append(dia) + tag_dilate3.append(cv2.dilate(dia, kernal3, iterations=1)) + tag_dilate5.append(cv2.dilate(dia, kernal5, iterations=1)) + tag_dilate7.append(cv2.dilate(dia, kernal7, iterations=1)) + else: + dia = cv2.inRange(res2_gray, threshold[j], threshold[j+1]) + tag.append(dia) + tag_dilate3.append(cv2.dilate(dia, kernal3, iterations=1)) + tag_dilate5.append(cv2.dilate(dia, kernal5, iterations=1)) + tag_dilate7.append(cv2.dilate(dia, kernal7, iterations=1)) + + for j in range(len(hist) - 1): + if j == (len(hist) - 2): + dia1 = cv2.inRange(res2_gray, threshold[j], 255) + tag1.append(dia1) + + tag_close3.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal3)) + tag_close5.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal5)) + tag_close7.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal7)) + else: + dia1 = cv2.inRange(res2_gray, threshold[j], threshold[j + 1]) + tag1.append(dia1) + tag_close3.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal3)) + tag_close5.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal5)) + tag_close7.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal7)) + + # return(tag,tag_dilate3,tag_close3, tag_dilate5,tag_close5, tag_dilate7, tag_close7 ,hist) + return (tag, hist, tag_close3, tag_dilate5, tag_close5, tag_dilate7, tag_close7, hist) +# the kernel number is returned , use kernel 3 temporiarly. + +# find contours based on kmeans method +def find_contours(img, mask_list): + # Get the area of roof + masks_length = len(mask_list) + cont = [] + for i in range(0, masks_length): + c, h = cv2.findContours(mask_list[i], cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + for contour in c: + cont.append(contour) +# cv2.drawContours(img, cont, -1, (0, 0, 255), 2) + return [img,cont] + +# use size filter +def filter_size(img,contour): + image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + roof_area = cal_roofarea(image_grayscale)[0] + cont = [] + for c in contour: + area = cv2.contourArea(c) + if (area >0): + ratio = area / roof_area + if ((area >800) & (ratio < 0.5)): + cont.append(c) + areas = [] + for i, co in enumerate(cont): + areas.append((i, cv2.contourArea(co),co)) + + a2 = sorted(areas, key=lambda d: d[1], reverse=True) + # cv2.drawContours(img, cont, -1, (0, 0, 255), 2) + # cv2.imshow('img',img) + # cv2.waitKey(0) + return [img,a2] + +# calculate the roof area so we can remove a part of the contours +def cal_roofarea(image): + black = cv2.threshold(image, 0, 255, 0)[1] + contours, hierarchy = cv2.findContours(black, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + # cv2.drawContours(img, contours, -1, (255, 0, 0), 2) + area = [cv2.contourArea(c) for c in contours] + roof_index = np.argmax(area) + roof_cnt = contours[roof_index] + # contourArea will return the wrong value if the contours are self-intersections + roof_area = cv2.contourArea(roof_cnt) + #print('roof area = '+ str(roof_area)) + return (roof_area,roof_cnt) + +# calculate the mean pixel value in the contours +def getContourStat(img,contour): + mask = np.zeros(img.shape,dtype="uint8") + cv2.drawContours(mask, [contour], -1, 255, -1) + mean,stddev = cv2.meanStdDev(img,mask=mask) + return mean, stddev + + +# use to show the result of kmeans + +def get_mask(img,mask_list): + masks_length = 
len(mask_list) + mask_color = [(255,0,0),(0,255,0),(0,0,255),(255,255,255),(128,128,128),(0,0,0)] + for i in range(0, masks_length): + img[mask_list[i]!= 0] = mask_color[i] + return img + + +def pole(img, contour): + ori_img = img.copy() + image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + cont = cal_roofarea(image_grayscale)[1] + cv2.drawContours(ori_img, cont, -1, (255, 0, 0), 3) + #print(len(contour)) + contour_res =[] + back = 1 + cnt = contour + leftmost = tuple(cnt[cnt[:, :, 0].argmin()][0]) + rightmost = tuple(cnt[cnt[:, :, 0].argmax()][0]) + topmost = tuple(cnt[cnt[:, :, 1].argmin()][0]) + bottommost = tuple(cnt[cnt[:, :, 1].argmax()][0]) + pole = [leftmost,rightmost,topmost,bottommost] + for point in pole: + # check the distance with contours, biggest contour + # when it is negative, means the point is outside the contours + dist = cv2.pointPolygonTest(cont, point, True) + # print(dist) + if (dist <=0): + back = 0 + else: + pass + + return (ori_img,contour,back) +def rotate_rectangle(img_name,img, contour): + + shape= {} + shape['id'] = img_name +# for c in contour: + c = contour + + area = cv2.contourArea(c) + x,y,w,h = cv2.boundingRect(c) + ratiowh = min(float(w/h),float(h/w)) + shape['ratiowh'] = ratiowh + + ratioarea = float(area/(w*h)) + shape['ratioarea'] = ratioarea + + epsilon = 0.01 * cv2.arcLength(c, True) + approx = cv2.approxPolyDP(c, epsilon, True) + + approxlen = len(approx) + shape['approxlen'] = approxlen + + + # the original num set to be -1 to be different no operation + num_angle = 0 + num_angle90 = -1 + num_angle80 = -1 + num_angle70 = -1 + + mask = np.zeros(img.shape, np.uint8) + cv2.drawContours(mask, [approx], -1, (255, 255, 255), -1) + cv2.drawContours(img, [approx], -1, (255, 255, 255), 2) + # mask = np.concatenate((mask, mask, mask), axis=-1) + gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) + contour_list = [] + ret, thresh = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY) + contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + # get the list of contours + for points in contours[0]: + x, y = points.ravel() + contour_list.append([x, y]) + corners = cv2.goodFeaturesToTrack(gray, 50, 0.01, 10) + corners = np.int0(corners) + for i in corners: + x, y = i.ravel() + # decide whether the corner is on the contours + if (cv2.pointPolygonTest(contours[0], (x, y), True) == 0): + center_index = contour_list.index([x, y]) + length = len(contour_list) + # get the point three before, and ignore the end point + a_index = center_index - 5 + b_index = center_index + 5 + if ((a_index > 0) & (b_index > 0) & (a_index < length)& (b_index < length)): + xa, ya = contour_list[a_index] + xb, yb = contour_list[b_index] + # print(x , y) + # print(xa, ya) + a = math.sqrt((x - xa) * (x - xa) + (y - ya) * (y - ya)) + b = math.sqrt((x - xb) * (x - xb) + (y - yb) * (y - yb)) + c = math.sqrt((xa - xb) * (xa - xb) + (ya - yb) * (ya - yb)) + if ((a > 0) & (b > 0)): + if(((a * a + b * b - c * c) / (2 * a * b))<1) & (((a * a + b * b - c * c) / (2 * a * b) >-1)): + angle = math.degrees(math.acos((a * a + b * b - c * c) / (2 * a * b))) + num_angle =num_angle +1 + # print(angle) + if (angle < 90): + num_angle90 = num_angle90 + 1 + if (angle < 80): + num_angle80 = num_angle80 + 1 + if (angle < 70): + num_angle70 = num_angle70 + 1 + cv2.circle(img, (x, y), 5, 255, -1) + + shape['numangle'] = num_angle + shape['numangle90'] = num_angle90 + shape['numangle80'] = num_angle80 + shape['numangle70'] = num_angle70 +# print(shape) + # with open(csv_path, 'a') as 
csv_file: + # writer = csv.writer(csv_file) + # # writer.writerow(['image_id','size','pole','mean','square']) + # writer.writerow([shape['id'],shape['ratiowh'], shape['ratioarea'],shape['approxlen'],shape['numangle'],shape['numangle90'],shape['numangle80'],shape['numangle70']]) + # # for key, value in contour.items(): + # # writer.writerow([key, value]) + # csv_file.close() + + return(shape) +def mean(img,contour): + cont_res = [] + ori_img= img.copy() + + img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) + image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + mean_filter = 0 + c = contour + mean = getContourStat(image_grayscale,c)[0] + hist = kmeans(img)[1] + if (mean[0][0] <= (hist[2]+5)): + # mean = 1 means panel + mean_filter= 1 + + else: + # pass + mean_filter = 0 + # print(mean) +# cv2.drawContours(ori_img, cont_res, -1, (0, 0, 255), -1) + return(ori_img,cont_res,mean_filter) + +def main(): + + + path = './model/' + model = tf.keras.models.load_model(os.path.join(path,'20191003-010747.hdf5')) + num = 0 + csvpath = './lrtrainhouse7poss.csv' + with open(csvpath, 'a') as csvfile: + myFields = ['id', 'image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','vgg','label'] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() + csvfile.close() + CATEGORIES = ["panel", "nopanel"] + IMG_SIZE = 150 + img_path = gb.glob("./house7/*.png") + # store the information of contours(the label) + for path in img_path: + contour = {} + img_name = path.split("/")[-1] + img_name = img_name.split(".")[0] + # print(img_name) + # original image + img = cv2.imread(path) + # this is to show the contours so we can label right + img_contour = img.copy() +# tag = kmeans(img.copy())[2] + tag = kmeans(img)[2] +# masks = get_mask(img, tag) + # get the contours + img_contours= find_contours(img, tag)[0] + contours = find_contours(img, tag)[1] + # filter: to remove the contours which is less than 1 block of solar panel + img_size = filter_size(img, contours)[0] + contourinfo = filter_size(img, contours)[1] + # conotur_num is to tag the contours on the image + contour_num = 0 + rank = 0 + for i, area, c in contourinfo: + contour = {} + rank = rank + 1 + contour['id'] = str(img_name) + '_' + str(rank) + print(contour['id']) + contour['image'] = str(img_name) + contour['size'] = area +# contour['cont'] = c + contour['pole'] = pole(img.copy(), c)[2] + # print(contour['pole']) + # if the value is 1, means it maybe panel + contour['mean'] = mean(img.copy(), c)[2] + # print(contour['mean']) + area = cv2.contourArea(c) + perimeter = cv2.arcLength(c, True) + sq = 4 * math.pi * area / (perimeter * perimeter) + contour['square'] = sq + # print(sq) + shape = rotate_rectangle(img_name,img.copy(), c) + contour['ratiowh'] = shape['ratiowh'] + contour['ratioarea'] = shape['ratioarea'] + contour['approxlen'] = shape['approxlen'] + contour['numangle'] = shape['numangle'] + contour['numangle90'] = shape['numangle90'] + contour['numangle70'] = shape['numangle70'] + csv_path = './contourlabel1.csv' + with open(csv_path, newline='') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + if(row['id']==contour['id']): +# print(row['id'],row['label']) + contour['label'] = row['label'] +# num = num + 1 + vgg_image = img.copy() + mask = np.zeros_like(img) + img2gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) + mask = cv2.drawContours(img2gray, [c], 0, (255, 255, 255, 1), -1) + img_result = cv2.bitwise_or(vgg_image, vgg_image, mask=mask) + 
# cv2.imshow('img_result', img_result)  # debug preview; cv2.imshow needs a (window name, image) pair
+ # cv2.waitKey(0)
+ img_result = cv2.resize(img_result, (IMG_SIZE, IMG_SIZE))
+ testimg = (img_result.reshape(-1, IMG_SIZE, IMG_SIZE, 3)).astype('int32')/255
+ prediction = model.predict(testimg)
+ contour['vgg'] = prediction[0][0]
+ # if ((prediction[0][0]) > (0.5)):
+ # contour['vgg'] = 1
+ # else:
+ # contour['vgg'] = 0
+ print(contour)
+ with open(csvpath, 'a') as csvfile:
+ writer = csv.writer(csvfile)
+ writer.writerow([contour['id'], contour['image'],contour['size'],contour['pole'],contour['mean'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'],contour['vgg'],contour['label']])
+ csvfile.close()
+ print('finish')
+main()
+
+
+
+
+
+
diff --git a/models/logical_regression/logical_model_test.py b/models/logical_regression/logical_model_test.py
new file mode 100644
index 0000000..6426018
--- /dev/null
+++ b/models/logical_regression/logical_model_test.py
@@ -0,0 +1,53 @@
+import pandas
+import pandas as pd
+import pickle
+from sklearn.linear_model import LogisticRegression
+from sklearn import metrics
+from sklearn import datasets
+from sklearn.preprocessing import StandardScaler
+import numpy as np
+from sklearn.metrics import roc_auc_score, roc_curve
+from matplotlib import pyplot
+col_names = ['id','image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction','prediction_class','label']
+# load dataset
+data = pd.read_csv("./final/nosplit/test/vgg_predict.csv", header=None, names=col_names)
+data = data.dropna()
+feature_cols = ['pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction']
+X = data[feature_cols]
+
+scaler = StandardScaler()
+testX = scaler.fit_transform(X)# Features
+
+testy = data.label # Target variable
+
+
+filename ='./' + 'RLmodel.sav'
+# pickle.dump(model, open(filename, 'wb'))
+
+model = pickle.load(open(filename, 'rb'))
+lr_probs = model.predict_proba(testX)
+# keep probabilities for the positive outcome only
+lr_probs = lr_probs[:, 1]
+# no-skill baseline: predict probability 0 for every sample
+ns_probs = [0 for _ in range(len(testy))]
+# calculate scores
+ns_auc = roc_auc_score(testy, ns_probs)
+lr_auc = roc_auc_score(testy, lr_probs)
+# summarize scores
+print('No Skill: ROC AUC=%.3f' % (ns_auc))
+print('Logistic: ROC AUC=%.3f' % (lr_auc))
+# calculate roc curves
+ns_fpr, ns_tpr, _ = roc_curve(testy, ns_probs)
+lr_fpr, lr_tpr, _ = roc_curve(testy, lr_probs)
+# plot the roc curve for the model
+pyplot.plot(ns_fpr, ns_tpr, linestyle='--', label='No Skill')
+pyplot.plot(lr_fpr, lr_tpr, marker='.', label='Logistic')
+# axis labels
+pyplot.xlabel('False Positive Rate')
+pyplot.ylabel('True Positive Rate')
+# show the legend
+pyplot.legend()
+# show the plot
+pyplot.show()
+
+
diff --git a/models/logical_regression/logical_model_train.py b/models/logical_regression/logical_model_train.py
new file mode 100644
index 0000000..dc7d618
--- /dev/null
+++ b/models/logical_regression/logical_model_train.py
@@ -0,0 +1,99 @@
+import pandas
+import pandas as pd
+import pickle
+from sklearn.linear_model import LogisticRegression
+from sklearn import metrics
+from sklearn import datasets
+from sklearn.preprocessing import StandardScaler
+import numpy as np
+col_names = ['id', 'location', 'image', 'size','pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction','prediction_class','label']
+# load dataset
+data = pd.read_csv("./location810/lr.csv", header=None, names=col_names)
+data = data.dropna()
+feature_cols = 
['pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction'] +X = data[feature_cols] + +scaler = StandardScaler() +X = scaler.fit_transform(X)# Features + +y = data.label # Target variable + +from sklearn.model_selection import train_test_split +X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) + +from sklearn.linear_model import LogisticRegression +from sklearn import metrics + +# instantiate the model (using the default parameters) +model = LogisticRegression(class_weight = 'balanced') + +# fit the model with data +model.fit(X_train, y_train) +print(model.coef_ ) +print(model.intercept_ ) +filename = 'RLmodel.sav' +pickle.dump(model, open(filename, 'wb')) + +loaded_model = pickle.load(open(filename, 'rb')) +result = loaded_model.score(X_test, y_test) +print(result) +y_predict= model.predict(X_test) +print("Y predict/hat ", y_predict) +print(metrics.confusion_matrix(y_test, y_predict)) + + + + + + + + + +import pandas +import pandas as pd +import pickle +from sklearn.linear_model import LogisticRegression +from sklearn import metrics +from sklearn import datasets +from sklearn.preprocessing import StandardScaler +import numpy as np +col_names = ['id','image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction','prediction_class','label'] +# load dataset +data = pd.read_csv("./vgg_predict.csv", header=None, names=col_names) +data = data.dropna() +feature_cols = ['pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction'] +X = data[feature_cols] + +scaler = StandardScaler() +X = scaler.fit_transform(X)# Features + +y = data.label # Target variable + + +filename ='./nosplit/' + 'RLmodel.sav' +# pickle.dump(model, open(filename, 'wb')) + +loaded_model = pickle.load(open(filename, 'rb')) +result = loaded_model.score(X, y) +print(result) +y_predict= loaded_model.predict(X) +print("Y predict/hat ", y_predict) +print(metrics.confusion_matrix(y, y_predict)) + +y_predict= loaded_model.predict(X) +print(y_predict) + + +# Convert the dictionary into DataFrame +df = pd.DataFrame(data) + +# Using DataFrame.insert() to add a column +df.insert(15, "predict", y_predict, True) + +# Observe the result + +export_csv = df.to_csv ('./vgg_predict.csv', index = None, header=False) + + + + diff --git a/models/logical_regression/lrmodel.py b/models/logical_regression/lrmodel.py new file mode 100644 index 0000000..9d416ff --- /dev/null +++ b/models/logical_regression/lrmodel.py @@ -0,0 +1,63 @@ +import pandas +import pandas as pd +import pickle +from sklearn.linear_model import LogisticRegression +from sklearn import metrics +from sklearn import datasets +from sklearn.preprocessing import StandardScaler +import numpy as np +from sklearn.metrics import classification_report, confusion_matrix +import csv + +data = pd.read_csv("./feature_17_all.csv") +data = data.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +X = data[feature_cols] + +scaler = StandardScaler() +X = scaler.fit_transform(X)# Features + +y = data.label # Target variable + +# from sklearn.model_selection import train_test_split +# X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) + +from sklearn.linear_model import LogisticRegression +from sklearn import metrics + +# instantiate the model (using 
the default parameters) +model = LogisticRegression(class_weight = 'balanced') +X_train = X +y_train = y +# fit the model with data +model.fit(X_train, y_train) + + + +datatest = pd.read_csv("./feature_810_all.csv") +datatest = datatest.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +Xtest = datatest[feature_cols] +scaler = StandardScaler() +Xtest = scaler.fit_transform(Xtest)# Features +ytest = datatest.label # Target variable + + +y_predict= model.predict(Xtest) +y_pro = model.predict_proba(Xtest)[:,1] +print(y_pro) + + +df = pd.DataFrame(datatest) +df.insert(23, "lr_class", y_predict, True) +df.insert(24, "lr_pro", y_pro , True) +export_csv = df.to_csv ('./lrmodel.csv', index = None) +print(confusion_matrix(ytest, y_predict)) +tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() +print(tn,fp,fn,tp) +with open('./result.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([tn,fp,fn,tp]) +csvfile.close() + + diff --git a/models/logical_regression/vgg.py b/models/logical_regression/vgg.py new file mode 100644 index 0000000..b08f48c --- /dev/null +++ b/models/logical_regression/vgg.py @@ -0,0 +1,34 @@ +from keras.models import Sequential +from keras.layers import Dense +import numpy +import os +# fix random seed for reproducibility +seed = 7 +numpy.random.seed(seed) +# load pima indians dataset +dataset = numpy.loadtxt('./lr_train.csv', delimiter=",") +# split into input (X) and output (Y) variables +X = dataset[:,2:13] +Y = dataset[:,14] +# create model +model = Sequential() +model.add(Dense(12, input_dim=11, init='uniform', activation='relu')) +model.add(Dense(8, init='uniform', activation='relu')) +model.add(Dense(1, init='uniform', activation='sigmoid')) +# Compile model +model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) # Fit the model +model.fit(X, Y, nb_epoch=5, batch_size=128) +datasettest = numpy.loadtxt('./vggtest.csv', delimiter=",") +# split into input (X) and output (Y) variables +Xtest = datasettest[:,2:13] +Ytest = datasettest[:,14] + + +# evaluate the model +scores = model.evaluate(Xtest, Ytest) +print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100)) + + + + + diff --git a/models/logical_regression/vgg_physical_integration.py b/models/logical_regression/vgg_physical_integration.py new file mode 100644 index 0000000..986950e --- /dev/null +++ b/models/logical_regression/vgg_physical_integration.py @@ -0,0 +1,42 @@ +import csv +physical_feature_path = './location17/contour_all.csv' +vgg_predict_path = './location17/vgg_predict.csv' +lr_path = './location17/lr.csv' + +with open(lr_path, 'a') as csvfile: + myFields = ['id', 'location', 'image', 'size','pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction','prediction_class','label',] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() +csvfile.close() + +with open(physical_feature_path, newline='') as phyfile: + contour = {} + reader = csv.DictReader(phyfile) + for phy in reader: + contour = phy + + with open(vgg_predict_path, newline='') as vggfile: + reader = csv.DictReader(vggfile) + for vgg in reader: + if (vgg['id'] ==contour['id']): + contour['prediction'] = vgg['prediction'] + contour['prediction_class'] = vgg['prediction_class'] + vggfile.close() + with open(lr_path, 'a') as lrfile: + writer = 
csv.writer(lrfile)
+ writer.writerow([contour['id'], contour['location'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['stddev'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], contour['numangle70'], contour['prediction'],contour['prediction_class'],contour['label']])
+ lrfile.close()
+phyfile.close()
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/models/random_forest/random_forest.py b/models/random_forest/random_forest.py
new file mode 100644
index 0000000..fc6c93c
--- /dev/null
+++ b/models/random_forest/random_forest.py
@@ -0,0 +1,66 @@
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import StandardScaler
+# %matplotlib inline  # Jupyter-only magic; keep commented out when running as a plain .py script
+
+col_names = ['id', 'image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction','prediction_class','label']
+# load dataset
+data = pd.read_csv("./vgg_predict.csv", header=None, names=col_names)
+data = data.dropna()
+# feature_cols = ['pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction']
+feature_cols = ['pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70']
+# feature_cols = ['pole','prediction']
+X = data[feature_cols]
+
+scaler = StandardScaler()
+X = scaler.fit_transform(X)# Features
+
+y = data.label # Target variable
+
+X_train = X
+y_train = y
+from sklearn.ensemble import RandomForestClassifier
+clf = RandomForestClassifier(n_estimators=100, max_depth=2,random_state=0,class_weight='balanced')
+
+model = clf.fit(X_train, y_train)
+# # y_pred = svclassifier.predict(X_test)
+# # from sklearn.metrics import classification_report, confusion_matrix
+# # print(confusion_matrix(y_test,y_pred))
+# # print(classification_report(y_test,y_pred))
+
+col_names = ['id', 'image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction','prediction_class','label','lrpredict','svmpredict']
+# load dataset
+data = pd.read_csv("./vgg_predict.csv", header=None, names=col_names)
+
+data = data.dropna()
+# # feature_cols = ['pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction']
+feature_cols = ['pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70']
+
+X1 = data[feature_cols]
+
+scaler = StandardScaler()
+X1 = scaler.fit_transform(X1)# Features
+
+y1 = data.label # Target variable
+y_pred1 = model.predict(X1)
+
+
+from sklearn.metrics import classification_report, confusion_matrix
+from sklearn.metrics import accuracy_score
+from sklearn.metrics import cohen_kappa_score
+from sklearn import metrics
+from sklearn.metrics import precision_recall_curve
+from sklearn.metrics import average_precision_score
+from sklearn.metrics import matthews_corrcoef
+from sklearn.metrics import roc_auc_score
+from sklearn.metrics import balanced_accuracy_score
+print(confusion_matrix(y1,y_pred1))
+print(classification_report(y1,y_pred1))
+print(accuracy_score(y1,y_pred1))
+print(balanced_accuracy_score(y1,y_pred1))
+print(metrics.precision_score(y1,y_pred1))
+print(metrics.recall_score(y1,y_pred1))
+print(metrics.f1_score(y1,y_pred1))
+print(matthews_corrcoef(y1,y_pred1))
+print(roc_auc_score(y1,y_pred1))
\ No newline at end of file
diff --git a/models/svm/svm10.py b/models/svm/svm10.py
new file mode 100644
index 0000000..36d04f5
--- /dev/null
+++ b/models/svm/svm10.py
@@ -0,0 +1,82 
@@ +import pandas +import pandas as pd +import pickle +from sklearn.linear_model import LogisticRegression +from sklearn import metrics +from sklearn import datasets +from sklearn.preprocessing import StandardScaler +import numpy as np +from sklearn.metrics import classification_report, confusion_matrix +import csv +import time +start_time = time.time() + + + +data = pd.read_csv("./feature_17_all.csv") +data = data.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +X = data[feature_cols] + +scaler = StandardScaler() +X = scaler.fit_transform(X)# Features + +y = data.label # Target variable + +# from sklearn.model_selection import train_test_split +# X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) +X_train = X +y_train = y + +from sklearn.svm import SVC +svclassifier = SVC(kernel='poly',class_weight='balanced', degree=10, random_state=0,probability=True) +model = svclassifier.fit(X_train, y_train) + + +from sklearn import metrics + +# instantiate the model (using the default parameters) + + +# fit the model with data +model.fit(X_train, y_train) + + + +datatest = pd.read_csv("./feature_810_all.csv") +datatest = datatest.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +Xtest = datatest[feature_cols] +scaler = StandardScaler() +Xtest = scaler.fit_transform(Xtest)# Features +ytest = datatest.label # Target variable + +y_predict= model.predict(Xtest) +y_pro = model.predict_proba(Xtest)[:,1] + + + +df = pd.DataFrame(datatest) +df.insert(23, "svm_poly10_class", y_predict, True) +df.insert(24, "svm_poly10_pro", y_predict, True) +export_csv = df.to_csv ('./svm_poly10.csv', index = None) +print(confusion_matrix(ytest, y_predict)) +tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() +print(tn,fp,fn,tp) +with open('./result.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['svm_poly10',tn,fp,fn,tp]) +csvfile.close() +time = time.time() - start_time +with open('./time.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['svm_poly10',time]) +csvfile.close() + + + + + + + + diff --git a/models/svm/svm2.py b/models/svm/svm2.py new file mode 100644 index 0000000..9ff5a2f --- /dev/null +++ b/models/svm/svm2.py @@ -0,0 +1,82 @@ +import pandas +import pandas as pd +import pickle +from sklearn.linear_model import LogisticRegression +from sklearn import metrics +from sklearn import datasets +from sklearn.preprocessing import StandardScaler +import numpy as np +from sklearn.metrics import classification_report, confusion_matrix +import pandas +import pandas as pd +import pickle +from sklearn.linear_model import LogisticRegression +from sklearn import metrics +from sklearn import datasets +from sklearn.preprocessing import StandardScaler +import numpy as np +from sklearn.metrics import classification_report, confusion_matrix +import csv +import time +start_time = time.time() +data = pd.read_csv("./feature_train_all.csv") +data = data.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +X = data[feature_cols] + +scaler = StandardScaler() +X = scaler.fit_transform(X)# Features + +y = 
data.label # Target variable + +X_train = X +y_train = y + + +from sklearn.svm import SVC +svclassifier = SVC(kernel='poly',class_weight='balanced', degree=2, random_state=0) +model = svclassifier.fit(X_train, y_train) + + +# fit the model with data +model.fit(X_train, y_train) + + + +datatest = pd.read_csv("./feature_test_all.csv") +datatest = datatest.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +Xtest = datatest[feature_cols] +scaler = StandardScaler() +Xtest = scaler.fit_transform(Xtest)# Features +ytest = datatest.label # Target variable + + +y_predict= model.predict(Xtest) +# y_pro = model.predict_proba(Xtest)[:,1] + + +df = pd.DataFrame(datatest) +df.insert(22, "svm2_class", y_predict, True) +# df.insert(23, "svm6_pro", y_pro , True) +export_csv = df.to_csv ('./svm2.csv', index = None) +print(confusion_matrix(ytest, y_predict)) +tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() +print(tn,fp,fn,tp) +with open('./result.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['svm2',tn,fp,fn,tp]) +csvfile.close() + +time = time.time() - start_time +with open('./time.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['svm2',time]) +csvfile.close() + + + + + + + diff --git a/models/svm/svm3.py b/models/svm/svm3.py new file mode 100644 index 0000000..8eb3f7b --- /dev/null +++ b/models/svm/svm3.py @@ -0,0 +1,81 @@ +import pandas +import pandas as pd +import pickle +from sklearn.linear_model import LogisticRegression +from sklearn import metrics +from sklearn import datasets +from sklearn.preprocessing import StandardScaler +import numpy as np +from sklearn.metrics import classification_report, confusion_matrix +import csv +import time +start_time = time.time() + + + +data = pd.read_csv("./feature_17_all.csv") +data = data.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +X = data[feature_cols] + +scaler = StandardScaler() +X = scaler.fit_transform(X)# Features + +y = data.label # Target variable + +# from sklearn.model_selection import train_test_split +# X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) +X_train = X +y_train = y + +from sklearn.svm import SVC +svclassifier = SVC(kernel='poly',class_weight='balanced', degree=3, random_state=0) +model = svclassifier.fit(X_train, y_train) + + +from sklearn import metrics + +# instantiate the model (using the default parameters) + + +# fit the model with data +model.fit(X_train, y_train) + + + +datatest = pd.read_csv("./feature_810_all.csv") +datatest = datatest.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +Xtest = datatest[feature_cols] +scaler = StandardScaler() +Xtest = scaler.fit_transform(Xtest)# Features +ytest = datatest.label # Target variable + +y_predict= model.predict(Xtest) +y_pro = model.predict_proba(Xtest)[:,1] + + + +df = pd.DataFrame(datatest) +df.insert(23, "svm_linear_class", y_predict, True) +df.insert(24, "svm_linear_pro", y_predict, True) +export_csv = df.to_csv ('./output/svm_poly3.csv', index = None) +print(confusion_matrix(ytest, y_predict)) +tn, fp, fn, tp = confusion_matrix(ytest, 
y_predict, labels=[0,1]).ravel() +print(tn,fp,fn,tp) +with open('./result.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['svm_poly3',tn,fp,fn,tp]) +csvfile.close() +time = time.time() - start_time +with open('./time.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['svm_poly3',time]) +csvfile.close() + + + + + + + diff --git a/models/svm/svm4.py b/models/svm/svm4.py new file mode 100644 index 0000000..441b8c2 --- /dev/null +++ b/models/svm/svm4.py @@ -0,0 +1,81 @@ +import pandas +import pandas as pd +import pickle +from sklearn.linear_model import LogisticRegression +from sklearn import metrics +from sklearn import datasets +from sklearn.preprocessing import StandardScaler +import numpy as np +from sklearn.metrics import classification_report, confusion_matrix +import csv +import time +start_time = time.time() + + + +data = pd.read_csv("./feature_17_all.csv") +data = data.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +X = data[feature_cols] + +scaler = StandardScaler() +X = scaler.fit_transform(X)# Features + +y = data.label # Target variable + +# from sklearn.model_selection import train_test_split +# X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) +X_train = X +y_train = y + +from sklearn.svm import SVC +svclassifier = SVC(kernel='poly',class_weight='balanced', degree=4, random_state=0,probability=True) +model = svclassifier.fit(X_train, y_train) + + +from sklearn import metrics + +# instantiate the model (using the default parameters) + + +# fit the model with data +model.fit(X_train, y_train) + + + +datatest = pd.read_csv("./feature_810_all.csv") +datatest = datatest.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +Xtest = datatest[feature_cols] +scaler = StandardScaler() +Xtest = scaler.fit_transform(Xtest)# Features +ytest = datatest.label # Target variable + +y_predict= model.predict(Xtest) +y_pro = model.predict_proba(Xtest)[:,1] + + + +df = pd.DataFrame(datatest) +df.insert(23, "svm_poly4_class", y_predict, True) +df.insert(24, "svm_poly4_pro", y_predict, True) +export_csv = df.to_csv ('./svm_poly4.csv', index = None) +print(confusion_matrix(ytest, y_predict)) +tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() +print(tn,fp,fn,tp) +with open('./result.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['svm_poly4',tn,fp,fn,tp]) +csvfile.close() +time = time.time() - start_time +with open('./time.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['svm_poly4',time]) +csvfile.close() + + + + + + + diff --git a/models/svm/svm5.py b/models/svm/svm5.py new file mode 100644 index 0000000..62a02dd --- /dev/null +++ b/models/svm/svm5.py @@ -0,0 +1,80 @@ +import pandas +import pandas as pd +import pickle +from sklearn.linear_model import LogisticRegression +from sklearn import metrics +from sklearn import datasets +from sklearn.preprocessing import StandardScaler +import numpy as np +from sklearn.metrics import classification_report, confusion_matrix +import csv +import time +start_time = time.time() + + + +data = pd.read_csv("./feature_17_all.csv") +data = data.dropna() +feature_cols = 
['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +X = data[feature_cols] + +scaler = StandardScaler() +X = scaler.fit_transform(X)# Features + +y = data.label # Target variable + +X_train = X +y_train = y + +from sklearn.svm import SVC +svclassifier = SVC(kernel='poly',class_weight='balanced', degree=5, random_state=0,probability=True) +model = svclassifier.fit(X_train, y_train) + + +from sklearn import metrics + +# instantiate the model (using the default parameters) + + +# fit the model with data +model.fit(X_train, y_train) + + + +datatest = pd.read_csv("./feature_810_all.csv") +datatest = datatest.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +Xtest = datatest[feature_cols] +scaler = StandardScaler() +Xtest = scaler.fit_transform(Xtest)# Features +ytest = datatest.label # Target variable + +y_predict= model.predict(Xtest) +y_pro = model.predict_proba(Xtest)[:,1] + + + +df = pd.DataFrame(datatest) +df.insert(23, "svm_poly5_class", y_predict, True) +df.insert(24, "svm_poly5_pro", y_predict, True) +export_csv = df.to_csv ('./svm_poly5.csv', index = None) +print(confusion_matrix(ytest, y_predict)) +tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() +print(tn,fp,fn,tp) +with open('./result.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['svm_poly5',tn,fp,fn,tp]) +csvfile.close() +time = time.time() - start_time +with open('./time.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['svm_poly5',time]) +csvfile.close() + + + + + + + + diff --git a/models/svm/svm6.py b/models/svm/svm6.py new file mode 100644 index 0000000..139a6b4 --- /dev/null +++ b/models/svm/svm6.py @@ -0,0 +1,82 @@ +import pandas +import pandas as pd +import pickle +from sklearn.linear_model import LogisticRegression +from sklearn import metrics +from sklearn import datasets +from sklearn.preprocessing import StandardScaler +import numpy as np +from sklearn.metrics import classification_report, confusion_matrix +import csv +import time +start_time = time.time() + + + +data = pd.read_csv("./feature_17_all.csv") +data = data.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +X = data[feature_cols] + +scaler = StandardScaler() +X = scaler.fit_transform(X)# Features + +y = data.label # Target variable + +# from sklearn.model_selection import train_test_split +# X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) +X_train = X +y_train = y + +from sklearn.svm import SVC +svclassifier = SVC(kernel='poly',class_weight='balanced', degree=6, random_state=0,probability=True) +model = svclassifier.fit(X_train, y_train) + + +from sklearn import metrics + +# instantiate the model (using the default parameters) + + +# fit the model with data +model.fit(X_train, y_train) + + + +datatest = pd.read_csv("./feature_810_all.csv") +datatest = datatest.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +Xtest = datatest[feature_cols] +scaler = StandardScaler() +Xtest = 
scaler.fit_transform(Xtest)# Features +ytest = datatest.label # Target variable + +y_predict= model.predict(Xtest) +y_pro = model.predict_proba(Xtest)[:,1] + + + +df = pd.DataFrame(datatest) +df.insert(23, "svm_poly6_class", y_predict, True) +df.insert(24, "svm_poly6_pro", y_predict, True) +export_csv = df.to_csv ('./svm_poly6.csv', index = None) +print(confusion_matrix(ytest, y_predict)) +tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() +print(tn,fp,fn,tp) +with open('./result.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['svm_poly6',tn,fp,fn,tp]) +csvfile.close() +time = time.time() - start_time +with open('./time.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['svm_poly6',time]) +csvfile.close() + + + + + + + + diff --git a/models/svm/svm7.py b/models/svm/svm7.py new file mode 100644 index 0000000..0f0e1ba --- /dev/null +++ b/models/svm/svm7.py @@ -0,0 +1,82 @@ +import pandas +import pandas as pd +import pickle +from sklearn.linear_model import LogisticRegression +from sklearn import metrics +from sklearn import datasets +from sklearn.preprocessing import StandardScaler +import numpy as np +from sklearn.metrics import classification_report, confusion_matrix +import csv +import time +start_time = time.time() + + + +data = pd.read_csv("./feature_17_all.csv") +data = data.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +X = data[feature_cols] + +scaler = StandardScaler() +X = scaler.fit_transform(X)# Features + +y = data.label # Target variable + +# from sklearn.model_selection import train_test_split +# X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) +X_train = X +y_train = y + +from sklearn.svm import SVC +svclassifier = SVC(kernel='poly',class_weight='balanced', degree=7, random_state=0) +model = svclassifier.fit(X_train, y_train) + + +from sklearn import metrics + +# instantiate the model (using the default parameters) + + +# fit the model with data +model.fit(X_train, y_train) + + + +datatest = pd.read_csv("./feature_810_all.csv") +datatest = datatest.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +Xtest = datatest[feature_cols] +scaler = StandardScaler() +Xtest = scaler.fit_transform(Xtest)# Features +ytest = datatest.label # Target variable + +y_predict= model.predict(Xtest) +y_pro = model.predict_proba(Xtest)[:,1] + + + +df = pd.DataFrame(datatest) +df.insert(23, "svm_poly7_class", y_predict, True) +df.insert(24, "svm_poly7_pro", y_predict, True) +export_csv = df.to_csv ('./svm_poly7.csv', index = None) +print(confusion_matrix(ytest, y_predict)) +tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() +print(tn,fp,fn,tp) +with open('./result.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['svm_poly7',tn,fp,fn,tp]) +csvfile.close() +time = time.time() - start_time +with open('./time.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['svm_poly7',time]) +csvfile.close() + + + + + + + + diff --git a/models/svm/svm8.py b/models/svm/svm8.py new file mode 100644 index 0000000..7357816 --- /dev/null +++ b/models/svm/svm8.py @@ -0,0 +1,82 @@ +import pandas +import pandas as pd +import pickle +from sklearn.linear_model import 
LogisticRegression +from sklearn import metrics +from sklearn import datasets +from sklearn.preprocessing import StandardScaler +import numpy as np +from sklearn.metrics import classification_report, confusion_matrix +import csv +import time +start_time = time.time() + + + +data = pd.read_csv("./feature_17_all.csv") +data = data.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +X = data[feature_cols] + +scaler = StandardScaler() +X = scaler.fit_transform(X)# Features + +y = data.label # Target variable + +# from sklearn.model_selection import train_test_split +# X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) +X_train = X +y_train = y + +from sklearn.svm import SVC +svclassifier = SVC(kernel='poly',class_weight='balanced', degree=8, random_state=0,probability=True) +model = svclassifier.fit(X_train, y_train) + + +from sklearn import metrics + +# instantiate the model (using the default parameters) + + +# fit the model with data +model.fit(X_train, y_train) + + + +datatest = pd.read_csv("./feature_810_all.csv") +datatest = datatest.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +Xtest = datatest[feature_cols] +scaler = StandardScaler() +Xtest = scaler.fit_transform(Xtest)# Features +ytest = datatest.label # Target variable + +y_predict= model.predict(Xtest) +y_pro = model.predict_proba(Xtest)[:,1] + + + +df = pd.DataFrame(datatest) +df.insert(23, "svm_poly8_class", y_predict, True) +df.insert(24, "svm_poly8_pro", y_predict, True) +export_csv = df.to_csv ('./svm_poly8.csv', index = None) +print(confusion_matrix(ytest, y_predict)) +tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() +print(tn,fp,fn,tp) +with open('./result.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['svm_poly8',tn,fp,fn,tp]) +csvfile.close() +time = time.time() - start_time +with open('./time.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['svm_poly8',time]) +csvfile.close() + + + + + + + + diff --git a/models/svm/svm9.py b/models/svm/svm9.py new file mode 100644 index 0000000..cee7e96 --- /dev/null +++ b/models/svm/svm9.py @@ -0,0 +1,82 @@ +import pandas +import pandas as pd +import pickle +from sklearn.linear_model import LogisticRegression +from sklearn import metrics +from sklearn import datasets +from sklearn.preprocessing import StandardScaler +import numpy as np +from sklearn.metrics import classification_report, confusion_matrix +import csv +import time +start_time = time.time() + + + +data = pd.read_csv("./feature_17_all.csv") +data = data.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +X = data[feature_cols] + +scaler = StandardScaler() +X = scaler.fit_transform(X)# Features + +y = data.label # Target variable + +# from sklearn.model_selection import train_test_split +# X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) +X_train = X +y_train = y + +from sklearn.svm import SVC +svclassifier = SVC(kernel='poly',class_weight='balanced', degree=9, random_state=0,probability=True) +model = svclassifier.fit(X_train, y_train) + + +from sklearn 
import metrics + +# instantiate the model (using the default parameters) + + +# fit the model with data +model.fit(X_train, y_train) + + + +datatest = pd.read_csv("./feature_810_all.csv") +datatest = datatest.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +Xtest = datatest[feature_cols] +scaler = StandardScaler() +Xtest = scaler.fit_transform(Xtest)# Features +ytest = datatest.label # Target variable + +y_predict= model.predict(Xtest) +y_pro = model.predict_proba(Xtest)[:,1] + + + +df = pd.DataFrame(datatest) +df.insert(23, "svm_poly9_class", y_predict, True) +df.insert(24, "svm_poly9_pro", y_predict, True) +export_csv = df.to_csv ('./svm_poly9.csv', index = None) +print(confusion_matrix(ytest, y_predict)) +tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() +print(tn,fp,fn,tp) +with open('./result.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['svm_poly9',tn,fp,fn,tp]) +csvfile.close() +time = time.time() - start_time +with open('./time.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['svm_poly9',time]) +csvfile.close() + + + + + + + + diff --git a/models/svm/svm_roc.py b/models/svm/svm_roc.py new file mode 100644 index 0000000..49de7b6 --- /dev/null +++ b/models/svm/svm_roc.py @@ -0,0 +1,136 @@ +import pandas +import pandas as pd +import pickle +from sklearn.linear_model import LogisticRegression +from sklearn import metrics +from sklearn import datasets +from sklearn.preprocessing import StandardScaler +import numpy as np +from sklearn.metrics import classification_report, confusion_matrix +import csv +import time +start_time = time.time() +%matplotlib inline + + + +data = pd.read_csv("./svmrbftrainprobility.csv") +data = data.dropna() +# feature_cols = ['vgg_pro','vgg_class','svmrbf_class','svmrbfpro'] +feature_cols = ['vgg_pro','svmrbfpro'] +X = data[feature_cols] + +scaler = StandardScaler() +X = scaler.fit_transform(X)# Features + +y = data.label # Target variable + + +X_train = X +y_train = y + + + + +# use linear regression +from sklearn.linear_model import LogisticRegression +model = LogisticRegression(class_weight = 'balanced') + +# instantiate the model (using the default parameters) + +# fit the model with data +model.fit(X_train, y_train) +# from sklearn.externals import joblib +# from joblib import dump, load +# dump(model, 'svmrbfhybrid.joblib') +# model = load('svmrbfhybrid.joblib') +print(model.coef_ ) +print(model.intercept_ ) +from sklearn import metrics + + + + +datatest = pd.read_csv("./svmrbftestpro.csv") +datatest = datatest.dropna() +# feature_cols = ['vgg_pro','vgg_class','svmrbf_class','svmrbfpro'] +feature_cols = ['vgg_pro','svmrbfpro'] +Xtest = datatest[feature_cols] +scaler = StandardScaler() +Xtest = scaler.fit_transform(Xtest)# Features +ytest = datatest.label # Target variable +y_predict_vgg = datatest.vgg_pro +y_predict_svm = datatest.svmrbfpro + + + +y_predict= model.predict(Xtest) +y_predict_pro = model.predict_proba(Xtest) +y_predict_pro = y_predict_pro[:, 1] + + + +df = pd.DataFrame(datatest) +df.insert(25, "svm_nosplit_pro", y_predict_pro, True) +df.insert(26, "svm_nosplit_class", y_predict, True) + +export_csv = df.to_csv ('./vggsvmlogicalregression2features.csv', index = None) +print(confusion_matrix(ytest, y_predict)) +tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() +print(tn,fp,fn,tp) +with 
open('./result.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['vggsvmlogicalregression2features.csv',tn,fp,fn,tp]) +csvfile.close() +time = time.time() - start_time +with open('./time.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['vggsvmlogicalregression2features.csv',time]) +csvfile.close() + + + +from sklearn.metrics import classification_report, confusion_matrix +from sklearn.metrics import accuracy_score +from sklearn.metrics import cohen_kappa_score +from sklearn import metrics +from sklearn.metrics import precision_recall_curve +from sklearn.metrics import average_precision_score +from sklearn.metrics import matthews_corrcoef +from sklearn.metrics import roc_auc_score +from sklearn.metrics import balanced_accuracy_score +from sklearn.metrics import roc_curve +from matplotlib import pyplot +print(confusion_matrix(ytest, y_predict)) +print(classification_report(ytest, y_predict)) +print(accuracy_score(ytest, y_predict)) +print(balanced_accuracy_score(ytest, y_predict)) +print(metrics.precision_score(ytest, y_predict)) +print(metrics.recall_score(ytest, y_predict)) +print(metrics.f1_score(ytest, y_predict)) +print(matthews_corrcoef(ytest, y_predict)) +print(roc_auc_score(ytest, y_predict)) +print(roc_auc_score(ytest, y_predict_vgg )) +print(roc_auc_score(ytest, y_predict)) +lr_fpr, lr_tpr, _ = roc_curve(ytest, y_predict_pro) +lr_fpr_vgg, lr_tpr_vgg, _ = roc_curve(ytest, y_predict_vgg ) +lr_fpr_svm, lr_tpr_svm, _ = roc_curve(ytest, y_predict_svm) +pyplot.plot(lr_fpr, lr_tpr, marker='x', label='Logistic') +pyplot.plot(lr_fpr_vgg, lr_tpr_vgg, marker='o', label='vgg') +pyplot.plot(lr_fpr_svm, lr_tpr_svm, marker='v', label='svm kernel=rbf') +pyplot.xlabel('False Positive Rate',{'size': 14}) +pyplot.ylabel('True Positive Rate',{'size': 14}) +# show the legend +pyplot.legend() +pyplot.tight_layout() +pyplot.savefig('./split_roc.png') +# show the plot +pyplot.show() + + + + + + + + diff --git a/models/svm/svmaggressive.py b/models/svm/svmaggressive.py new file mode 100644 index 0000000..a3bfa76 --- /dev/null +++ b/models/svm/svmaggressive.py @@ -0,0 +1,82 @@ +import pandas +import pandas as pd +import pickle +from sklearn.linear_model import LogisticRegression +from sklearn import metrics +from sklearn import datasets +from sklearn.preprocessing import StandardScaler +import numpy as np +from sklearn.metrics import classification_report, confusion_matrix +import csv +import time +start_time = time.time() + + + +data = pd.read_csv("./feature_17_all.csv") +data = data.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +X = data[feature_cols] + +scaler = StandardScaler() +X = scaler.fit_transform(X)# Features + +y = data.label # Target variable + +# from sklearn.model_selection import train_test_split +# X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) +X_train = X +y_train = y + +from sklearn.linear_model import PassiveAggressiveClassifier +svclassifier = PassiveAggressiveClassifier(max_iter=1000, random_state=0,tol=1e-3,class_weight='balanced') +model = svclassifier.fit(X_train, y_train) + + +from sklearn import metrics + +# instantiate the model (using the default parameters) + + +# fit the model with data +model.fit(X_train, y_train) + + + +datatest = pd.read_csv("./feature_810_all.csv") +datatest = datatest.dropna() +feature_cols = 
['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +Xtest = datatest[feature_cols] +scaler = StandardScaler() +Xtest = scaler.fit_transform(Xtest)# Features +ytest = datatest.label # Target variable + +y_predict= model.predict(Xtest) +y_pro = model.predict_proba(Xtest)[:,1] + + + +df = pd.DataFrame(datatest) +df.insert(23, "PassiveAggressive", y_predict, True) +df.insert(24, "PassiveAggressive", y_predict, True) +export_csv = df.to_csv ('./PassiveAggressive.csv', index = None) +print(confusion_matrix(ytest, y_predict)) +tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() +print(tn,fp,fn,tp) +with open('./split/result.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['PassiveAggressive',tn,fp,fn,tp]) +csvfile.close() +time = time.time() - start_time +with open('./split/time.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['PassiveAggressive',time]) +csvfile.close() + + + + + + + + diff --git a/models/svm/svmlinear.py b/models/svm/svmlinear.py new file mode 100644 index 0000000..bfd3b72 --- /dev/null +++ b/models/svm/svmlinear.py @@ -0,0 +1,69 @@ +import pandas +import pandas as pd +import pickle +from sklearn.linear_model import LogisticRegression +from sklearn import metrics +from sklearn import datasets +from sklearn.preprocessing import StandardScaler +import numpy as np +from sklearn.metrics import classification_report, confusion_matrix +import csv + +data = pd.read_csv("./feature_17_all.csv") +data = data.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +X = data[feature_cols] + +scaler = StandardScaler() +X = scaler.fit_transform(X)# Features + +y = data.label # Target variable + +# from sklearn.model_selection import train_test_split +# X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) +X_train = X +y_train = y + +from sklearn.svm import SVC +svclassifier = SVC(kernel='linear',class_weight='balanced',probability=True) +model = svclassifier.fit(X_train, y_train) + + +from sklearn import metrics + +# instantiate the model (using the default parameters) + + +# fit the model with data +model.fit(X_train, y_train) + + + +datatest = pd.read_csv("./feature_810_all.csv") +datatest = datatest.dropna() +feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70'] +Xtest = datatest[feature_cols] +scaler = StandardScaler() +Xtest = scaler.fit_transform(Xtest)# Features +ytest = datatest.label # Target variable + +y_predict= model.predict(Xtest) +y_pro = model.predict_proba(Xtest)[:,1] + + + +df = pd.DataFrame(datatest) +df.insert(23, "svm_linear_class", y_predict, True) +df.insert(24, "svm_linear_pro", y_predict, True) +export_csv = df.to_csv ('./output/svm_linear.csv', index = None) +print(confusion_matrix(ytest, y_predict)) +tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() +print(tn,fp,fn,tp) +with open('./result.csv', 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(['svm_linear',tn,fp,fn,tp]) +csvfile.close() + + + + diff --git a/models/svm/svmnosplit.py b/models/svm/svmnosplit.py new file mode 100644 index 0000000..3fffbef --- /dev/null +++ 
b/models/svm/svmnosplit.py
@@ -0,0 +1,33 @@
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import StandardScaler
+
+data = pd.read_csv("./vgg_predict.csv")
+data = data.dropna()
+
+
+df = pd.DataFrame(data)
+y1 = df.iloc[:,14].astype(int)
+print(y1)
+y_pred1 = df.iloc[:,16].astype(int)
+
+
+from sklearn.metrics import classification_report, confusion_matrix
+from sklearn.metrics import accuracy_score
+from sklearn.metrics import cohen_kappa_score
+from sklearn import metrics
+from sklearn.metrics import precision_recall_curve
+from sklearn.metrics import average_precision_score
+from sklearn.metrics import matthews_corrcoef
+from sklearn.metrics import roc_auc_score
+from sklearn.metrics import balanced_accuracy_score
+print(confusion_matrix(y1, y_pred1))
+print(classification_report(y1, y_pred1))
+print(accuracy_score(y1, y_pred1))
+print(balanced_accuracy_score(y1, y_pred1))
+print(metrics.precision_score(y1, y_pred1))
+print(metrics.recall_score(y1, y_pred1))
+print(metrics.f1_score(y1, y_pred1))
+print(matthews_corrcoef(y1, y_pred1))
+print(roc_auc_score(y1, y_pred1))
\ No newline at end of file
diff --git a/models/svm/svmrbf.py b/models/svm/svmrbf.py
new file mode 100644
index 0000000..4a32b2c
--- /dev/null
+++ b/models/svm/svmrbf.py
@@ -0,0 +1,78 @@
+import pandas
+import pandas as pd
+import pickle
+from sklearn.linear_model import LogisticRegression
+from sklearn import metrics
+from sklearn import datasets
+from sklearn.preprocessing import StandardScaler
+import numpy as np
+from sklearn.metrics import classification_report, confusion_matrix
+import csv
+import time
+start_time = time.time()
+
+
+data = pd.read_csv("./feature_17_all.csv")
+data = data.dropna()
+feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70']
+X = data[feature_cols]
+
+scaler = StandardScaler()
+X = scaler.fit_transform(X)  # Features
+
+y = data.label  # Target variable
+
+
+X_train = X
+y_train = y
+
+from sklearn.svm import SVC
+# probability=True is needed because predict_proba() is called on the test set below
+svclassifier = SVC(kernel='rbf', class_weight='balanced', probability=True)
+model = svclassifier.fit(X_train, y_train)
+
+
+# fit the model with data
+model.fit(X_train, y_train)
+
+
+datatest = pd.read_csv("./feature_810_all.csv")
+datatest = datatest.dropna()
+feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70']
+Xtest = datatest[feature_cols]
+scaler = StandardScaler()
+Xtest = scaler.fit_transform(Xtest)  # Features
+ytest = datatest.label  # Target variable
+
+y_predict = model.predict(Xtest)
+y_pro = model.predict_proba(Xtest)[:,1]
+
+
+df = pd.DataFrame(datatest)
+df.insert(23, "svm_rbf_class", y_predict, True)
+df.insert(24, "svm_rbf_pro", y_pro, True)
+export_csv = df.to_csv('./svm_rbf.csv', index=None)
+print(confusion_matrix(ytest, y_predict))
+tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel()
+print(tn, fp, fn, tp)
+with open('./result.csv', 'a') as csvfile:
+    writer = csv.writer(csvfile)
+    writer.writerow(['svm_rbf', tn, fp, fn, tp])
+csvfile.close()
+elapsed = time.time() - start_time
+with open('./time.csv', 'a') as csvfile:
+    writer = csv.writer(csvfile)
+    writer.writerow(['svm_rbf', elapsed])
+csvfile.close()
+
+
diff --git a/models/svm/svmridge.py b/models/svm/svmridge.py
new file mode 100644
index 
0000000..a7eb3a4
--- /dev/null
+++ b/models/svm/svmridge.py
@@ -0,0 +1,83 @@
+import pandas
+import pandas as pd
+import pickle
+from sklearn.linear_model import LogisticRegression
+from sklearn import metrics
+from sklearn import datasets
+from sklearn.preprocessing import StandardScaler
+import numpy as np
+from sklearn.metrics import classification_report, confusion_matrix
+import csv
+import time
+start_time = time.time()
+
+
+data = pd.read_csv("./feature_17_all.csv")
+data = data.dropna()
+feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70']
+X = data[feature_cols]
+
+scaler = StandardScaler()
+X = scaler.fit_transform(X)  # Features
+
+y = data.label  # Target variable
+
+# from sklearn.model_selection import train_test_split
+# X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0)
+X_train = X
+y_train = y
+
+
+from sklearn.linear_model import RidgeClassifierCV
+svclassifier = RidgeClassifierCV(alphas=[1e-3, 1e-2, 1e-1, 1], class_weight='balanced')
+model = svclassifier.fit(X_train, y_train)
+
+
+from sklearn import metrics
+
+# fit the model with data
+model.fit(X_train, y_train)
+
+
+datatest = pd.read_csv("./feature_810_all.csv")
+datatest = datatest.dropna()
+feature_cols = ['size','pole','mean','stddev','b_mean','g_mean','r_mean','b_stddev','g_stddev','r_stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70']
+Xtest = datatest[feature_cols]
+scaler = StandardScaler()
+Xtest = scaler.fit_transform(Xtest)  # Features
+ytest = datatest.label  # Target variable
+
+y_predict = model.predict(Xtest)
+# RidgeClassifierCV has no predict_proba(); use the decision function as the ranking score instead
+y_score = model.decision_function(Xtest)
+
+
+df = pd.DataFrame(datatest)
+df.insert(23, "RidgeClassifier_class", y_predict, True)
+df.insert(24, "RidgeClassifier_score", y_score, True)
+export_csv = df.to_csv('./RidgeClassifier.csv', index=None)
+print(confusion_matrix(ytest, y_predict))
+tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel()
+print(tn, fp, fn, tp)
+with open('./split/result.csv', 'a') as csvfile:
+    writer = csv.writer(csvfile)
+    writer.writerow(['RidgeClassifier', tn, fp, fn, tp])
+csvfile.close()
+elapsed = time.time() - start_time
+with open('./split/time.csv', 'a') as csvfile:
+    writer = csv.writer(csvfile)
+    writer.writerow(['RidgeClassifier', elapsed])
+csvfile.close()
+
+
diff --git a/models/svm/svmsplit.py b/models/svm/svmsplit.py
new file mode 100644
index 0000000..f350723
--- /dev/null
+++ b/models/svm/svmsplit.py
@@ -0,0 +1,48 @@
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import StandardScaler
+
+col_names = ['id', 'location','image', 'size','pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction','prediction_class','label']
+# load dataset
+data = pd.read_csv("./lr.csv", header=None, names=col_names)
+data = data.dropna()
+# feature_cols = ['pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction']
+feature_cols = ['pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction']
+X = data[feature_cols]
+
+scaler = StandardScaler()
+X = scaler.fit_transform(X)  # Features
+
+y = data.label  # Target variable
+# from sklearn.model_selection import train_test_split
+# X_train, X_test, y_train, 
y_test = train_test_split(X, y, test_size = 0.20) +X_train = X +y_train = y +from sklearn.svm import SVC +svclassifier = SVC(kernel='poly',class_weight='balanced', degree=8, random_state=0) +svclassifier.fit(X_train, y_train) +# y_pred = svclassifier.predict(X_test) +# from sklearn.metrics import classification_report, confusion_matrix +# print(confusion_matrix(y_test,y_pred)) +# print(classification_report(y_test,y_pred)) + +col_names = ['id', 'location','image', 'size','pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction','prediction_class','label','lrpredict'] +# load dataset +data = pd.read_csv("./location810/lr.csv", header=None, names=col_names) +data = data.dropna() +# feature_cols = ['pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction'] +feature_cols = ['pole','mean','stddev','square','ratiowh','ratioarea','approxlen','numangle','numangle90','prediction'] +X1 = data[feature_cols] + +scaler = StandardScaler() +X1 = scaler.fit_transform(X1)# Features + +y1 = data.label # Target variable +y_pred1 = svclassifier.predict(X1) + +from sklearn.metrics import classification_report, confusion_matrix +print(confusion_matrix(y1,y_pred1)) +print(classification_report(y1,y_pred1)) + + diff --git a/models/thresholding/append_new_column.py b/models/thresholding/append_new_column.py new file mode 100644 index 0000000..4e88d55 --- /dev/null +++ b/models/thresholding/append_new_column.py @@ -0,0 +1,23 @@ +import numpy as np +import pandas as pd + +loaded_model = pickle.load(open(filename, 'rb')) +result = loaded_model.score(X, y) +print(result) +y_predict= loaded_model.predict(X) +print("Y predict/hat ", y_predict) +print(metrics.confusion_matrix(y, y_predict)) + +y_predict= loaded_model.predict(X) +print(y_predict) + + +# Convert the dictionary into DataFrame +df = pd.DataFrame(data) + +# Using DataFrame.insert() to add a column +df.insert(15, "predict", y_predict, True) + +# Observe the result + +export_csv = df.to_csv ('./vgg_predict.csv', index = None, header=False) \ No newline at end of file diff --git a/models/thresholding/data_description.py b/models/thresholding/data_description.py new file mode 100644 index 0000000..f6ff441 --- /dev/null +++ b/models/thresholding/data_description.py @@ -0,0 +1,90 @@ +import numpy as np +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt + +import csv + +# col_names = ['id', 'image', 'size', 'pole', 'mean', 'stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label'] +col_names = ['id', 'location', 'image', 'size', 'pole', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label', 'vgg_pro', 'vgg_class'] + +data = pd.read_csv("./feature_17_all.csv", names=col_names) +data = data.dropna() + +g_outputDir = './output/final/split/' +csv_path = g_outputDir + 'feature_description.csv' + +positive_sample_set = data[data['label'] == 1.0] +negative_sample_set = data[data['label'] == 0.0] + +analysis_features = ['size', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70'] +# analysis_features = ['size'] + +labels = ['mean', 'std', 'min', 'max', '50%', '25%','75%'] + +def get_whiskers(feature_array): + Q1, median, Q3 = np.percentile(np.asarray(feature_array), [25, 50, 
75]) + + IQR = Q3 - Q1 + + loval = Q1 - 1.5 * IQR + hival = Q3 + 1.5 * IQR + + upper_wisk_set = np.compress(feature_array <= hival, feature_array) + lower_wisk_set = np.compress(feature_array >= loval, feature_array) + upper_wisk = np.max(upper_wisk_set) + lower_wisk = np.min(lower_wisk_set) + + return [lower_wisk, upper_wisk] + +csv_header = ['feature', 'mean', 'std', 'min', 'max', 'median', '25%', '75%', '0.35%', '99.65%'] +with open(csv_path, 'a') as csv_file: + writer = csv.DictWriter(csv_file, fieldnames=csv_header) + writer.writeheader() +csv_file.close() + +output = {} + +for analysis_feature in analysis_features: + + positive_sample_set_description = positive_sample_set[analysis_feature].describe() + print('positive_sample_set:') + + row_name = str(analysis_feature+'_pos') + + for l in labels: + output[l] = positive_sample_set_description[l] + + positive_whis = get_whiskers(positive_sample_set[analysis_feature]) + output['0.35%'] = positive_whis[0] + output['99.65%'] = positive_whis[1] + + print(output) + + with open(csv_path, 'a') as csv_file: + writer = csv.writer(csv_file) + writer.writerow([row_name, output['mean'], output['std'], output['min'], output['max'], output['50%'], output['25%'], output['75%'], output['0.35%'], output['99.65%']]) + csv_file.close() + + + negative_sample_set_description = negative_sample_set[analysis_feature].describe() + print('negative_sample_set:') + row_name = str(analysis_feature+'_neg') + + for l in labels: + output[l] = negative_sample_set_description[l] + + negative_whis = get_whiskers(negative_sample_set[analysis_feature]) + output['0.35%'] = negative_whis[0] + output['99.65%'] = negative_whis[1] + + print(output) + + with open(csv_path, 'a') as csv_file: + writer = csv.writer(csv_file) + writer.writerow([row_name, output['mean'], output['std'], output['min'], output['max'], output['50%'], output['25%'], output['75%'], output['0.35%'], output['99.65%']]) + csv_file.close() + + # input('Press ENTER to continue...') + + diff --git a/models/thresholding/hard_filters_test.py b/models/thresholding/hard_filters_test.py new file mode 100644 index 0000000..2d5980c --- /dev/null +++ b/models/thresholding/hard_filters_test.py @@ -0,0 +1,210 @@ +import numpy as np +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt + +import csv + +# dataset = 'split' +dataset = 'non-split' + +# Generate hard filters + +if dataset == 'split': + col_names = ['id', 'location', 'image', 'size', 'pole', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label', 'vgg_pro', 'vgg_class'] + #split training data path + training_data_csv_path = "./data/final/split/feature_17_all.csv" +elif dataset == 'non-split': + col_names = ['id', 'image', 'size', 'pole', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label', 'vgg_pro', 'vgg_class'] + # non-split training data path + training_data_csv_path = "./data/final/non_split/feature_train_all.csv" +else: + print('No dataset is selected.') + exit() + +data = pd.read_csv(training_data_csv_path, names=col_names) +data = data.dropna() + +positive_sample_set = data[data['label'] == 1.0] +negative_sample_set = data[data['label'] == 0.0] + +analysis_features = ['size', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 
'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70'] +# analysis_features = ['size'] + +number_of_features = len(analysis_features) + 1 + +labels = ['mean', 'std', 'min', 'max', '50%', '25%','75%'] + +def get_whiskers(feature_array): + Q1, median, Q3 = np.percentile(np.asarray(feature_array), [25, 50, 75]) + + IQR = Q3 - Q1 + + loval = Q1 - 1.5 * IQR + hival = Q3 + 1.5 * IQR + + upper_wisk_set = np.compress(feature_array <= hival, feature_array) + lower_wisk_set = np.compress(feature_array >= loval, feature_array) + upper_wisk = np.max(upper_wisk_set) + lower_wisk = np.min(lower_wisk_set) + + return [lower_wisk, upper_wisk] + +hard_filters = {} + +for analysis_feature in analysis_features: + + hard_filters[analysis_feature] = {} + + positive_sample_set_description = positive_sample_set[analysis_feature].describe() + + positive_output = {} + + for l in labels: + positive_output[l] = positive_sample_set_description[l] + + positive_whis = get_whiskers(positive_sample_set[analysis_feature]) + positive_output['0.35%'] = positive_whis[0] + positive_output['99.65%'] = positive_whis[1] + + ############ + + negative_sample_set_description = negative_sample_set[analysis_feature].describe() + + negative_output = {} + + for l in labels: + negative_output[l] = negative_sample_set_description[l] + + negative_whis = get_whiskers(negative_sample_set[analysis_feature]) + negative_output['0.35%'] = negative_whis[0] + negative_output['99.65%'] = negative_whis[1] + + NU = negative_output['99.65%'] + NL = negative_output['0.35%'] + PU = positive_output['99.65%'] + PL = positive_output['0.35%'] + + if NU == PU and NL == PL: + hard_filters[analysis_feature]['filter_type'] = 'equal' + hard_filters[analysis_feature]['accept_zone'] = [] + hard_filters[analysis_feature]['reject_zone'] = [] + hard_filters[analysis_feature]['unsure_zone'] = [[NL, NU]] + elif NU >= PU and NL <= PL: + hard_filters[analysis_feature]['filter_type'] = 'contain' + hard_filters[analysis_feature]['accept_zone'] = [] + hard_filters[analysis_feature]['reject_zone'] = [[NL, PL], [PU, NU]] + hard_filters[analysis_feature]['unsure_zone'] = [[PL, PU]] + elif NU < PU and NU > PL and NL < PL: + hard_filters[analysis_feature]['filter_type'] = 'intersect' + hard_filters[analysis_feature]['accept_zone'] = [[NU, PU]] + hard_filters[analysis_feature]['reject_zone'] = [[NL, PL]] + hard_filters[analysis_feature]['unsure_zone'] = [[PL, NU]] + elif NL > PL and NL < PU and NU > PU: + hard_filters[analysis_feature]['filter_type'] = 'intersect' + hard_filters[analysis_feature]['accept_zone'] = [[PL, NL]] + hard_filters[analysis_feature]['reject_zone'] = [[PU, NU]] + hard_filters[analysis_feature]['unsure_zone'] = [[NL, PU]] + else: + hard_filters[analysis_feature]['filter_type'] = 'undefine' + hard_filters[analysis_feature]['accept_zone'] = [] + hard_filters[analysis_feature]['reject_zone'] = [] + hard_filters[analysis_feature]['unsure_zone'] = [] + # input('Press ENTER to continue...') +print(hard_filters) + +print('start testing...') + +# Test data + +def filter(feature_value, filters): + + feature_value = float(feature_value) + + possibility = 0.5 + + if len(filters['accept_zone']) != 0: + for r in filters['accept_zone']: + if feature_value >= float(r[0]) and feature_value <= float(r[1]): + possibility = 1 + return possibility + + if len(filters['reject_zone']) != 0: + for r in filters['reject_zone']: + if feature_value >= float(r[0]) and feature_value <= float(r[1]): + possibility = 0 + return possibility + + return possibility + +if dataset == 
'split': + g_output_dir = './output/final/split/' + output_csv_path = g_output_dir + 'split_810_test_result.csv' + + g_test_data_dir = './data/final/split/' + test_data_csv_path = g_test_data_dir + 'feature_810_all.csv' + + output_csv_header = ['id', 'location', 'image', 'size', 'pole', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label', 'hard_pred_label', 'hard_pred_pos'] +elif dataset == 'non-split': + g_output_dir = './output/final/non_split/' + output_csv_path = g_output_dir + 'non_split_test_result.csv' + + g_test_data_dir = './data/final/non_split/' + test_data_csv_path = g_test_data_dir + 'feature_test_all.csv' + + output_csv_header = ['id', 'image', 'size', 'pole', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label', 'hard_pred_label', 'hard_pred_pos'] +else: + print('No dataset is selected.') + exit() + +with open(output_csv_path, 'a') as output_csv_file: + writer = csv.DictWriter(output_csv_file, fieldnames=output_csv_header) + writer.writeheader() +output_csv_file.close() + +with open(test_data_csv_path, newline='') as test_data_csv_file: + reader = csv.DictReader(test_data_csv_file) + for row in reader: + predict_label = 0 + predict_possibility = 0 + + total_possibility = 0 + + test_result = {} + + test_result['id'] = row['id'] + if dataset == 'split': + test_result['location'] = row['location'] + test_result['image'] = row['image'] + test_result['pole'] = row['pole'] + test_result['label'] = row['label'] + + for analysis_feature in analysis_features: + test_result[analysis_feature] = filter(row[analysis_feature], hard_filters[analysis_feature]) + if test_result[analysis_feature] == 1: + predict_label = 1 + + total_possibility += test_result[analysis_feature] + # input('Press ENTER to continue...') + + test_result['hard_pred_label'] = predict_label + + if predict_label == 1: + test_result['hard_pred_pos'] = 1 + else: + total_possibility += float(row['pole']) / 2 + test_result['hard_pred_pos'] = total_possibility / number_of_features + + with open(output_csv_path, 'a') as output_csv_file: + writer = csv.writer(output_csv_file) + if dataset == 'split': + writer.writerow([ test_result['id'], test_result['location'], test_result['image'], test_result['size'], test_result['pole'], test_result['mean'], test_result['stddev'], test_result['b_mean'], test_result['g_mean'], test_result['r_mean'], test_result['b_stddev'], test_result['g_stddev'], test_result['r_stddev'], test_result['square'], test_result['ratiowh'], test_result['ratioarea'], test_result['approxlen'], test_result['numangle'], test_result['numangle90'], test_result['numangle70'], test_result['label'], test_result['hard_pred_label'], test_result['hard_pred_pos']]) + if dataset == 'non-split': + writer.writerow([ test_result['id'], test_result['image'], test_result['size'], test_result['pole'], test_result['mean'], test_result['stddev'], test_result['b_mean'], test_result['g_mean'], test_result['r_mean'], test_result['b_stddev'], test_result['g_stddev'], test_result['r_stddev'], test_result['square'], test_result['ratiowh'], test_result['ratioarea'], test_result['approxlen'], test_result['numangle'], test_result['numangle90'], test_result['numangle70'], test_result['label'], test_result['hard_pred_label'], test_result['hard_pred_pos']]) + output_csv_file.close() + 
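+
+# Added sanity-check sketch (not part of the original script): tabulate how the hard-filter
+# labels compare with the ground truth in the CSV written above. It only uses `pandas`, which is
+# already imported as `pd`; `output_csv_path` is the output path defined earlier in this file.
+summary = pd.read_csv(output_csv_path)
+print(pd.crosstab(summary['label'], summary['hard_pred_label']))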
+test_data_csv_file.close() + +print('finished') + + diff --git a/models/thresholding/thresholding_model_generator.py b/models/thresholding/thresholding_model_generator.py new file mode 100644 index 0000000..c43b21f --- /dev/null +++ b/models/thresholding/thresholding_model_generator.py @@ -0,0 +1,139 @@ +import numpy as np +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt + +import csv + +# dataset = 'split' +dataset = 'non-split' + + +if dataset == 'split': + col_names = ['id', 'location', 'image', 'size', 'pole', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label', 'vgg_pro', 'vgg_class'] + #split training data path + training_data_csv_path = "./feature_17_all.csv" + + g_outputDir = './final/split/' + csv_path = g_outputDir + 'split_data_hard_filters.csv' +elif dataset == 'non-split': + col_names = ['id', 'image', 'size', 'pole', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label', 'vgg_pro', 'vgg_class'] + # non-split training data path + training_data_csv_path = "./final/non_split/feature_train_all.csv" + + g_outputDir = './output/final/non_split/' + csv_path = g_outputDir + 'non_split_data_hard_filters.csv' +else: + print('No dataset is selected.') + exit() + +data = pd.read_csv(training_data_csv_path, names=col_names) +data = data.dropna() + +positive_sample_set = data[data['label'] == 1.0] +negative_sample_set = data[data['label'] == 0.0] + +analysis_features = ['size', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70'] +# analysis_features = ['size'] + +labels = ['mean', 'std', 'min', 'max', '50%', '25%','75%'] + +def get_whiskers(feature_array): + Q1, median, Q3 = np.percentile(np.asarray(feature_array), [25, 50, 75]) + + IQR = Q3 - Q1 + + loval = Q1 - 1.5 * IQR + hival = Q3 + 1.5 * IQR + + upper_wisk_set = np.compress(feature_array <= hival, feature_array) + lower_wisk_set = np.compress(feature_array >= loval, feature_array) + upper_wisk = np.max(upper_wisk_set) + lower_wisk = np.min(lower_wisk_set) + + return [lower_wisk, upper_wisk] + +csv_header = ['feature', 'filter_type', 'accept_zone', 'reject_zone', 'unsure_zone'] +with open(csv_path, 'a') as csv_file: + writer = csv.DictWriter(csv_file, fieldnames=csv_header) + writer.writeheader() +csv_file.close() + +hard_filters = {} + +for analysis_feature in analysis_features: + + hard_filters[analysis_feature] = {} + + positive_sample_set_description = positive_sample_set[analysis_feature].describe() + print('positive_sample_set:') + + positive_output = {} + + for l in labels: + positive_output[l] = positive_sample_set_description[l] + + positive_whis = get_whiskers(positive_sample_set[analysis_feature]) + positive_output['0.35%'] = positive_whis[0] + positive_output['99.65%'] = positive_whis[1] + + print(positive_output) + + ############ + + negative_sample_set_description = negative_sample_set[analysis_feature].describe() + print('negative_sample_set:') + + negative_output = {} + + for l in labels: + negative_output[l] = negative_sample_set_description[l] + + negative_whis = get_whiskers(negative_sample_set[analysis_feature]) + negative_output['0.35%'] = negative_whis[0] + negative_output['99.65%'] = negative_whis[1] + + 
print(negative_output) + + NU = negative_output['99.65%'] + NL = negative_output['0.35%'] + PU = positive_output['99.65%'] + PL = positive_output['0.35%'] + + if NU == PU and NL == PL: + hard_filters[analysis_feature]['filter_type'] = 'equal' + hard_filters[analysis_feature]['accept_zone'] = [] + hard_filters[analysis_feature]['reject_zone'] = [] + hard_filters[analysis_feature]['unsure_zone'] = [[NL, NU]] + elif NU >= PU and NL <= PL: + hard_filters[analysis_feature]['filter_type'] = 'contain' + hard_filters[analysis_feature]['accept_zone'] = [] + hard_filters[analysis_feature]['reject_zone'] = [[NL, PL], [PU, NU]] + hard_filters[analysis_feature]['unsure_zone'] = [[PL, PU]] + elif NU < PU and NU > PL and NL < PL: + hard_filters[analysis_feature]['filter_type'] = 'intersect-1over0' + hard_filters[analysis_feature]['accept_zone'] = [[NU, PU]] + hard_filters[analysis_feature]['reject_zone'] = [[NL, PL]] + hard_filters[analysis_feature]['unsure_zone'] = [[PL, NU]] + elif NL > PL and NL < PU and NU > PU: + hard_filters[analysis_feature]['filter_type'] = 'intersect-0over1' + hard_filters[analysis_feature]['accept_zone'] = [[PL, NL]] + hard_filters[analysis_feature]['reject_zone'] = [[PU, NU]] + hard_filters[analysis_feature]['unsure_zone'] = [[NL, PU]] + else: + hard_filters[analysis_feature]['filter_type'] = 'undefine' + hard_filters[analysis_feature]['accept_zone'] = [] + hard_filters[analysis_feature]['reject_zone'] = [] + hard_filters[analysis_feature]['unsure_zone'] = [] + + with open(csv_path, 'a') as csv_file: + writer = csv.writer(csv_file) + writer.writerow([analysis_feature, str(hard_filters[analysis_feature]['filter_type']), str(hard_filters[analysis_feature]['accept_zone']), str(hard_filters[analysis_feature]['reject_zone']), str(hard_filters[analysis_feature]['unsure_zone'])]) + csv_file.close() + +print(hard_filters) + + + # input('Press ENTER to continue...') + + diff --git a/models/vgg_process/data_preparation.py b/models/vgg_process/data_preparation.py new file mode 100644 index 0000000..df08740 --- /dev/null +++ b/models/vgg_process/data_preparation.py @@ -0,0 +1,72 @@ +# Create dataset for panel and nopanel +import os +import cv2 +os.environ["CUDA_VISIBLE_DEVICES"] = "1" +import glob as gb +original_dataset_dir = './location17/' + +base_dir = './split/' +if not os.path.exists(base_dir): + os.mkdir(base_dir) + +# Create directories +train_dir = os.path.join(base_dir,'train/') +if not os.path.exists(train_dir): + os.mkdir(train_dir) +validation_dir = os.path.join(base_dir,'validation/') +if not os.path.exists(validation_dir): + os.mkdir(validation_dir) +# test_dir = os.path.join(base_dir,'test/') +# if not os.path.exists(test_dir): +# os.mkdir(test_dir) + +train_panel_dir = os.path.join(train_dir,'panel/') +if not os.path.exists(train_panel_dir): + os.mkdir(train_panel_dir) + +train_nopanel_dir = os.path.join(train_dir,'nopanel/') +if not os.path.exists(train_nopanel_dir): + os.mkdir(train_nopanel_dir) + +validation_panel_dir = os.path.join(validation_dir,'panel/') +if not os.path.exists(validation_panel_dir): + os.mkdir(validation_panel_dir) + +validation_nopanel_dir = os.path.join(validation_dir, 'nopanel/') +if not os.path.exists(validation_nopanel_dir): + os.mkdir(validation_nopanel_dir) + + +num = 0 +img_path = gb.glob("./panel_samesize/*.png") +for path in img_path: + img_name = path.split("/")[-1] + + img = cv2.imread(path) +# 0,1,2,3,4,5,6, + if ((num % 10) < 7): + cv2.imwrite(os.path.join(train_panel_dir + img_name),img) +# elif ((num % 10) > 6): +# pass +# 
cv2.imwrite(os.path.join(test_panel_dir +str(1) + img_name),img) + else: + cv2.imwrite(os.path.join(validation_panel_dir + img_name),img) + num = num + 1 +num = 0 +img_path = gb.glob("./nopanel_undersample/*.png") +for path in img_path: + img_name = path.split("/")[-1] + + img = cv2.imread(path) + if ((num % 10) < 7): + cv2.imwrite(os.path.join(train_nopanel_dir +img_name),img) +# elif ((num % 10) > 6): +# cv2.imwrite(os.path.join(test_nopanel_dir +img_name),img) + else: + cv2.imwrite(os.path.join(validation_nopanel_dir +img_name),img) + num = num + 1 +# Sanity checks +print('total training panel images:', len(os.listdir(train_panel_dir))) +print('total training nopanel images:', len(os.listdir(train_nopanel_dir))) +print('total validation panel images:', len(os.listdir(validation_panel_dir))) +print('total validation nopanel images:', len(os.listdir(validation_nopanel_dir))) diff --git a/models/vgg_process/metrics.py b/models/vgg_process/metrics.py new file mode 100644 index 0000000..ad1985f --- /dev/null +++ b/models/vgg_process/metrics.py @@ -0,0 +1,37 @@ +import math +import csv + + +def metric(panel_panel, panel_nopanel,nopanel_panel,nopanel_nopanel): + metric = {} + TP = panel_panel + FN = panel_nopanel + FP = nopanel_panel + TN = nopanel_nopanel + ACCURACY = float((TP + TN)/(TP + FP + FN + TN)) + PRECISION = float(TP/(TP + FP)) + RECALL = float(TP/(TP + FN)) + F1 = float(2*PRECISION*RECALL/(PRECISION + RECALL)) + MCC = float((TP * TN - FP * FN)/ math.sqrt((TP + FP) * (FN + TN) * (FP + TN) * (TP + FN))) + SPECIFICITY = float(TN/(TN + FP)) + metric['TP'] = float(TP/(TP + FN)) + metric['FN'] = float(FN /(TP + FN)) + metric['TN'] = float(TN /(TN + FP)) + metric['FP'] =float(FP /(TN + FP)) + metric['ACCURACY'] = ACCURACY + metric['PRECISION'] =PRECISION + metric['RECALL']= RECALL + metric['F1'] = F1 + metric['MCC'] = MCC + metric['SPECIFICITY'] = SPECIFICITY + metric['description'] = 'vgg pure nosplit' + print(metric) + csvpath = './solarpanel/svm/metric.csv' + with open(csvpath, 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([metric['description'],metric['TP'],metric['FN'],metric['TN'],metric['FP'],metric['ACCURACY'],metric['PRECISION'],metric['RECALL'],metric['F1'],metric['MCC'],metric['SPECIFICITY']]) + csvfile.close() + +# call function by the number panel_panel, panel_nopanel, nopanel_panel,nopanel_nopanel +# for exmaple +metric(603,276,8671,15396) \ No newline at end of file diff --git a/models/vgg_process/train_validation.py b/models/vgg_process/train_validation.py new file mode 100644 index 0000000..8f68428 --- /dev/null +++ b/models/vgg_process/train_validation.py @@ -0,0 +1,25 @@ +import csv +import cv2 + +csvpath_all = './feature_test.csv' +with open(csvpath_all, 'a') as csvfile: + myFields = ['id','image', 'size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','prediction','prediction_class','label'] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() +csvfile.close() + +csv_path = './vgg_predict-Copy2.csv' +with open(csv_path, newline='') as csv_file: + reader = csv.DictReader(csv_file) + for row in reader: + contour = row + with open(csvpath_all, 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([contour['id'],contour['image'],contour['size'],contour['pole'],contour['mean'],contour['square'],contour['ratiowh'],contour['ratioarea'],contour['approxlen'],contour['numangle'],contour['numangle90'], 
contour['numangle70'],contour['prediction'],contour['prediction_class'],contour['label']]) + csvfile.close() + + + + + + \ No newline at end of file diff --git a/models/vgg_process/vgg_images_test.py b/models/vgg_process/vgg_images_test.py new file mode 100644 index 0000000..95115b7 --- /dev/null +++ b/models/vgg_process/vgg_images_test.py @@ -0,0 +1,115 @@ +# this file is to test the vgg model +import sys +import os +os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"]="1" +from keras.preprocessing.image import ImageDataGenerator +import csv +import numpy as np +import math +import cv2 +import tensorflow as tf +import glob as gb +import time +import os +import timeit + +start = timeit.default_timer() + + + +CATEGORIES = ["panel", "nopanel"] + +# Input dirs + +model_path = './final/split/' +path = model_path +model = tf.keras.models.load_model(os.path.join(path,'20191014-173338.hdf5')) + +panel_panel = 0 +panel_nopanel = 0 +nopanel_panel = 0 +nopanel_nopanel = 0 +# test the panel result +panel_img_path = gb.glob("./location17/panel/*png") +nopanel_img_path = gb.glob(".//location17/nopanel/*png") + + +contour = {} +csvpath = './location17/vgg_predict.csv' +with open(csvpath, 'a') as csvfile: + myFields = ['id','prediction','prediction_class','label'] + writer = csv.DictWriter(csvfile, fieldnames=myFields) + writer.writeheader() +csvfile.close() + +num = 0 +contour = {} +for path in panel_img_path: + + detected_path = path.split("/")[-1] + contour['id'] = detected_path.split(".")[0] + img = cv2.imread(path) +# print(img.shape) + IMG_SIZE = 150 + img1 = cv2.resize(img, (IMG_SIZE, IMG_SIZE)) + testimg = (img1.reshape(-1, IMG_SIZE, IMG_SIZE, 3)).astype('int32')/255 + prediction_class = model.predict_classes(testimg) + prediction = model.predict(testimg) + contour['prediction'] = prediction[0][0] + contour['prediction_class'] = prediction_class[0][0] + contour['label'] = 1 + if ((prediction_class[0][0]) == 1): + panel_panel = panel_panel + 1 + else: + panel_nopanel = panel_nopanel + 1 + + with open(csvpath, 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([contour['id'],contour['prediction'],contour['prediction_class'],contour['label']]) + csvfile.close() + + +TP = panel_panel +FN = panel_nopanel +# test no panel result + +for path in nopanel_img_path: + detected_path = path.split("/")[-1] + contour['id'] = detected_path.split(".")[0] + img = cv2.imread(path) + IMG_SIZE = 150 + img1 = cv2.resize(img, (IMG_SIZE, IMG_SIZE)) + testimg = (img1.reshape(-1, IMG_SIZE, IMG_SIZE, 3)).astype('int32')/255 + prediction_class = model.predict_classes(testimg) + prediction = model.predict(testimg) + contour['prediction'] = prediction[0][0] + contour['prediction_class'] = prediction_class[0][0] + contour['label'] = 0 + with open(csvpath, 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([contour['id'],contour['prediction'],contour['prediction_class'],contour['label']]) + csvfile.close() + + if ((prediction_class[0][0]) == 1): + nopanel_panel = nopanel_panel + 1 + else: + nopanel_nopanel = nopanel_nopanel + 1 + +TN = nopanel_nopanel +FP = nopanel_panel + +stop = timeit.default_timer() +time = {} +time['description'] = 'get vgg prediction on location17' +time['time'] = stop - start +csv_path = './final/time.csv' +with open(csv_path, 'a') as csvfile: + writer = csv.writer(csvfile) + writer.writerow([time['description'],time['time']]) +csvfile.close() +print('Time: ', stop - start) +print(TP, FN,TN ,FP) + + + diff --git 
a/models/vgg_process/vgg_images_train.py b/models/vgg_process/vgg_images_train.py new file mode 100644 index 0000000..47c7689 --- /dev/null +++ b/models/vgg_process/vgg_images_train.py @@ -0,0 +1,120 @@ +# this is used to trian the vgg model to classify panel and nopanel +import keras +import numpy +import os +os.environ["CUDA_VISIBLE_DEVICES"] = "0" +import sys +from keras.preprocessing.image import ImageDataGenerator +from keras import optimizers +import keras +from keras import models +from keras import layers +from keras.callbacks import TensorBoard +from keras.applications import VGG16 +import datetime +from keras.callbacks import EarlyStopping +from keras.callbacks import ModelCheckpoint + +# data to train vgg model + +# Input dirs + +workspace_dir = './dataset' + +original_dataset_dir = os.path.join(workspace_dir, 'contours') + +train_dir = os.path.join(original_dataset_dir, 'train') + +validation_dir = os.path.join(original_dataset_dir, 'validation') + +train_panel_dir = os.path.join(train_dir, 'panel') + +train_nopanel_dir = os.path.join(train_dir, 'nopanel') + +validation_panel_dir = os.path.join(validation_dir, 'panel') + +validation_nopanel_dir = os.path.join(validation_dir, 'nopanel') + +# Output dirs + +training_model_output_dir = './solar_panel/smalldata/' + +training_log_dir = './solar_panel/smalldata/' + +model_output_dir = './solar_panel/smalldata/' + +# pretrained model imagenet +conv_base = VGG16(weights='imagenet', + include_top=False, + input_shape=(150, 150, 3)) + +NAME = "VGG-16_pretrain_1" +print(NAME) + +# add the last sequential +model = models.Sequential() +model.add(conv_base) +model.add(layers.Flatten()) +model.add(layers.Dense(256, activation='relu')) +model.add(layers.Dense(1, activation='sigmoid')) + +conv_base.trainable = True + +set_trainable = False + +print('trainable weights is :', len(model.trainable_weights)) + +train_datagen = ImageDataGenerator( + rescale=1. / 255, + rotation_range=40, + width_shift_range=0.2, + height_shift_range=0.2, + shear_range=0.2, + zoom_range=0.2, + horizontal_flip=True, + fill_mode='nearest') + +test_datagen = ImageDataGenerator(rescale=1. 
/ 255)
+train_generator = train_datagen.flow_from_directory(
+    train_dir,
+    target_size=(150, 150),
+    batch_size=32,
+    class_mode='binary')
+
+validation_generator = test_datagen.flow_from_directory(
+    validation_dir,
+    target_size=(150, 150),
+    batch_size=32,
+    class_mode='binary')
+
+
+# model.compile(loss='binary_crossentropy', optimizer=optimizers.RMSprop(lr=2e-5), )
+model.compile(loss='binary_crossentropy', optimizer=optimizers.RMSprop(lr=2e-5), metrics=['acc'])
+# checkpoint the best model and stop training early once validation loss stops improving
+checkpointer = ModelCheckpoint(filepath = training_model_output_dir + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + ".hdf5", verbose=1, save_best_only=True)
+earlystopper = EarlyStopping(monitor='val_loss', patience=20, verbose=1)
+log_dir = training_log_dir + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+print(log_dir)
+tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=True, write_images=True)
+callbacks = [checkpointer, earlystopper, tensorboard_callback]
+
+history = model.fit_generator(
+    train_generator,
+    samples_per_epoch=1000,
+    epochs=50,
+    validation_data=validation_generator,
+    validation_steps=50,
+    verbose=2,
+    callbacks=callbacks)
+path = model_output_dir
+model.save(os.path.join(path, 'VGG16_pretrain_all.model'))
+
+print('finish')
+
+sys.stdout.flush()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..983a531
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,12 @@
+cycler==0.10.0
+kiwisolver==1.1.0
+matplotlib==3.0.3
+numpy==1.17.2
+opencv-python==4.1.1.26
+pandas==0.25.1
+pyparsing==2.4.2
+python-dateutil==2.8.0
+pytz==2019.3
+scipy==1.3.1
+seaborn==0.9.0
+six==1.12.0
diff --git a/result_presentation/10location_accuray.py b/result_presentation/10location_accuray.py
new file mode 100644
index 0000000..82d894d
--- /dev/null
+++ b/result_presentation/10location_accuray.py
@@ -0,0 +1,16 @@
+import pandas as pd
+import csv
+from sklearn.metrics import confusion_matrix
+
+df = pd.read_csv("./data/feature_test_all_vgg_svm_linear.csv")
+
+for i in range(1,11):
+    data = pd.read_csv('./finaltest/data/10locations/location' + str(i) + '.csv')
+    y_predict = data.linear_nosplit_class
+    y_test = data.label
+    print(confusion_matrix(y_test, y_predict))
+    tn, fp, fn, tp = confusion_matrix(y_test, y_predict, labels=[0,1]).ravel()
+    with open('./finaltest/data/10locations/10location.csv', 'a') as csvfile:
+        writer = csv.writer(csvfile)
+        writer.writerow(['location'+str(i), tn, fp, fn, tp])
+    csvfile.close()
diff --git a/result_presentation/contours_extraction.py b/result_presentation/contours_extraction.py
new file mode 100644
index 0000000..b784e8b
--- /dev/null
+++ b/result_presentation/contours_extraction.py
@@ -0,0 +1,371 @@
+# OpenCV lib
+import os
+import tensorflow as tf
+import cv2
+from skimage.segmentation import slic
+from skimage import color
+from skimage import data
+from skimage import io
+# Traverse files
+import glob as gb
+import tensorflow as tf
+# Math lib
+import numpy as np
+import time
+import matplotlib.pyplot as plt
+import matplotlib.gridspec as gridspec
+import math
+import csv
+
+from matplotlib.pyplot import imshow
+import matplotlib.pyplot as plt
+import matplotlib.image as mpimg
+
+
+def kmeans(img):
+    # K-means
+    # Convert image to one dimension data
+    
img_ori = img.copy() + img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) + Z = img.reshape((-1, 3)) + # Z = img.reshape((-1, 3)) + Z = np.float32(Z) + # define criteria, number of clusters(K) and apply kmeans() + criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0) + K = 5 + # Run k-means + # ret: compactness + # labels: + # centers: array of centers of clusters + ret, label, center = cv2.kmeans(Z, K, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS) + # Now convert back into uint8, and make original image + center = np.uint8(center) + res = center[label.flatten()] + res2 = res.reshape(img.shape) + res2_gray = cv2.cvtColor(res2, cv2.COLOR_BGR2GRAY) + + hist = res2_gray.ravel() + hist = set(hist) + hist = sorted(hist) + # print(len(hist)) + threshold = [] + tag = [] + tag1 = [] + tag_dilate3 = [] + tag_dilate5 = [] + tag_dilate7 = [] + tag_close3 = [] + tag_close5 = [] + tag_close7 = [] + for i in range(len(hist) - 1): + threshold.append(int(hist[i] / 2 + hist[i + 1] / 2)) + # no dilate , not accurate + kernal3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3)) + kernal5 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5)) + kernal7 = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7)) + for j in range(len(hist) - 1): + if j == (len(hist) - 2): + dia = cv2.inRange(res2_gray, threshold[j], 255) + tag.append(dia) + tag_dilate3.append(cv2.dilate(dia, kernal3, iterations=1)) + tag_dilate5.append(cv2.dilate(dia, kernal5, iterations=1)) + tag_dilate7.append(cv2.dilate(dia, kernal7, iterations=1)) + else: + dia = cv2.inRange(res2_gray, threshold[j], threshold[j + 1]) + tag.append(dia) + tag_dilate3.append(cv2.dilate(dia, kernal3, iterations=1)) + tag_dilate5.append(cv2.dilate(dia, kernal5, iterations=1)) + tag_dilate7.append(cv2.dilate(dia, kernal7, iterations=1)) + + for j in range(len(hist) - 1): + if j == (len(hist) - 2): + dia1 = cv2.inRange(res2_gray, threshold[j], 255) + tag1.append(dia1) + + tag_close3.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal3)) + tag_close5.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal5)) + tag_close7.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal7)) + else: + dia1 = cv2.inRange(res2_gray, threshold[j], threshold[j + 1]) + tag1.append(dia1) + tag_close3.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal3)) + tag_close5.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal5)) + tag_close7.append(cv2.morphologyEx(dia1, cv2.MORPH_CLOSE, kernal7)) + + # return(tag,tag_dilate3,tag_close3, tag_dilate5,tag_close5, tag_dilate7, tag_close7 ,hist) + return (tag, hist, tag_close3, tag_dilate5, tag_close5, tag_dilate7, tag_close7, hist) + + +# the kernel number is returned , use kernel 3 temporiarly. 
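+
+# Illustrative usage (added sketch, not called anywhere in this file): the masks returned by
+# kmeans() are what find_contours() below expects, e.g.
+#   img = cv2.imread('roof.png')            # hypothetical input image path
+#   masks = kmeans(img.copy())[2]           # index 2 = per-cluster masks closed with the 3x3 kernel
+#   roof_img, candidate_contours = find_contours(img, masks)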
+ +# find contours based on kmeans method +def find_contours(img, mask_list): + # Get the area of roof + masks_length = len(mask_list) + cont = [] + for i in range(0, masks_length): + c, h = cv2.findContours(mask_list[i], cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + for contour in c: + cont.append(contour) + # cv2.drawContours(img, cont, -1, (0, 0, 255), 2) + # cv2.imshow('img', img) + # cv2.waitKey(0) + return [img, cont] + + +# use size filter +def filter_size(img, contour): + image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + roof_area = cal_roofarea(image_grayscale)[0] + cont = [] + for c in contour: + area = cv2.contourArea(c) + if (area > 0): + ratio = area / roof_area + if ((area > 800) & (ratio < 0.5)): + cont.append(c) + cv2.drawContours(img, cont, -1, (0, 0, 255), 2) + areas = [] + for i, co in enumerate(cont): + areas.append((i, cv2.contourArea(co), co)) + + a2 = sorted(areas, key=lambda d: d[1], reverse=True) + # cv2.drawContours(img, a2, -1, (0, 0, 255), 2) + cv2.imshow('img',img) + cv2.waitKey(0) + cv2.imwrite('./solar_panel/show/47.png',img) + return [img, a2] + + +# calculate the roof area so we can remove a part of the contours +def cal_roofarea(image): + black = cv2.threshold(image, 0, 255, 0)[1] + contours, hierarchy = cv2.findContours(black, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + # cv2.drawContours(img, contours, -1, (255, 0, 0), 2) + area = [cv2.contourArea(c) for c in contours] + roof_index = np.argmax(area) + roof_cnt = contours[roof_index] + # contourArea will return the wrong value if the contours are self-intersections + roof_area = cv2.contourArea(roof_cnt) + # print('roof area = '+ str(roof_area)) + return (roof_area, roof_cnt) + + +# calculate the mean pixel value in the contours +def getContourStat(img, contour): + mask = np.zeros(img.shape, dtype="uint8") + cv2.drawContours(mask, [contour], -1, 255, -1) + mean, stddev = cv2.meanStdDev(img, mask=mask) + return mean, stddev + + +# use to show the result of kmeans + +def get_mask(img, mask_list): + masks_length = len(mask_list) + mask_color = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 255), (128, 128, 128), (0, 0, 0)] + for i in range(0, masks_length): + img[mask_list[i] != 0] = mask_color[i] + return img + + +def pole(img, contour): + ori_img = img.copy() + image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + cont = cal_roofarea(image_grayscale)[1] + cv2.drawContours(ori_img, cont, -1, (255, 0, 0), 3) + # print(len(contour)) + contour_res = [] + back = 1 + cnt = contour + leftmost = tuple(cnt[cnt[:, :, 0].argmin()][0]) + rightmost = tuple(cnt[cnt[:, :, 0].argmax()][0]) + topmost = tuple(cnt[cnt[:, :, 1].argmin()][0]) + bottommost = tuple(cnt[cnt[:, :, 1].argmax()][0]) + pole = [leftmost, rightmost, topmost, bottommost] + for point in pole: + # check the distance with contours, biggest contour + # when it is negative, means the point is outside the contours + dist = cv2.pointPolygonTest(cont, point, True) + # print(dist) + if (dist <= 0): + back = 0 + else: + pass + + return (ori_img, contour, back) + + +def rotate_rectangle(img_name, img, contour): + shape = {} + shape['id'] = img_name + # for c in contour: + c = contour + + area = cv2.contourArea(c) + x, y, w, h = cv2.boundingRect(c) + ratiowh = min(float(w / h), float(h / w)) + shape['ratiowh'] = ratiowh + + ratioarea = float(area / (w * h)) + shape['ratioarea'] = ratioarea + + epsilon = 0.01 * cv2.arcLength(c, True) + approx = cv2.approxPolyDP(c, epsilon, True) + + approxlen = len(approx) + shape['approxlen'] = approxlen + + # the 
original num set to be -1 to be different no operation + num_angle = 0 + num_angle90 = -1 + num_angle80 = -1 + num_angle70 = -1 + + mask = np.zeros(img.shape, np.uint8) + cv2.drawContours(mask, [approx], -1, (255, 255, 255), -1) + cv2.drawContours(img, [approx], -1, (255, 255, 255), 2) + # mask = np.concatenate((mask, mask, mask), axis=-1) + gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) + contour_list = [] + ret, thresh = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY) + contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + # get the list of contours + for points in contours[0]: + x, y = points.ravel() + contour_list.append([x, y]) + corners = cv2.goodFeaturesToTrack(gray, 50, 0.01, 10) + corners = np.int0(corners) + for i in corners: + x, y = i.ravel() + # decide whether the corner is on the contours + if (cv2.pointPolygonTest(contours[0], (x, y), True) == 0): + center_index = contour_list.index([x, y]) + length = len(contour_list) + # get the point three before, and ignore the end point + a_index = center_index - 5 + b_index = center_index + 5 + if ((a_index > 0) & (b_index > 0) & (a_index < length) & (b_index < length)): + xa, ya = contour_list[a_index] + xb, yb = contour_list[b_index] + # print(x , y) + # print(xa, ya) + a = math.sqrt((x - xa) * (x - xa) + (y - ya) * (y - ya)) + b = math.sqrt((x - xb) * (x - xb) + (y - yb) * (y - yb)) + c = math.sqrt((xa - xb) * (xa - xb) + (ya - yb) * (ya - yb)) + if ((a > 0) & (b > 0)): + if (((a * a + b * b - c * c) / (2 * a * b)) < 1) & (((a * a + b * b - c * c) / (2 * a * b) > -1)): + angle = math.degrees(math.acos((a * a + b * b - c * c) / (2 * a * b))) + num_angle = num_angle + 1 + # print(angle) + if (angle < 90): + num_angle90 = num_angle90 + 1 + if (angle < 80): + num_angle80 = num_angle80 + 1 + if (angle < 70): + num_angle70 = num_angle70 + 1 + cv2.circle(img, (x, y), 5, 255, -1) + + shape['numangle'] = num_angle + shape['numangle90'] = num_angle90 + shape['numangle80'] = num_angle80 + shape['numangle70'] = num_angle70 + # print(shape) + # with open(csv_path, 'a') as csv_file: + # writer = csv.writer(csv_file) + # # writer.writerow(['image_id','size','pole','mean','square']) + # writer.writerow([shape['id'],shape['ratiowh'], shape['ratioarea'],shape['approxlen'],shape['numangle'],shape['numangle90'],shape['numangle80'],shape['numangle70']]) + # # for key, value in contour.items(): + # # writer.writerow([key, value]) + # csv_file.close() + + return (shape) + + +def mean(img, contour): + cont_res = [] + ori_img = img.copy() + + img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) + image_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + mean_filter = 0 + c = contour + mean = getContourStat(image_grayscale, c)[0] + hist = kmeans(img)[1] + if (mean[0][0] <= (hist[2] + 5)): + # mean = 1 means panel + mean_filter = 1 + + else: + # pass + mean_filter = 0 + # print(mean) + # cv2.drawContours(ori_img, cont_res, -1, (0, 0, 255), -1) + return (ori_img, cont_res, mean_filter) + + +def main(): + img_path = gb.glob("./solar_panel/data/panel/3.png") + + # store the information of contours(the label) + for path in img_path: + contour = {} + img_name = path.split("/")[-1] + img_name = img_name.split(".")[0] + # print(img_name) + # original image + img = cv2.imread(path) + # this is to show the contours so we can label right + img_contour = img.copy() + # tag = kmeans(img.copy())[2] + tag = kmeans(img)[2] + # masks = get_mask(img, tag) + # get the contours + img_contours = find_contours(img, tag)[0] + contours = 
find_contours(img, tag)[1] + # filter: to remove the contours which is less than 1 block of solar panel + img_size = filter_size(img, contours)[0] + contourinfo = filter_size(img, contours)[1] + # conotur_num is to tag the contours on the image + contour_num = 0 + rank = 0 + + for i, area, c in contourinfo: + + vgg_image = img.copy() + mask = np.zeros_like(img) + + img2gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) + + mask = cv2.drawContours(img2gray, [c], 0, (255, 255, 255), -1) + # cv2.imshow('mask', mask) + # cv2.waitKey(0) + # cv2.destroyAllWindows() + # get second masked value (background) mask must be inverted + img_result = cv2.bitwise_or(vgg_image, vgg_image, mask=mask) + # cv2.imshow('img',img_result) + # cv2.waitKey(0) + # cv2.destroyAllWindows() + mask = cv2.bitwise_not(mask) + background = np.zeros_like(img) + # Fill image with color + background[:] = (255, 0, 255) + # background = np.full(img.shape, 255, dtype=np.uint8) + bk = cv2.bitwise_or(background, background, mask=mask) + final = cv2.bitwise_or(img_result, bk) +<<<<<<< HEAD + cv2.imwrite(('./solar_panel/show/' + str(i) + '.png'),final) +======= + cv2.imwrite(('' + str(i) + '.png'),final) +>>>>>>> 39db66de7b321f1d8347e674b5c8fa5f34ff3b62 + + print('finish') + + +main() + + + + + + diff --git a/result_presentation/csv_calculate.py b/result_presentation/csv_calculate.py new file mode 100644 index 0000000..4b929eb --- /dev/null +++ b/result_presentation/csv_calculate.py @@ -0,0 +1,58 @@ +import pandas +import pandas as pd +import pickle +import numpy as np +import csv +image_panel = [] +image_nopanel = [] +vgg_panel_panel = [] +vgg_panel_nopanel = [] +vgg_nopanel_panel = [] +vgg_nopanel_nopanel = [] + +lr_panel_panel = [] +lr_panel_nopanel = [] +lr_nopanel_panel = [] +lr_nopanel_nopanel = [] +csv_path = './nosplit/test/vgg_predict-Copy2.csv' +with open(csv_path, newline='') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + if ((row['label'] == '1') and (row['image'] not in image_panel)): + image_panel.append(row['image']) + if ((row['label'] == '0') and (row['image'] not in image_nopanel)): + image_nopanel.append(row['image']) + +csvfile.close() +print(len(image_panel),len(image_nopanel)) + +with open(csv_path, newline='') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + if ((row['prediction_class'] == '1') and (row['image'] not in vgg_panel_panel) and (row['label'] == '1')): + vgg_panel_panel.append(row['image']) + if ((row['prediction_class'] == '0') and (row['image'] not in vgg_panel_nopanel) and (row['label'] == '1')): + vgg_panel_nopanel.append(row['image']) + if ((row['prediction_class'] == '1') and (row['image'] not in vgg_nopanel_panel) and (row['label'] == '0')): + vgg_nopanel_panel.append(row['image']) + if ((row['prediction_class'] == '0') and (row['image'] not in vgg_nopanel_nopanel) and (row['label'] == '0')): + vgg_nopanel_nopanel.append(row['image']) + +csvfile.close() +print(len(vgg_panel_panel),len( vgg_panel_nopanel),len(vgg_nopanel_panel),len(vgg_nopanel_nopanel)) + + +with open(csv_path, newline='') as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + if ((row['lrpredict'] == '1') and (row['image'] not in lr_panel_panel) and (row['label'] == '1')): + lr_panel_panel.append(row['image']) + if ((row['lrpredict'] == '0') and (row['image'] not in lr_panel_nopanel) and (row['label'] == '1')): + lr_panel_nopanel.append(row['image']) + if ((row['lrpredict'] == '1') and (row['image'] not in lr_nopanel_panel) and (row['label'] == '0')): + 
lr_nopanel_panel.append(row['image']) + if ((row['lrpredict'] == '0') and (row['image'] not in lr_nopanel_nopanel) and (row['label'] == '0')): + lr_nopanel_nopanel.append(row['image']) + +csvfile.close() +print(len(lr_panel_panel),len( lr_panel_nopanel),len(lr_nopanel_panel),len(lr_nopanel_nopanel)) \ No newline at end of file diff --git a/result_presentation/data_stastics/box_plot.py b/result_presentation/data_stastics/box_plot.py new file mode 100644 index 0000000..8e509e4 --- /dev/null +++ b/result_presentation/data_stastics/box_plot.py @@ -0,0 +1,119 @@ +import numpy as np +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt + +# seaborn.boxplot API +# https://seaborn.pydata.org/generated/seaborn.boxplot.html +# Understanding Boxplots +# https://towardsdatascience.com/understanding-boxplots-5e2df7bcbd51 + +# col_names = ['id', 'image', 'size', 'pole', 'mean', 'stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label'] +col_names = ['id', 'location', 'image', 'size', 'pole', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label', 'vgg_pro', 'vgg_class'] + +data = pd.read_csv("./data/final/split/feature_17_all.csv", names=col_names) + +data = data.dropna() + +# print(data[:5]) +# print(data.shape) + +<<<<<<< HEAD +g_plot_outputDir = './solarpanel/output/final/split/boxplot/' +======= +g_plot_outputDir = '' +>>>>>>> 39db66de7b321f1d8347e674b5c8fa5f34ff3b62 + +positive_sample_set = data[data['label'] == 1.0] +negative_sample_set = data[data['label'] == 0.0] +# random_sample_set = data[(data['label'] != 0.0) & (data['label'] != 1.0)] + +analysis_features = ['size', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70'] +# analysis_features = ['size'] + +labels_to_draw = ['25%','75%'] + +def draw_label(plot, label_type): + labels = [negative_sample_set_description[label_type], positive_sample_set_description[label_type]] + labels_text = [str(np.round(s, 2)) for s in labels] + + pos = range(len(labels_text)) + + for tick,label in zip(pos, plot.get_xticklabels()): + plot.text( + pos[tick], + labels[tick], + labels_text[tick], + ha='center', + va='center', + fontweight='bold', + size=10, + color='white', + bbox=dict(facecolor='#445A64')) + +def draw_single_label(plot, pos, value): + plot.text( + pos, + value, + str(np.round(value, 2)), + ha='center', + va='center', + fontweight='bold', + size=20, + color='white', + bbox=dict(facecolor='#445A64')) + +def get_whiskers(feature_array): + Q1, median, Q3 = np.percentile(np.asarray(feature_array), [25, 50, 75]) + + IQR = Q3 - Q1 + + loval = Q1 - 1.5 * IQR + hival = Q3 + 1.5 * IQR + + upper_wisk_set = np.compress(feature_array <= hival, feature_array) + lower_wisk_set = np.compress(feature_array >= loval, feature_array) + upper_wisk = np.max(upper_wisk_set) + lower_wisk = np.min(lower_wisk_set) + + return [lower_wisk, upper_wisk] + +palette = sns.color_palette(["#e69138", "#3d85c6"]) + +for analysis_feature in analysis_features: + + positive_sample_set_description = positive_sample_set[analysis_feature].describe() + print('positive_sample_set:') + print(positive_sample_set_description) + positive_whis = get_whiskers(positive_sample_set[analysis_feature]) + print(positive_whis[0]) + print(positive_whis[1]) + + negative_sample_set_description = 
negative_sample_set[analysis_feature].describe() + print('negative_sample_set:') + print(negative_sample_set_description) + negative_whis = get_whiskers(negative_sample_set[analysis_feature]) + print(negative_whis[0]) + print(negative_whis[1]) + + sns.set(font_scale = 2) + + # Generate boxplot + sns_boxplot = sns.boxplot(x='label', y=analysis_feature, data=data, showfliers=False, palette=palette) + # sns_boxplot = sns.boxplot(x='label', y=analysis_feature, data=data) + + for l in labels_to_draw: + draw_single_label(sns_boxplot, 1, positive_sample_set_description[l]) + draw_single_label(sns_boxplot, 0, negative_sample_set_description[l]) + + for l in positive_whis: + draw_single_label(sns_boxplot, 1, l) + + for l in negative_whis: + draw_single_label(sns_boxplot, 0, l) + + sns_boxplot.set_title(analysis_feature+'_distribution_boxplot') + + fig = sns_boxplot.get_figure() + fig.savefig(g_plot_outputDir + analysis_feature + '_boxplot.png') + plt.show() diff --git a/result_presentation/data_stastics/distribution_plot.py b/result_presentation/data_stastics/distribution_plot.py new file mode 100644 index 0000000..6703aa3 --- /dev/null +++ b/result_presentation/data_stastics/distribution_plot.py @@ -0,0 +1,67 @@ +import math +import numpy as np +import pandas as pd +from pandas.plotting import scatter_matrix +import seaborn as sns +import matplotlib.pyplot as plt + +# seaborn.distplot API +# http://seaborn.pydata.org/generated/seaborn.distplot.html + +col_names = ['id', 'image', 'size', 'pole', 'mean', 'stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label'] + +data = pd.read_csv("./data/Training_set/location_1_7_all.csv", + names=col_names) +data = data.dropna() + +# print(data[:5]) +# print(data.shape) + +analysis_features = ['size', 'mean', 'stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70'] + +g_plot_outputDir = './solarpanel/output/location1-7/distributions/' + +positive_sample_set = data[data['label'] == 1.0] +negative_sample_set = data[data['label'] == 0.0] + +for analysis_feature in analysis_features: + + N = max(data[analysis_feature]) + binsize = np.arange(0,N+1,math.ceil(N/100)) + if analysis_feature == 'square' or analysis_feature == 'ratiowh' or analysis_feature == 'ratioarea': + binsize = None + + distplot_labels=['ALL', 'positive_sample_set', 'negative_sample_set'] + + distplot_ked = False + # Generate distplot + # sns_distplot = sns.distplot(data[analysis_feature], kde=distplot_ked, label=distplot_labels[0], bins=binsize); + sns_distplot = sns.distplot(positive_sample_set[analysis_feature], kde=distplot_ked, label=distplot_labels[1], bins=binsize) + sns_distplot = sns.distplot(negative_sample_set[analysis_feature], kde=distplot_ked, label=distplot_labels[2], bins=binsize) + sns_distplot.legend() + + sns_distplot.set_title(analysis_feature+'_distribution', fontsize=30) + fig = sns_distplot.get_figure() + fig.savefig(g_plot_outputDir + analysis_feature + '.png') + plt.show() + + ''' + # Generate distplot for positive_sample_set + sns_distplot = sns.distplot(positive_sample_set[analysis_feature], kde=distplot_ked)#, bins=binsize) + + sns_distplot.set_title(analysis_feature+'_positive_set_distribution') + fig = sns_distplot.get_figure() + fig.savefig(g_plot_outputDir + analysis_feature + '_positive_set_distribution.png') + plt.show() + ''' + +# pd_hist = data.groupby('label')[analysis_feature].hist(alpha=0.4) +# pd_hist = positive_sample_set.hist(column=analysis_features) +# pd_hist = 
negative_sample_set.hist(column=analysis_features) + +# axis=0 for index, axis=1 for column +# features_only_data = data.drop(['id', 'image'], axis=1) + +# sns_pairplot = sns.pairplot(features_only_data, diag_kind='kde') + +# sns_pairplot.savefig(g_plot_outputDir + 'scatter' + '.png') diff --git a/result_presentation/data_stastics/scatter_grid.py b/result_presentation/data_stastics/scatter_grid.py new file mode 100644 index 0000000..34ecb27 --- /dev/null +++ b/result_presentation/data_stastics/scatter_grid.py @@ -0,0 +1,35 @@ +import math +import numpy as np +import pandas as pd +from pandas.plotting import scatter_matrix +import seaborn as sns +import matplotlib.pyplot as plt + +# seaborn.PairGrid API +# https://seaborn.pydata.org/generated/seaborn.PairGrid.html#seaborn.PairGrid + +col_names = ['id', 'image', 'size', 'pole', 'mean', 'stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label'] + +data = pd.read_csv("./solarpanel/data/Training_set/location_1_7_all.csv", + names=col_names) +data = data.dropna() + +g_plot_outputDir = './solarpanel/output/location1-7/scatter/' + +analysis_features = ['size', 'mean', 'stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70'] +# analysis_features = ['size', 'mean'] + +palette = sns.color_palette(["#e69138", "#3d85c6"]) + +# sns.set(font_scale = 1.5) +sns.set_context(rc={'axes.labelsize': 25.0, 'xtick.labelsize': 'small', 'ytick.labelsize': 'small', 'axes.linewidth': 0, 'ytick.major.size': 0, 'xtick.major.size': 0}) +# print(sns.plotting_context()) + +sns_pairplot = sns.PairGrid(data, vars=analysis_features, + hue='label', hue_kws={"marker": ["o", "s"]}, palette=palette) +sns_pairplot = sns_pairplot.map(plt.scatter, linewidths=1, edgecolor="w", s=40) +# sns_pairplot = sns_pairplot.add_legend() + +plt.subplots_adjust(hspace = 0.01, wspace = 0.01) +sns_pairplot.savefig(g_plot_outputDir + 'scatter_grid' + '.png') +plt.show() \ No newline at end of file diff --git a/result_presentation/data_stastics/scatter_plot.py b/result_presentation/data_stastics/scatter_plot.py new file mode 100644 index 0000000..c7ff3f5 --- /dev/null +++ b/result_presentation/data_stastics/scatter_plot.py @@ -0,0 +1,32 @@ +import math +import numpy as np +import pandas as pd +from pandas.plotting import scatter_matrix +import seaborn as sns +import matplotlib.pyplot as plt + +# seaborn.pairplot API +# https://seaborn.pydata.org/generated/seaborn.pairplot.html + +col_names = ['id', 'image', 'size', 'pole', 'mean', 'stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label'] + +data = pd.read_csv("./solarpanel/data/Training_set/location_1_7_all.csv", + names=col_names) +data = data.dropna() + +g_plot_outputDir = './solarpanel/output/location1-7/scatter/' + +analysis_features = ['size', 'mean', 'stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70'] +# analysis_features = ['size', 'mean'] + +palette = sns.color_palette(["#e69138", "#3d85c6"]) + +sns.set(font_scale = 1.5) + +sns_pairplot = sns.pairplot(data, vars=analysis_features, + hue='label', markers=["o", "s"], palette=palette, + diag_kind='kde') + +sns_pairplot.savefig(g_plot_outputDir + 'scatter_plot' + '.png') + +plt.show() diff --git a/result_presentation/data_stastics/violin_plot.py b/result_presentation/data_stastics/violin_plot.py new file mode 100644 index 0000000..8c617a7 --- /dev/null +++ b/result_presentation/data_stastics/violin_plot.py @@ -0,0 
+1,113 @@ +import numpy as np +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt + +# seaborn.violinplot API +# https://seaborn.pydata.org/generated/seaborn.violinplot.html +# col_names = ['id', 'image', 'size', 'pole', 'mean', 'stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label'] +# col_names = ['id', 'location', 'image', 'size', 'pole', 'mean', 'stddev', 'b_mean', 'g_mean', 'r_mean', 'b_stddev', 'g_stddev', 'r_stddev', 'square', 'ratiowh', 'ratioarea', 'approxlen', 'numangle', 'numangle90', 'numangle70', 'label', 'vgg_pro', 'vgg_class'] +# col_names = ['id', 'location', 'image', 'size', 'pole', 'gray mean', 'gray standard deviation', 'blue mean', 'green mean', 'red mean', 'blue standard deviation', 'green standard deviation', 'red standard deviation', 'square similarity', 'width height ratio', 'area ratio', 'number of curves', 'number of corners', 'number of corners less 90', 'number of corners less 70', 'label', 'vgg_pro', 'vgg_class'] +col_names = ['id', 'location', 'image', 'size', 'pole', 'gray_mean', 'gray_std_deviation', 'blue_mean', 'green_mean', 'red_mean', 'blue_std_deviation', 'green_std_deviation', 'red_std_deviation', 'square_similarity', 'width_height_ratio', 'area_ratio', 'number_of_curves', 'number_of_corners', 'corners_less_90', 'corners_less_70', 'label', 'vgg_pro', 'vgg_class'] + +data = pd.read_csv("./data/final/split/feature_17_all.csv", names=col_names) + +data = data.dropna() + +# print(data[:5]) +# print(data.shape) + +g_plot_outputDir = './output/location1-7/violinplot/' + +positive_sample_set = data[data['label'] == 1.0] +negative_sample_set = data[data['label'] == 0.0] +# random_sample_set = data[(data['label'] != 0.0) & (data['label'] != 1.0)] + + +analysis_features = ['size', 'gray_mean', 'gray_std_deviation', 'blue_mean', 'green_mean', 'red_mean', 'blue_std_deviation', 'green_std_deviation', 'red_std_deviation', 'square_similarity', 'width_height_ratio', 'area_ratio', 'number_of_curves', 'number_of_corners', 'corners_less_90', 'corners_less_70'] +# analysis_features = ['mean'] + + +labels_to_draw = ['25%','75%'] + +def draw_single_label(plot, pos, value): + plot.text( + pos, + value, + str(np.round(value, 2)), + ha='center', + va='center', + fontweight='bold', + size=30, + color='white', + bbox=dict(facecolor='#445A64') + ) + +def get_whiskers(feature_array): + Q1, median, Q3 = np.percentile(np.asarray(feature_array), [25, 50, 75]) + + IQR = Q3 - Q1 + + loval = Q1 - 1.5 * IQR + hival = Q3 + 1.5 * IQR + + upper_wisk_set = np.compress(feature_array <= hival, feature_array) + lower_wisk_set = np.compress(feature_array >= loval, feature_array) + upper_wisk = np.max(upper_wisk_set) + lower_wisk = np.min(lower_wisk_set) + + return [lower_wisk, upper_wisk] + +palette = sns.color_palette(["#e69138", "#3d85c6"]) + +for analysis_feature in analysis_features: + + data_whis = get_whiskers(data[analysis_feature]) + + positive_sample_set_description = positive_sample_set[analysis_feature].describe() + print('positive_sample_set:') + print(positive_sample_set_description) + positive_whis = get_whiskers(positive_sample_set[analysis_feature]) + print(positive_whis[0]) + print(positive_whis[1]) + + negative_sample_set_description = negative_sample_set[analysis_feature].describe() + print('negative_sample_set:') + print(negative_sample_set_description) + negative_whis = get_whiskers(negative_sample_set[analysis_feature]) + print(negative_whis[0]) + print(negative_whis[1]) + + data_to_show = 
data.loc[(data[analysis_feature] > data_whis[0]) & (data[analysis_feature] < data_whis[1])] + + # Generate boxplot + # sns.set(font_scale = font_scale_value) + # sns.set_context(rc={'xtick.major.size': 6.0, 'ytick.minor.size': 4.0, 'legend.fontsize': 22.0, 'ytick.major.width': 1.25, 'axes.labelsize': 24.0, 'ytick.minor.width': 1.0, 'xtick.minor.width': 1.0, 'font.size': 24.0, 'grid.linewidth': 1.0, 'axes.titlesize': 24.0, 'axes.linewidth': 1.25, 'patch.linewidth': 1.0, 'ytick.labelsize': 22.0, 'xtick.labelsize': 10.0, 'lines.linewidth': 1.5, 'ytick.major.size': 6.0, 'lines.markersize': 6.0, 'xtick.major.width': 1.25, 'xtick.minor.size': 4.0}) + # sns.set_context(rc={'axes.titlesize': 'large', 'grid.linewidth': 0.8, 'lines.markersize': 6.0, 'xtick.major.size': 3.5, 'xtick.major.width': 0.8, 'ytick.major.size': 3.5, 'ytick.minor.width': 0.6, 'axes.linewidth': 0.8, 'xtick.labelsize': 'medium', 'patch.linewidth': 1.0, 'ytick.labelsize': 'medium', 'xtick.minor.size': 2.0, 'font.size': 10.0, 'legend.fontsize': 'medium', 'lines.linewidth': 1.5, 'ytick.minor.size': 2.0, 'xtick.minor.width': 0.6, 'axes.labelsize': 'medium', 'ytick.major.width': 0.8}) + sns.set(rc={'figure.figsize':(10, 6)}) + sns.set_context(rc={'axes.titlesize': 22.0, 'axes.labelsize': 50.0, 'xtick.labelsize': 'small', 'ytick.labelsize': 'small'}) + # print(sns.plotting_context()) + + sns_violinplot = sns.violinplot(x='label', y=analysis_feature, data=data_to_show, showfliers=False, split=False, palette=palette) + # sns_boxplot = sns.boxplot(x='label', y=analysis_feature, data=data) + sns.despine(offset=10, trim=True); + for l in labels_to_draw: + draw_single_label(sns_violinplot, 1, positive_sample_set_description[l]) + draw_single_label(sns_violinplot, 0, negative_sample_set_description[l]) + + for l in positive_whis: + draw_single_label(sns_violinplot, 1, l) + + for l in negative_whis: + draw_single_label(sns_violinplot, 0, l) + + # sns_violinplot.set_title(analysis_feature) + + # ADDED: Extract axes. 
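One caveat with the per-feature plotting loops above (both the boxplot script and this violin-plot script): seaborn draws onto the current axes, so if consecutive features come out stacked on one figure, a possible restructuring is to give each feature its own figure and close it after saving. A sketch using only names already defined in this script:

```python
# One figure per feature, closed after saving, so iterations do not overlap.
fig, ax = plt.subplots(figsize=(10, 6))
sns.violinplot(x='label', y=analysis_feature, data=data_to_show,
               split=False, palette=palette, ax=ax)
ax.set_xlabel('')
fig.savefig(g_plot_outputDir + analysis_feature + '_violinplot.png')
plt.close(fig)
```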
+ sns_violinplot.set_xlabel('') + + fig = sns_violinplot.get_figure() + fig.savefig(g_plot_outputDir + analysis_feature + '_violinplot.png') + + plt.show() + # break diff --git a/result_presentation/draw_pca.py b/result_presentation/draw_pca.py new file mode 100644 index 0000000..320c50f --- /dev/null +++ b/result_presentation/draw_pca.py @@ -0,0 +1,21 @@ +import numpy as np +from sklearn.decomposition import PCA +from sklearn.preprocessing import MinMaxScaler +import matplotlib.pyplot as plt +%matplotlib inline +filepath = './feature_test_all.csv' #your path here +data = np.genfromtxt(filepath, delimiter=',', dtype='float64') + +scaler = MinMaxScaler(feature_range=[0, 1]) +data_rescaled = scaler.fit_transform(data[1:, 3:19]) +#Fitting the PCA algorithm with our Data +pca = PCA().fit(data_rescaled) +#Plotting the Cumulative Summation of the Explained Variance +plt.figure() +plt.plot(np.cumsum(pca.explained_variance_ratio_), linewidth ='3') +plt.xlabel('Number of Components',{'size': 14}) +plt.ylabel('Variance',{'size': 14}) #for each component +# plt.title('Pulsar Dataset Explained Variance') +plt.tight_layout() +plt.savefig('./finaltest/data/pca.png') +plt.show() \ No newline at end of file diff --git a/result_presentation/draw_roc.py b/result_presentation/draw_roc.py new file mode 100644 index 0000000..9863078 --- /dev/null +++ b/result_presentation/draw_roc.py @@ -0,0 +1,171 @@ +import pandas +import pandas as pd +import pickle +from sklearn.linear_model import LogisticRegression +from sklearn import metrics +from sklearn import datasets +from sklearn.preprocessing import StandardScaler +import numpy as np +from sklearn.metrics import classification_report, confusion_matrix +import csv +import time +start_time = time.time() +%matplotlib inline + + + +<<<<<<< HEAD +data = pd.read_csv("./output/svmrbftrainprobility.csv") +======= +data = pd.read_csv("") +>>>>>>> 39db66de7b321f1d8347e674b5c8fa5f34ff3b62 +data = data.dropna() +# feature_cols = ['vgg_pro','vgg_class','svmrbf_class','svmrbfpro'] +feature_cols = ['vgg_pro','svmrbfpro'] +X = data[feature_cols] + +scaler = StandardScaler() +X = scaler.fit_transform(X)# Features + +y = data.label # Target variable + +# from sklearn.model_selection import train_test_split +# X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0) +X_train = X +y_train = y + +# from sklearn.svm import SVC +# svclassifier = SVC(kernel='rbf',class_weight='balanced') +# model = svclassifier.fit(X_train, y_train) + + +# use linear regression +from sklearn.linear_model import LogisticRegression +model = LogisticRegression(class_weight = 'balanced') + +# instantiate the model (using the default parameters) + +# fit the model with data +model.fit(X_train, y_train) +# from sklearn.externals import joblib +# from joblib import dump, load +# dump(model, 'svmrbfhybrid.joblib') +# model = load('svmrbfhybrid.joblib') +print(model.coef_ ) +print(model.intercept_ ) +from sklearn import metrics + + + + +<<<<<<< HEAD +datatest = pd.read_csv("./split/output/svmrbftestpro.csv") +======= +datatest = pd.read_csv("") +>>>>>>> 39db66de7b321f1d8347e674b5c8fa5f34ff3b62 +datatest = datatest.dropna() +# feature_cols = ['vgg_pro','vgg_class','svmrbf_class','svmrbfpro'] +feature_cols = ['vgg_pro','svmrbfpro'] +Xtest = datatest[feature_cols] +scaler = StandardScaler() +Xtest = scaler.fit_transform(Xtest)# Features +ytest = datatest.label # Target variable +y_predict_vgg = datatest.vgg_pro +y_predict_svm = datatest.svmrbfpro + + + +y_predict= model.predict(Xtest) 
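A note on the feature scaling above: `StandardScaler` is fit once on the training features and then fit again, independently, on the test features. The conventional scikit-learn pattern is to reuse the training-set statistics for the test set. A sketch with the variables already defined in this script (the names of the scaled arrays are hypothetical):

```python
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(data[feature_cols])   # fit on training data only
X_test_scaled = scaler.transform(datatest[feature_cols])    # reuse the same mean/std
```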
+y_predict_pro = model.predict_proba(Xtest) +y_predict_pro = y_predict_pro[:, 1] + + + +df = pd.DataFrame(datatest) +df.insert(25, "svm_nosplit_pro", y_predict_pro, True) +df.insert(26, "svm_nosplit_class", y_predict, True) + +<<<<<<< HEAD +export_csv = df.to_csv ('./vggsvmlogicalregression2features.csv', index = None) +print(confusion_matrix(ytest, y_predict)) +tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() +print(tn,fp,fn,tp) +with open('./split/result.csv', 'a') as csvfile: +======= +export_csv = df.to_csv ('', index = None) +print(confusion_matrix(ytest, y_predict)) +tn, fp, fn, tp = confusion_matrix(ytest, y_predict, labels=[0,1]).ravel() +print(tn,fp,fn,tp) +with open('', 'a') as csvfile: +>>>>>>> 39db66de7b321f1d8347e674b5c8fa5f34ff3b62 + writer = csv.writer(csvfile) + writer.writerow(['',tn,fp,fn,tp]) +csvfile.close() +time = time.time() - start_time +<<<<<<< HEAD +with open('./split/time.csv', 'a') as csvfile: +======= +with open('', 'a') as csvfile: +>>>>>>> 39db66de7b321f1d8347e674b5c8fa5f34ff3b62 + writer = csv.writer(csvfile) + writer.writerow(['',time]) +csvfile.close() + + + +from sklearn.metrics import classification_report, confusion_matrix +from sklearn.metrics import accuracy_score +from sklearn.metrics import cohen_kappa_score +from sklearn import metrics +from sklearn.metrics import precision_recall_curve +from sklearn.metrics import average_precision_score +from sklearn.metrics import matthews_corrcoef +from sklearn.metrics import roc_auc_score +from sklearn.metrics import balanced_accuracy_score +from sklearn.metrics import roc_curve +from matplotlib import pyplot +print(confusion_matrix(ytest, y_predict)) +print(classification_report(ytest, y_predict)) +print(accuracy_score(ytest, y_predict)) +print(balanced_accuracy_score(ytest, y_predict)) +print(metrics.precision_score(ytest, y_predict)) +print(metrics.recall_score(ytest, y_predict)) +print(metrics.f1_score(ytest, y_predict)) +print(matthews_corrcoef(ytest, y_predict)) +print(roc_auc_score(ytest, y_predict)) +print(roc_auc_score(ytest, y_predict_vgg )) +print(roc_auc_score(ytest, y_predict)) +lr_fpr, lr_tpr, _ = roc_curve(ytest, y_predict_pro) +lr_fpr_vgg, lr_tpr_vgg, _ = roc_curve(ytest, y_predict_vgg ) +lr_fpr_svm, lr_tpr_svm, _ = roc_curve(ytest, y_predict_svm) + +# pyplot.plot(lr_fpr, lr_tpr, marker='x', label='Logistic',linewidth=2,linestyle='dashed') +# pyplot.plot(lr_fpr_vgg, lr_tpr_vgg, marker='o', label='vgg') +# pyplot.plot(lr_fpr_svm, lr_tpr_svm, marker='v', label='svm kernel=rbf') + +pyplot.plot(lr_fpr, lr_tpr, label='SolarFinder', linewidth=3, linestyle='-',color='green') +pyplot.plot(lr_fpr_vgg, lr_tpr_vgg, label='Pure CNN', linewidth=3, linestyle=':',color='red') +pyplot.plot(lr_fpr_svm, lr_tpr_svm, label='Pure SVM', linewidth=3, linestyle='--',color='orange') + +pyplot.xlabel('False Positive Rate',{'size': 14}) +pyplot.ylabel('True Positive Rate',{'size': 14}) +# show the legend +pyplot.legend() +pyplot.tight_layout() +<<<<<<< HEAD +pyplot.savefig('./finaltest/data/split_roc.png') +======= +pyplot.savefig('') +>>>>>>> 39db66de7b321f1d8347e674b5c8fa5f34ff3b62 +# show the plot +pyplot.show() + + + + + + + + + + diff --git a/result_presentation/feature_statistics.py b/result_presentation/feature_statistics.py new file mode 100644 index 0000000..baf73a0 --- /dev/null +++ b/result_presentation/feature_statistics.py @@ -0,0 +1,31 @@ +# %matplotlib inline +import numpy as np +import pandas as pd +from scipy import stats, integrate +import matplotlib.pyplot as plt +import 
seaborn as sns +sns.set(color_codes=True) + +<<<<<<< HEAD +# col_names = ['id','image','size','pole','mean','square','ratiowh','ratioarea','approxlen','numangle','numangle90','numangle70','label'] +# # load dataset +# data = pd.read_csv("./mean.csv", names=col_names) +# data = data.dropna() + +# df = pd.DataFrame(data, columns=["size",'label']) +# sns.jointplot(x="size", y="label", data=df) +# plt.savefig("out.png") +# mean, cov = [0, 1], [(1, .5), (.5, 1)] +# data = np.random.multivariate_normal(mean, cov, 200) +# df = pd.DataFrame(data, columns=["x", "y"]) +# sns.barplot(x="x", y="y", data=df); + +import seaborn as sns +sns.set(style="darkgrid") +titanic = pd.read_csv("./mean.csv") +======= +import seaborn as sns +sns.set(style="darkgrid") +titanic = pd.read_csv("") +>>>>>>> 39db66de7b321f1d8347e674b5c8fa5f34ff3b62 +ax = sns.countplot(x="mean", data=titanic) \ No newline at end of file diff --git a/result_presentation/kmeans_draw.py b/result_presentation/kmeans_draw.py new file mode 100644 index 0000000..0c65573 --- /dev/null +++ b/result_presentation/kmeans_draw.py @@ -0,0 +1,57 @@ +import cv2 +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.image import imread +import pandas as pd +import seaborn as sns +import math +from sklearn.datasets.samples_generator import (make_blobs, + make_circles, + make_moons) +from sklearn.cluster import KMeans, SpectralClustering +from sklearn.preprocessing import StandardScaler +from sklearn.metrics import silhouette_samples, silhouette_score +import matplotlib.pyplot as plt +from matplotlib import style +from sklearn.cluster import KMeans +from sklearn.datasets.samples_generator import make_blobs + +%matplotlib inline +import numpy as np + + + + +<<<<<<< HEAD +img = imread('./finaltest/data/roof_images/28.png') +======= +img = imread('') +>>>>>>> 39db66de7b321f1d8347e674b5c8fa5f34ff3b62 +img_size = img.shape + +print(img_size) +# Reshape it to be 2-dimension +X = img.reshape(img_size[0] * img_size[1], img_size[2]) +print(X.shape) + +cost =[] +for i in range(1, 11): + KM = KMeans(n_clusters = i, max_iter = 100) + KM.fit(X) + + # calculates squared error + # for the clustered points + cost.append(KM.inertia_) + +# plot the cost against K values +plt.plot(range(1, 11), cost, color ='g', linewidth ='3') +# plt.rcParams.update({'font.size': 22}) +plt.xlabel("Value of K", {'size': 14}) +plt.ylabel("Sqaured Error (Cost)", {'size': 14}) +plt.tight_layout() +<<<<<<< HEAD +plt.savefig("./data/roof_images/square_error28.png") +======= +plt.savefig("") +>>>>>>> 39db66de7b321f1d8347e674b5c8fa5f34ff3b62 +plt.show() # clear the plot \ No newline at end of file diff --git a/tools/solar_labeller b/tools/solar_labeller new file mode 160000 index 0000000..88a2897 --- /dev/null +++ b/tools/solar_labeller @@ -0,0 +1 @@ +Subproject commit 88a2897e17646e4351f285921d6ffc9fbc9ab8c4
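kmeans_draw.py above picks K by eye from the elbow of the squared-error curve. Since that script already imports `silhouette_score`, a complementary and more automatic check is to score each K on a subsample of the same pixel matrix `X` and take the maximum; the sample size below is an arbitrary choice.

```python
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

scores = {}
for k in range(2, 11):                        # silhouette needs at least two clusters
    km = KMeans(n_clusters=k, max_iter=100).fit(X)
    scores[k] = silhouette_score(X, km.labels_, sample_size=10000, random_state=0)

best_k = max(scores, key=scores.get)
print('silhouette scores:', scores)
print('best K:', best_k)
```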