diff --git a/Generator/ga_main.py b/Generator/ga_main.py index fca44288..3efa269a 100644 --- a/Generator/ga_main.py +++ b/Generator/ga_main.py @@ -11,6 +11,8 @@ import pandas as pd from decimal import Decimal from util import Utilty +import urllib +import urllib.parse # Type of printing. OK = 'ok' # [*] @@ -97,11 +99,20 @@ def create_genom(self, df_gene): # Evaluation. def evaluation(self, obj_ga, df_gene, eval_place, individual_idx): # Build html syntax. - indivisual = self.util.transform_gene_num2str(df_gene, obj_ga.genom_list) - html = self.template.render({eval_place: indivisual}) + individual = self.util.transform_gene_num2str(df_gene, obj_ga.genom_list) + encoded_individual = urllib.parse.quote(individual) + + html = self.template.render({eval_place: individual}) eval_html_path = self.util.join_path(self.html_dir, self.html_file.replace('*', str(individual_idx))) + + encoded_html = self.template.render({eval_place: encoded_individual}) + encoded_eval_html_path = self.util.join_path(self.html_dir, self.html_file.replace('*', 'Encoded'+str(individual_idx))) + with codecs.open(eval_html_path, 'w', encoding='utf-8') as fout: fout.write(html) + + with codecs.open(encoded_eval_html_path, 'w', encoding='utf-8') as fout: + fout.write(encoded_html) # Evaluate html syntax using tidy. command = self.html_checker + ' ' + self.html_checker_option + ' ' + \ @@ -131,25 +142,23 @@ def evaluation(self, obj_ga, df_gene, eval_place, individual_idx): # Evaluate running script using selenium. selenium_score, error_flag = self.util.check_individual_selenium(self.obj_browser, eval_html_path) - if error_flag: + + # Evaluate the running encoded script using selenium + encoded_selenium_score, encoded_error_flag = self.util.check_individual_selenium(self.obj_browser, eval_html_path) + + if error_flag and encoded_error_flag: return None, 1 # Check result of selenium. - if selenium_score > 0: - self.util.print_message(OK, 'Detect running script: "{}" in {}.'.format(indivisual, eval_place)) + if selenium_score > 0 or encoded_selenium_score > 0: + self.util.print_message(OK, 'Detect running script: "{}" in {}.'.format(individual, eval_place)) # compute score for running script. int_score += self.bingo_score - self.result_list.append([eval_place, obj_ga.genom_list, indivisual]) + self.result_list.append([eval_place, obj_ga.genom_list, individual, encoded_individual]) # Output evaluation results. - self.util.print_message(OK, 'Evaluation result : Browser={} {}, ' - 'Individual="{} ({})", ' - 'Score={}'.format(self.obj_browser.name, - self.obj_browser.capabilities['version'], - indivisual, - obj_ga.genom_list, - str(int_score))) + self.util.print_message(OK, 'Evaluation result : Browser={} {}, ''Individual="{} ({})", ''Score={}'.format(self.obj_browser.name,self.obj_browser.capabilities['version'],individual,obj_ga.genom_list,str(int_score))) return int_score, 0 # Select elite individual. @@ -219,12 +228,9 @@ def main(self): # Create saving file (only header). save_path = self.util.join_path(self.result_dir, self.result_file.replace('*', self.obj_browser.name)) if os.path.exists(save_path) is False: - pd.DataFrame([], columns=['eval_place', 'sig_vector', 'sig_string']).to_csv(save_path, - mode='w', - header=True, - index=False) + pd.DataFrame([], columns=['eval_place', 'sig_vector', 'sig_string', 'encoded_string']).to_csv(save_path,mode='w',header=True,index=False) - # Evaluate indivisual each evaluating place in html. + # Evaluate individual each evaluating place in html. for eval_place in self.html_eval_place_list: self.util.print_message(NOTE, 'Evaluating html place : {}'.format(eval_place)) @@ -238,22 +244,16 @@ def main(self): for int_count in range(1, self.max_generation + 1): self.util.print_message(NOTE, 'Evaluate individual : {}/{} generation.'.format(str(int_count), self.max_generation)) - for indivisual, idx in enumerate(range(self.max_genom_list)): + for individual, idx in enumerate(range(self.max_genom_list)): self.util.print_message(OK, 'Evaluation individual in {}: ' - '{}/{} in {} generation'.format(eval_place, - indivisual + 1, - self.max_genom_list, - str(int_count))) - evaluation_result, eval_status = self.evaluation(current_generation[indivisual], - df_genes, - eval_place, - idx) + '{}/{} in {} generation'.format(eval_place,individual + 1,self.max_genom_list,str(int_count))) + evaluation_result, eval_status = self.evaluation(current_generation[individual],df_genes,eval_place,idx) idx += 1 if eval_status == 1: - indivisual -= 1 + individual -= 1 continue - current_generation[indivisual].setEvaluation(evaluation_result) + current_generation[individual].setEvaluation(evaluation_result) time.sleep(self.wait_time) # Select elite's individual. @@ -265,15 +265,10 @@ def main(self): progeny_gene.extend(self.crossover(elite_genes[i - 1], elite_genes[i])) # Select elite group. - next_generation_individual_group = self.next_generation_gene_create(current_generation, - elite_genes, - progeny_gene) + next_generation_individual_group = self.next_generation_gene_create(current_generation,elite_genes,progeny_gene) # Mutation - next_generation_individual_group = self.mutation(next_generation_individual_group, - self.individual_mutation_rate, - self.genom_mutation_rate, - df_genes) + next_generation_individual_group = self.mutation(next_generation_individual_group,self.individual_mutation_rate,self.genom_mutation_rate,df_genes) # Finish evolution computing for current generation. # Arrange fitness each individual. @@ -281,11 +276,7 @@ def main(self): # evaluate evolution result. flt_avg = sum(fits) / float(len(fits)) - self.util.print_message(NOTE, '{} generation result: ' - 'Min={}, Max={}, Avg={}.'.format(int_count, - min(fits), - max(fits), - flt_avg)) + self.util.print_message(NOTE, '{} generation result: ''Min={}, Max={}, Avg={}.'.format(int_count,min(fits),max(fits),flt_avg)) # Judge fitness. if flt_avg > self.max_fitness: diff --git a/Generator/gan_main.py b/Generator/gan_main.py index 891de0d5..9b77dd1e 100644 --- a/Generator/gan_main.py +++ b/Generator/gan_main.py @@ -13,6 +13,8 @@ from keras.layers import Dropout from keras import backend as K from util import Utilty +import urllib +import urllib.parse # Type of printing. OK = 'ok' # [*] @@ -157,36 +159,40 @@ def train(self, list_sigs): gene_num -= 1 lst_genom.append(int(gene_num)) str_html = self.util.transform_gene_num2str(self.df_genes, lst_genom) - self.util.print_message(OK, 'Train GAN : epoch={}, batch={}, g_loss={}, d_loss={}, {} ({})'. - format(epoch, batch, g_loss, d_loss, - np.round((generated_code * self.flt_size) + self.flt_size), - str_html)) + encoded_str_html = urllib.parse.quote(str_html) + + self.util.print_message(OK, 'Train GAN : epoch={}, batch={}, g_loss={}, d_loss={}, {} ({})'.format(epoch, batch, g_loss, d_loss,np.round((generated_code * self.flt_size) + self.flt_size),str_html)) # Evaluate generated injection code. for eval_place in self.eval_place_list: - # Build html syntax. + # Build html syntax using str_html. html = self.template.render({eval_place: str_html}) with codecs.open(self.eval_html_path, 'w', encoding='utf-8') as fout: fout.write(html) # Evaluate individual using selenium. - selenium_score, error_flag = self.util.check_individual_selenium(self.obj_browser, - self.eval_html_path) - if error_flag: + selenium_score, error_flag = self.util.check_individual_selenium(self.obj_browser,self.eval_html_path) + + # Build html syntax using encoded_str_html. + html = self.template.render({eval_place: encoded_str_html}) + with codecs.open(self.eval_html_path, 'w', encoding='utf-8') as fout: + fout.write(html) + + # Evaluate encoded individual using selenium. + encoded_selenium_score, encoded_error_flag = self.util.check_individual_selenium(self.obj_browser,self.eval_html_path) + + if error_flag and encoded_error_flag: continue # Check generated individual using selenium. - if selenium_score > 0: - self.util.print_message(WARNING, 'Detect running script: "{}" in {}.'.format(str_html, - eval_place)) + if selenium_score > 0 or encoded_selenium_score > 0: + self.util.print_message(WARNING, 'Detect running script: "{}" in {}.'.format(str_html,eval_place)) # Save running script. - lst_scripts.append([eval_place, str_html]) + lst_scripts.append([eval_place, str_html, encoded_str_html]) # Save weights of network each epoch. - self.generator.save_weights(self.util.join_path(self.weight_dir, - self.gen_weight_file.replace('*', str(epoch)))) - discriminator.save_weights(self.util.join_path(self.weight_dir, - self.dis_weight_file.replace('*', str(epoch)))) + self.generator.save_weights(self.util.join_path(self.weight_dir,self.gen_weight_file.replace('*', str(epoch)))) + discriminator.save_weights(self.util.join_path(self.weight_dir,self.dis_weight_file.replace('*', str(epoch)))) return lst_scripts @@ -221,37 +227,42 @@ def main(self): valid_code_list = [] result_list = [] for idx in range(self.max_explore_codes_num): - self.util.print_message(NOTE, '{}/{} Explore valid injection code.'.format(idx + 1, - self.max_explore_codes_num)) + self.util.print_message(NOTE, '{}/{} Explore valid injection code.'.format(idx + 1,self.max_explore_codes_num)) # Generate injection codes. noise = np.array([np.random.uniform(-1, 1, self.input_size) for _ in range(1)]) generated_codes = self.generator.predict(noise, verbose=0) str_html = self.util.transform_gene_num2str(self.df_genes, self.transform_code2gene(generated_codes[0])) - + encoded_str_html = urllib.parse.quote(str_html) + # Evaluate injection code using selenium. for eval_place in self.eval_place_list: + #checking the str_html html = self.template.render({eval_place: str_html}) with codecs.open(self.eval_html_path, 'w', encoding='utf-8') as fout: fout.write(html) - selenium_score, error_flag = self.util.check_individual_selenium(self.obj_browser, - self.eval_html_path) - if error_flag: + selenium_score, error_flag = self.util.check_individual_selenium(self.obj_browser,self.eval_html_path) + + #checking the encoded_str_html + html = self.template.render({eval_place: encoded_str_html}) + with codecs.open(self.eval_html_path, 'w', encoding='utf-8') as fout: + fout.write(html) + + encoded_selenium_score, encoded_error_flag = self.util.check_individual_selenium(self.obj_browser,self.eval_html_path) + + + if error_flag and encoded_error_flag: continue # Check generated injection code. - if selenium_score > 0: - self.util.print_message(WARNING, 'Find valid injection code: "{}" in {}.'.format(str_html, - eval_place)) + if selenium_score > 0 or encoded_selenium_score > 0: + self.util.print_message(WARNING, 'Find valid injection code: "{}" in {}.'.format(str_html,eval_place)) valid_code_list.append([str_html, noise]) - result_list.append([eval_place, str_html]) + result_list.append([eval_place, str_html, encoded_str_html]) # Save generated injection codes. if os.path.exists(gan_save_path) is False: - pd.DataFrame(result_list, columns=['eval_place', 'injection_code']).to_csv(gan_save_path, - mode='w', - header=True, - index=False) + pd.DataFrame(result_list, columns=['eval_place', 'injection_code', 'encoded_injection_code']).to_csv(gan_save_path,mode='w',header=True,index=False) else: pd.DataFrame(result_list).to_csv(gan_save_path, mode='a', header=False, index=False) @@ -268,38 +279,41 @@ def main(self): synthesized_noise = self.vector_mean(valid_code_list[noise_idx1][1], valid_code_list[noise_idx2][1]) generated_codes = self.generator.predict(synthesized_noise, verbose=0) str_html = self.util.transform_gene_num2str(self.df_genes, self.transform_code2gene(generated_codes[0])) - + encoded_str_html = urllib.parse.quote(str_html) + # Evaluate synthesized injection code using selenium. for eval_place in self.eval_place_list: hit_flag = 'Failure' + + # evaluating str_html html = self.template.render({eval_place: str_html}) with codecs.open(self.eval_html_path, 'w', encoding='utf-8') as fout: fout.write(html) - selenium_score, error_flag = self.util.check_individual_selenium(self.obj_browser, - self.eval_html_path) - if error_flag: + selenium_score, error_flag = self.util.check_individual_selenium(self.obj_browser,self.eval_html_path) + + # evaluating encoded_str_html + html = self.template.render({eval_place: encoded_str_html}) + with codecs.open(self.eval_html_path, 'w', encoding='utf-8') as fout: + fout.write(html) + + encoded_selenium_score, encoded_error_flag = self.util.check_individual_selenium(self.obj_browser,self.eval_html_path) + + + if error_flag and encoded_error_flag: continue # Check synthesized injection code using selenium. - if selenium_score > 0: + if selenium_score > 0 or encoded_selenium_score > 0: self.util.print_message(WARNING, 'Find running script: "{}".'.format(str_html)) hit_flag = 'Bingo' # Save running script. - vector_result_list.append([eval_place, str_html, - valid_code_list[noise_idx1][0], - valid_code_list[noise_idx2][0], - hit_flag]) + vector_result_list.append([eval_place, str_html, encoded_str_html,valid_code_list[noise_idx1][0],valid_code_list[noise_idx2][0],hit_flag]) # Save synthesized injection codes. if os.path.exists(vec_save_path) is False: - pd.DataFrame(vector_result_list, - columns=['eval_place', 'synthesized_code', - 'origin_code1', 'origin_code2', 'bingo']).to_csv(vec_save_path, - mode='w', - header=True, - index=False) + pd.DataFrame(vector_result_list,columns=['eval_place', 'synthesized_code', 'encoded_synthesized_code','origin_code1', 'origin_code2', 'bingo']).to_csv(vec_save_path,mode='w',header=True,index=False) else: pd.DataFrame(vector_result_list).to_csv(vec_save_path, mode='a', header=False, index=False) else: @@ -324,10 +338,7 @@ def main(self): # Save generated injection codes. if os.path.exists(gan_save_path) is False: - pd.DataFrame(lst_scripts, columns=['eval_place', 'injection_code']).to_csv(gan_save_path, - mode='w', - header=True, - index=False) + pd.DataFrame(lst_scripts, columns=['eval_place', 'injection_code', 'encoded_injection_code']).to_csv(gan_save_path,mode='w',header=True,index=False) else: pd.DataFrame(lst_scripts).to_csv(gan_save_path, mode='a', header=False, index=False) diff --git a/Generator/result/ga_result_chrome.csv b/Generator/result/ga_result_chrome.csv index 30ab925d..065bb172 100644 --- a/Generator/result/ga_result_chrome.csv +++ b/Generator/result/ga_result_chrome.csv @@ -1,22230 +1,746 @@ -body_tag,"[74, 103, 2, 180, 207]",