-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhelperMethod_extractor_java
More file actions
110 lines (74 loc) · 2.79 KB
/
helperMethod_extractor_java
File metadata and controls
110 lines (74 loc) · 2.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import json
import os

from pymongo import MongoClient
from tree_sitter import Language, Parser
def instantiate_language(name, url):
    """Compile a tree-sitter grammar and return it as a ``Language``.

    Args:
        name: Grammar name. It is used as the single source entry handed to
            ``Language.build_library``, as the stem of the ``.so`` file that
            is built, and as the language name passed to ``Language``.
        url: Accepted but never read by this function.

    Returns:
        The ``Language`` object loaded from the shared library.
    """
    # The build target is a relative path ...
    build_target = f"{name}.so"
    Language.build_library(build_target, [name])
    # ... while the library is loaded from a fixed absolute directory.
    # NOTE(review): presumably the script is run from that directory so both
    # paths name the same file -- confirm.
    library_path = f"/student/mjr175/commentGeneration/java/{name}.so"
    return Language(library_path, name)
# Build and load the Java grammar when the module is executed. The URL is
# passed along for reference only; instantiate_language() does not read it.
JAVA = instantiate_language("java", "https://github.com/tree-sitter/tree-sitter-java")
# Module-level parser bound to the Java grammar; extractHelpers() parses with it.
parser = Parser()
parser.set_language(JAVA)
# Child types of an ``argument_list`` that are not arguments themselves:
# the delimiters, plus the comment node names used by tree-sitter-java
# grammars (``comment`` in older releases, ``line_comment`` /
# ``block_comment`` in newer ones).
_NON_ARGUMENT_TYPES = frozenset(
    {"(", ")", ",", "comment", "line_comment", "block_comment"}
)


def _describe_invocation(node):
    """Build the helper record for a single ``method_invocation`` node."""
    function_name = None
    arg_count = 0
    for child in node.children:
        if child.type == "argument_list":
            # Count real argument nodes rather than commas, so a list that
            # holds only a comment is reported as zero arguments.
            arg_count = sum(
                1 for arg in child.children
                if arg.type not in _NON_ARGUMENT_TYPES
            )
        elif child.type == "identifier":
            # The last direct identifier child is the invoked method's name;
            # an earlier one (as in ``obj.method()``) is the receiver.
            function_name = child.text.decode('utf-8')
    return {
        "Syntax": node.text.decode('utf-8'),
        "FunctionName": function_name,
        "ArgsCount": arg_count,
    }


def traverse(node, results):
    """Collect every method invocation found in the subtree rooted at ``node``.

    The tree is walked in pre-order (a node before its children, children
    left to right) using an explicit stack, so arbitrarily deep trees do not
    hit Python's recursion limit.

    Args:
        node: Root of the subtree to scan, or ``None``. Each node must expose
            ``type``, ``text`` (bytes) and ``children``.
        results: List that one record per ``method_invocation`` is appended
            to; it is mutated in place.

    Returns:
        The same ``results`` list. Each record is a dict with the keys
        ``"Syntax"`` (source text of the call), ``"FunctionName"`` (name of
        the invoked method, or ``None`` when the node has no identifier
        child) and ``"ArgsCount"`` (number of arguments).
    """
    if node is None:
        return results

    pending = [node]
    while pending:
        current = pending.pop()
        if current is None:
            continue
        if current.type == "method_invocation":
            results.append(_describe_invocation(current))
        # Push children reversed so they are popped in source order.
        pending.extend(reversed(current.children))
    return results
def extractHelpers(code):
    """Parse ``code`` and return the helper-method records found in it.

    Args:
        code: Source to hand to the module-level ``parser`` (callers in this
            file pass UTF-8 encoded bytes).

    Returns:
        The list produced by ``traverse`` for the root of the parsed tree,
        starting from an empty list.
    """
    syntax_tree = parser.parse(code)
    return traverse(syntax_tree.root_node, [])
# SECURITY: the fallback connection string below embeds a username and
# password in source control. Set the MONGODB_URI environment variable to
# override it (for example mongodb://localhost:27017 for a local server),
# then rotate the credential and remove the literal. It is kept only so that
# existing runs keep working unchanged.
client = MongoClient(
    os.environ.get(
        "MONGODB_URI",
        'mongodb+srv://mbillahmim:dmY5XMETcLbLSmF6@cluster0.h3plcij.mongodb.net/',
    )
)
db = client.sample_mflix
helperMethods = db.helperMethods

# Read the JSONL dataset one record per line, attach the helper-method
# records extracted from the record's "code" field, and store the record.
with open('/student/mjr175/commentGeneration/ASAP_DATASET/Java_data/Java_data/test.jsonl', 'r', encoding='utf-8') as file:
    # Iterate the file lazily instead of loading every line into memory.
    for line in file:
        if not line.strip():
            # A blank line (such as a trailing newline) is not a JSON document.
            continue
        data = json.loads(line)
        # A record with no code is stored with an empty helper list rather
        # than aborting the whole import.
        code = data.get("code") or ""
        # Parse once per record; the result is stored under "helpers".
        data["helpers"] = extractHelpers(code.encode('utf-8'))
        helperMethods.insert_one(data)