forked from chyanju/ReSySeVR
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.sh
More file actions
172 lines (150 loc) · 6.18 KB
/
config.sh
File metadata and controls
172 lines (150 loc) · 6.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#!/bin/bash
# =========================== #
# ==== setup environment ==== #
# =========================== #
# Records the install locations in ~/.bashrc for future shells, opens up /opt
# for the current user, and installs the system packages the rest of this
# script relies on.

#######################################
# Append a line to ~/.bashrc unless an identical line is already there,
# so re-running this script does not pile up duplicates.
# Arguments: $1 - the exact line to persist
#######################################
persist_in_bashrc() {
  local line=$1
  grep -qxF -- "$line" ~/.bashrc 2>/dev/null || printf '%s\n' "$line" >> ~/.bashrc
}

persist_in_bashrc 'INSTALLDIR=/opt/'
persist_in_bashrc 'JOERN=/opt/joern-0.3.1'
# shellcheck source=/dev/null
source ~/.bashrc
# Many default ~/.bashrc files return immediately in non-interactive shells,
# in which case the lines appended above are never evaluated by the `source`.
# Assign the values directly so later `cd $INSTALLDIR` steps do not silently
# fall back to $HOME.
INSTALLDIR=/opt/
JOERN=/opt/joern-0.3.1

# NOTE(review): this makes /opt world-writable so the unprivileged user can
# download and build there; consider a narrower permission/ownership change.
sudo chmod 777 -R /opt

# update apt
sudo apt-get -y update

# install dependencies
sudo apt-get -y install git
sudo apt-get -y install python-setuptools python-dev
sudo apt-get -y install graphviz libgraphviz-dev graphviz-dev
sudo apt-get -y install pkg-config
# Quoted so the shell never expands the pattern against files in the current
# directory; apt-get does the package-name matching itself.
sudo apt-get -y install 'openjdk-8-*'
sudo apt-get -y install ant
sudo apt-get -y install unzip
sudo apt-get -y install p7zip-full
sudo apt-get -y install python-pip
sudo apt-get -y install python3-pip
sudo apt-get -y install python-igraph
# build joern
# Downloads the joern 0.3.1 sources and the library bundle, builds joern and
# its tools with ant, and registers a `joern` alias for interactive shells.

# Fall back to the location the setup section writes to ~/.bashrc; with an
# empty INSTALLDIR a bare `cd` would silently switch to $HOME.
: "${INSTALLDIR:=/opt/}"
cd "$INSTALLDIR" || { printf 'cannot cd to %s\n' "$INSTALLDIR" >&2; exit 1; }
wget https://github.com/fabsx00/joern/archive/0.3.1.tar.gz
tar xfzv 0.3.1.tar.gz
cd joern-0.3.1/ || { printf 'joern sources were not extracted\n' >&2; exit 1; }
wget http://mlsec.org/joern/lib/lib.tar.gz
tar xfzv lib.tar.gz
sudo ant
sudo ant tools

# Only append the alias once so repeated runs do not duplicate it.
joern_alias="alias joern='java -jar /opt/joern-0.3.1/bin/joern.jar'"
grep -qxF -- "$joern_alias" ~/.bashrc 2>/dev/null \
  || printf '%s\n' "$joern_alias" >> ~/.bashrc
# shellcheck source=/dev/null
source ~/.bashrc
# build neo4j
# Downloads and unpacks neo4j community 2.1.8, records its location, and
# installs the gremlin server plugin into its plugins directory.

: "${INSTALLDIR:=/opt/}"
cd "$INSTALLDIR" || { printf 'cannot cd to %s\n' "$INSTALLDIR" >&2; exit 1; }

# The URL contains `?`, a glob character, so it must be quoted. -O pins the
# local file name to the one the tar commands in this script expect.
neo4j_archive='artifact.php?name=neo4j-community-2.1.8-unix.tar.gz'
wget -O "$neo4j_archive" "http://neo4j.com/${neo4j_archive}"
tar -zxvf "$neo4j_archive"

neo4j_export="export Neo4jDir='/opt/neo4j-community-2.1.8/'"
grep -qxF -- "$neo4j_export" ~/.bashrc 2>/dev/null \
  || printf '%s\n' "$neo4j_export" >> ~/.bashrc
# shellcheck source=/dev/null
source ~/.bashrc
# Set it here as well: if ~/.bashrc bails out early in a non-interactive shell
# the variable stays empty and unzip would extract into /plugins/gremlin-plugin.
export Neo4jDir='/opt/neo4j-community-2.1.8/'

wget http://mlsec.org/joern/lib/neo4j-gremlin-plugin-2.1-SNAPSHOT-server-plugin.zip
unzip neo4j-gremlin-plugin-2.1-SNAPSHOT-server-plugin.zip -d "$Neo4jDir/plugins/gremlin-plugin"
# build py2neo
# Installs py2neo 2.0 from the tagged source archive, replacing any version
# that is already installed.

: "${INSTALLDIR:=/opt/}"
cd "$INSTALLDIR" || { printf 'cannot cd to %s\n' "$INSTALLDIR" >&2; exit 1; }
wget https://github.com/nigelsmall/py2neo/archive/py2neo-2.0.tar.gz
tar zxvf py2neo-2.0.tar.gz
cd "${INSTALLDIR%/}/py2neo-py2neo-2.0/" \
  || { printf 'py2neo sources were not extracted\n' >&2; exit 1; }
# uninstall existing one if there's any; -y skips the confirmation prompt that
# would otherwise stall an unattended run
pip uninstall -y py2neo
sudo python setup.py install
# adding this to make sure you install py2neo 2.0
# note: this may fail, but it doesn't matter
pip install py2neo==2.0
# build python-joern
# Clones python-joern and installs it together with its Python dependencies.

: "${INSTALLDIR:=/opt/}"
cd "$INSTALLDIR" || { printf 'cannot cd to %s\n' "$INSTALLDIR" >&2; exit 1; }
# Skip the clone when a checkout already exists so the step can be re-run.
[[ -d python-joern/.git ]] || git clone https://github.com/fabsx00/python-joern.git
# Pinned to the same release the py2neo section installs; an unpinned install
# could pull a newer py2neo if the 2.0 install did not succeed.
pip install py2neo==2.0
cd python-joern/ || { printf 'python-joern checkout is missing\n' >&2; exit 1; }
pip install pyparsing
sudo python setup.py install
# build joern-tools
# Clones joern-tools and installs it, after installing pygraphviz.

: "${INSTALLDIR:=/opt/}"
cd "$INSTALLDIR" || { printf 'cannot cd to %s\n' "$INSTALLDIR" >&2; exit 1; }
# Skip the clone when a checkout already exists so the step can be re-run.
[[ -d joern-tools/.git ]] || git clone https://github.com/fabsx00/joern-tools
pip install pygraphviz
cd joern-tools/ || { printf 'joern-tools checkout is missing\n' >&2; exit 1; }
sudo python setup.py install
# install other dependencies
# xlrd goes through `pip`; everything else through `pip3`, one requirement per
# invocation and in this exact order.
pip install xlrd

py3_requirements=(
  gensim==3.8.3
  imbalanced-learn==0.4.0
  scikit-learn==0.19.1
  # choose tensorflow-gpu if you've got a fancy GPU
  tensorflow==1.14.0
  keras==2.3.0
)
for requirement in "${py3_requirements[@]}"; do
  pip3 install "$requirement"
done
unset py3_requirements requirement
# get ReSySeVR
# Records the project location in ~/.bashrc and clones the repository into
# the install directory.

resysevr_line='ReSySeVR=/opt/ReSySeVR/'
grep -qxF -- "$resysevr_line" ~/.bashrc 2>/dev/null \
  || printf '%s\n' "$resysevr_line" >> ~/.bashrc
# shellcheck source=/dev/null
source ~/.bashrc
# Assign directly too: a ~/.bashrc that returns early in non-interactive
# shells would leave ReSySeVR empty, turning "$ReSySeVR/data" into "/data".
ReSySeVR=/opt/ReSySeVR/

: "${INSTALLDIR:=/opt/}"
cd "$INSTALLDIR" || { printf 'cannot cd to %s\n' "$INSTALLDIR" >&2; exit 1; }
# Skip the clone when a checkout already exists so the step can be re-run.
[[ -d ReSySeVR/.git ]] || git clone https://github.com/chyanju/ReSySeVR.git
# ========================== #
# ==== prepare raw data ==== #
# ========================== #
# (do this for different dataset, e.g., SARD)
# Unpacks the SARD archive and runs the repository's split script on it.

# Fall back to the documented checkout location; with an empty ReSySeVR the
# target below would be the unrelated top-level /data directory.
: "${ReSySeVR:=/opt/ReSySeVR/}"
cd "$ReSySeVR/data" || { printf 'cannot cd to %s/data\n' "$ReSySeVR" >&2; exit 1; }
7z x SARD.7z
./split-sard.sh
# ===================================== #
# ==== process data: source2slice/ ==== #
# ===================================== #
# Drops the previous joern index and re-indexes one SARD partition.

: "${ReSySeVR:=/opt/ReSySeVR/}"
# The cd must succeed before the rm -rf below, otherwise the delete would hit
# whatever directory the shell happens to be in.
cd "$ReSySeVR/data/SARD/" \
  || { printf 'cannot cd to %s/data/SARD/\n' "$ReSySeVR" >&2; exit 1; }
# (do this for every partition, e.g., dir_001)
rm -rf ./.joernIndex/
java -jar /opt/joern-0.3.1/bin/joern.jar /opt/ReSySeVR/data/SARD/dir_001/
# (on screen A: start neo4j service)
# note: every time you finish processing a batch (dir_xxx),
# you should run the following commands and restart the neo4j service;
# otherwise the indexing will be problematic
#
# Re-creates a pristine neo4j install from the already-downloaded archive,
# re-installs the gremlin plugin, points the server at the joern index and
# starts it.

: "${INSTALLDIR:=/opt/}"
# With an empty Neo4jDir, unzip would extract into /plugins/gremlin-plugin.
: "${Neo4jDir:=/opt/neo4j-community-2.1.8/}"

# The cd must succeed before the rm -rf below.
cd "$INSTALLDIR" || { printf 'cannot cd to %s\n' "$INSTALLDIR" >&2; exit 1; }
rm -rf ./neo4j-community-2.1.8
tar -zxvf 'artifact.php?name=neo4j-community-2.1.8-unix.tar.gz'
unzip neo4j-gremlin-plugin-2.1-SNAPSHOT-server-plugin.zip -d "$Neo4jDir/plugins/gremlin-plugin"

neo4j_conf=/opt/neo4j-community-2.1.8/conf/neo4j-server.properties
# 1) uncomment the webserver address setting (this single expression also
#    covers the "=0.0.0.0" form of the line);
# 2) replace the default database location with the joern index.
# Dots are escaped so they only match literal dots.
sed -i \
  -e 's/#org\.neo4j\.server\.webserver\.address/org.neo4j.server.webserver.address/g' \
  -e 's|data/graph\.db|/opt/ReSySeVR/data/SARD/.joernIndex/|g' \
  "$neo4j_conf"

cd "${INSTALLDIR%/}/neo4j-community-2.1.8/bin" \
  || { printf 'neo4j was not extracted under %s\n' "$INSTALLDIR" >&2; exit 1; }
# NOTE(review): `console` appears to keep neo4j in the foreground, which is
# why this part is meant for its own screen session — confirm.
./neo4j console
# then you can check http://<your_server_ip>:7474/ to see if there are "Relation types"
# if yes, then the configuration is successful
# (on screen B: start processing)
# Runs the source2slice scripts in order, then collects the slice and label
# files into the directories the later steps read from.

: "${ReSySeVR:=/opt/ReSySeVR/}"
slice_dir="$ReSySeVR/src/source2slice/"

# Every step below uses relative paths, so a failed cd must stop the run.
cd "$slice_dir" || { printf 'cannot cd to %s\n' "$slice_dir" >&2; exit 1; }
python ./get_cfg_relation.py # this outputs to src/source2slice/cfg_db/
python ./complete_PDG.py # this outputs to src/source2slice/pdg_db/
python ./access_db_operate.py # this outputs to src/source2slice/dict_call2cfgNodeID_funcID/
python ./points_get.py # this outputs to sensifunc_slice_points.pkl, pointuse_slice_points.pkl, arrayuse_slice_points.pkl, integeroverflow_slice_points_new.pkl
python ./extract_df.py
python ./make_label.py

# cp with several sources needs an existing target directory; creating it is
# a no-op when the checkout already provides it.
mkdir -p ./slices/ ./label_source/
cp api_slices.txt arraysuse_slices.txt integeroverflow_slices.txt pointersuse_slices.txt ./slices/
cp api_slices_label.pkl api_slices_vulline.pkl array_slice_label.pkl expr_slice_label.pkl pointer_slice_label.pkl ./label_source/

# Rename the label files in place.
cd "${slice_dir}label_source/" \
  || { printf 'cannot cd to %slabel_source/\n' "$slice_dir" >&2; exit 1; }
mv expr_slice_label.pkl integeroverflow_slices.pkl
mv api_slices_label.pkl api_slices.pkl
mv array_slice_label.pkl arraysuse_slices.pkl
mv pointer_slice_label.pkl pointersuse_slices.pkl

cd "$slice_dir" || { printf 'cannot cd to %s\n' "$slice_dir" >&2; exit 1; }
python ./data_preprocess.py
# ======================================== #
# ==== process data: data_preprocess/ ==== #
# ======================================== #
# Copies the slice text files and label pickles produced under source2slice/
# into the data_preprocess input directories, then runs the preprocessing
# scripts in order.

: "${ReSySeVR:=/opt/ReSySeVR/}"
# The variable part is quoted; the glob stays outside the quotes so the shell
# still expands it.
cp "$ReSySeVR"/src/source2slice/slice_label/*.txt "$ReSySeVR/src/data_preprocess/data/data_source/SARD/"
# -f: a missing error.txt is not a problem worth reporting.
rm -f "$ReSySeVR/src/data_preprocess/data/data_source/SARD/error.txt"
cp "$ReSySeVR"/src/source2slice/label_source/*.pkl "$ReSySeVR/src/data_preprocess/data/label_source/SARD/"

cd "$ReSySeVR/src/data_preprocess/" \
  || { printf 'cannot cd to %s/src/data_preprocess/\n' "$ReSySeVR" >&2; exit 1; }
python3 ./process_dataflow_func.py
python3 ./create_w2vmodel.py
python3 ./get_dl_input.py
python3 ./dealrawdata.py
# ================================ #
# ==== model training: model/ ==== #
# ================================ #
# Copies the shuffled deep-learning input next to the model code and runs the
# bgru.py training script.

: "${ReSySeVR:=/opt/ReSySeVR/}"
cp -r "$ReSySeVR/src/data_preprocess/dl_input_shuffle/" "$ReSySeVR/src/model/"
# bgru.py is invoked by relative path, so stop if the directory is missing.
cd "$ReSySeVR/src/model/" \
  || { printf 'cannot cd to %s/src/model/\n' "$ReSySeVR" >&2; exit 1; }
python3 ./bgru.py
# ===================== #
# ==== other notes ==== #
# ===================== #
# if you are having trouble configuring igraph for Python3 (note that here we only need it for Python2,
# but in case you are using JupyterLab to view the data in Python3), you need some additional commands
# to make the environment ready
sudo apt install build-essential libxml2 libxml2-dev zlib1g-dev
pip3 install --upgrade pip
pip3 install python-igraph
pip3 install cairocffi