Skip to content

Commit 28c40ea

Browse files
committed
Add script for D2D with SCIO using docker container
Signed-off-by: Tushar Goel <[email protected]>
1 parent 23b94ad commit 28c40ea

File tree

1 file changed

+233
-0
lines changed

1 file changed

+233
-0
lines changed

etc/scripts/run_d2d_scio.py

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
#
3+
# http://nexb.com and https://github.com/aboutcode-org/scancode.io
4+
# The ScanCode.io software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode.io is provided as-is without warranties.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
16+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
17+
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
18+
# for any legal advice.
19+
#
20+
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
21+
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
22+
23+
import argparse
24+
import os
25+
import shutil
26+
import socket
27+
import subprocess
28+
import sys
29+
import time
30+
import uuid
31+
from pathlib import Path
32+
33+
# Docker images: the ScanCode.io application and the database it talks to.
SCANCODE_IMAGE = "ghcr.io/aboutcode-org/scancode.io:latest"
DB_IMAGE = "postgres:13"

# Settings for the Postgres container. Only the password can be overridden,
# through the SCANCODE_DB_PASS environment variable.
DB_USER = "scancode"
DB_NAME = "scancode"
DB_PASS = os.environ.get("SCANCODE_DB_PASS", "scancode")

# Working directory, relative to the current directory, that receives copies
# of the input files.
D2D_DIR = Path("d2d")
39+
40+
41+
def get_free_port():
    """Return a host TCP port number that is free at the time of the call.

    A temporary IPv4 TCP socket is bound to port 0 and the port number it was
    given is read back. The socket is closed before returning, so nothing
    reserves the port afterwards.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(("", 0))
        _address, port = probe.getsockname()
    finally:
        probe.close()
    return port
46+
47+
48+
def safe_run(cmd, capture_output=False, silent=False):
    """Run a command and exit the program if it returns a non-zero status.

    The executable (``cmd[0]``) is resolved to a full path with
    ``shutil.which`` when possible. The caller's ``cmd`` list is left
    untouched.

    Args:
        cmd: Command and arguments as a list of strings.
        capture_output: When true, stdout/stderr are captured and available
            on the returned object instead of being inherited.
        silent: When true, the "Running: ..." line is not printed.

    Returns:
        The ``subprocess.CompletedProcess`` of the successful run (text mode).

    Raises:
        SystemExit: With exit code 1 when the command exits non-zero.
    """
    if not silent:
        print(f"Running: {' '.join(cmd)}")

    # Build a new list rather than assigning to cmd[0], so a list that the
    # caller reuses is not silently modified.
    resolved_cmd = [shutil.which(cmd[0]) or cmd[0], *cmd[1:]]

    try:
        return subprocess.run(
            resolved_cmd,
            check=True,
            text=True,
            capture_output=capture_output,
        )
    except subprocess.CalledProcessError as e:
        # Diagnostics go to stderr so they never mix with captured results.
        print(f"Command failed: {' '.join(resolved_cmd)}", file=sys.stderr)
        # stderr/stdout are only populated when output was captured.
        print(e.stderr or e.stdout or str(e), file=sys.stderr)
        sys.exit(1)
66+
67+
68+
def wait_for_postgres(container_name, timeout=60):
    """Block until Postgres in ``container_name`` accepts connections.

    Readiness is probed by running ``pg_isready -U DB_USER`` inside the
    container through ``docker exec``, roughly once per second.

    Args:
        container_name: Name of the running Postgres container.
        timeout: Maximum time to wait, in seconds. It is measured against a
            monotonic clock, so the time spent inside each ``docker exec``
            call counts toward the limit.

    Raises:
        RuntimeError: If Postgres is not ready before the timeout elapses.
    """
    print("Waiting for Postgres to be ready...")
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        result = subprocess.run(
            ["docker", "exec", container_name, "pg_isready", "-U", DB_USER],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        if result.returncode == 0:
            print("Postgres is ready.")
            return
        # Pause between probes, but never sleep past the deadline.
        time.sleep(max(0.0, min(1.0, deadline - time.monotonic())))
    raise RuntimeError("Postgres did not become ready in time.")
82+
83+
84+
def prepare_d2d_dir(from_file, to_file):
    """Copy both input files into ``D2D_DIR`` and return their file names.

    ``D2D_DIR`` is created if it does not exist yet. Each file keeps its own
    base name, unless both inputs share the same base name (for example
    ``old/app.jar`` and ``new/app.jar``). In that case the copies are named
    ``from-<name>`` and ``to-<name>`` so the second copy cannot overwrite the
    first.

    Args:
        from_file: Path to the "from" input file.
        to_file: Path to the "to" input file.

    Returns:
        A ``(from_name, to_name)`` tuple with the names of the copies inside
        ``D2D_DIR``.
    """
    D2D_DIR.mkdir(exist_ok=True)

    from_name = Path(from_file).name
    to_name = Path(to_file).name
    if from_name == to_name:
        # Identical base names would land on the same destination path.
        from_name = f"from-{from_name}"
        to_name = f"to-{to_name}"

    from_dest = D2D_DIR / from_name
    to_dest = D2D_DIR / to_name

    shutil.copy(from_file, from_dest)
    shutil.copy(to_file, to_dest)
    print(f"Files copied to: {D2D_DIR.resolve()}")

    return from_dest.name, to_dest.name
96+
97+
98+
def _parse_input_files(entries):
    """Return a ``{tag: absolute_path}`` dict parsed from ``path:tag`` entries.

    Prints an error to stderr and exits with status 1 when an entry is
    malformed, uses a tag other than ``from``/``to``, or when either tag is
    missing.
    """
    file_map = {}
    for entry in entries:
        # Split on the LAST colon so that paths which themselves contain a
        # colon (e.g. a Windows drive letter) are still accepted.
        path, sep, tag = entry.rpartition(":")
        if not sep or not path or tag not in ("from", "to"):
            print(
                f"Invalid --input-file format: {entry}. Use path:tag",
                file=sys.stderr,
            )
            sys.exit(1)
        file_map[tag] = os.path.abspath(path)

    if "from" not in file_map or "to" not in file_map:
        print("Both :from and :to input files are required.", file=sys.stderr)
        sys.exit(1)

    return file_map


def _build_pipeline_name(options):
    """Return the pipeline name, suffixed with ``:opt1,opt2`` if options exist."""
    pipeline_name = "map_deploy_to_develop"
    if not options:
        return pipeline_name
    # join() avoids the trailing comma a manual "+= option," loop leaves behind.
    return f"{pipeline_name}:{','.join(options)}"


def _scanpipe_docker_cmd(docker_bin, db_url, shell_cmd):
    """Return the ``docker run`` argument list that runs ``shell_cmd`` via sh."""
    # NOTE(review): the DB URL targets host.docker.internal while the
    # container uses --network host; confirm that name resolves on every
    # supported platform.
    return [
        docker_bin,
        "run",
        "--rm",
        "-v",
        f"{D2D_DIR.resolve()}:/code",
        "-e",
        f"DATABASE_URL={db_url}",
        "--network",
        "host",
        SCANCODE_IMAGE,
        "sh",
        "-c",
        shell_cmd,
    ]


def main():
    """Run the map_deploy_to_develop pipeline in Docker and save its JSON output.

    Steps:
      1. Parse the command line and copy the ``from``/``to`` inputs to D2D_DIR.
      2. Start a Postgres container published on a free host port and wait
         until it is ready.
      3. Run ``scanpipe create-project`` and ``scanpipe execute`` in a
         ScanCode.io container that mounts D2D_DIR at ``/code``.
      4. Run ``scanpipe output --format json --print`` and write the captured
         stdout to the ``--output`` file.
      5. Always force-remove the Postgres container, even on failure.
    """
    # Local import: only needed to quote values placed in the `sh -c` string.
    import shlex

    parser = argparse.ArgumentParser(
        description="Run ScanCode.io pipelines in Docker with isolated Postgres DB "
        "(using ./d2d directory)."
    )
    parser.add_argument(
        "--input-file",
        action="append",
        required=True,
        help="Format: path/to/file:tag (tag must be 'from' or 'to')",
    )
    parser.add_argument(
        "--option",
        action="append",
        help="Options for the pipeline, e.g. Python, Java, Javascript",
    )
    parser.add_argument(
        "--output",
        required=True,
        help="Output file to write the ScanCode results (JSON format)",
    )
    args = parser.parse_args()

    file_map = _parse_input_files(args.input_file)
    from_name, to_name = prepare_d2d_dir(file_map["from"], file_map["to"])

    # Random suffixes keep container and project names unique per run.
    db_container_name = f"scancode_db_{uuid.uuid4().hex[:6]}"
    db_port = get_free_port()
    print(f"Using Postgres host port: {db_port}")

    project_name = f"scanpipe_{uuid.uuid4().hex[:8]}"

    docker_bin = shutil.which("docker") or "docker"

    try:
        # silent=True skips the "Running: ..." echo of this command line,
        # which contains the database password.
        safe_run(
            [
                docker_bin,
                "run",
                "-d",
                "--name",
                db_container_name,
                "-e",
                f"POSTGRES_USER={DB_USER}",
                "-e",
                f"POSTGRES_PASSWORD={DB_PASS}",
                "-e",
                f"POSTGRES_DB={DB_NAME}",
                "-p",
                f"{db_port}:5432",
                DB_IMAGE,
            ],
            silent=True,
        )

        wait_for_postgres(db_container_name)
        db_url = (
            f"postgresql://{DB_USER}:{DB_PASS}@host.docker.internal:{db_port}/{DB_NAME}"
        )

        # Every user-influenced value is shell-quoted because the command
        # string is interpreted by `sh -c` inside the container.
        quoted_project = shlex.quote(project_name)
        from_arg = shlex.quote(f"/code/{from_name}:from")
        to_arg = shlex.quote(f"/code/{to_name}:to")
        pipeline_arg = shlex.quote(_build_pipeline_name(args.option))

        pipeline_cmd = (
            f"scanpipe create-project {quoted_project} "
            f"--input-file {from_arg} "
            f"--input-file {to_arg} "
            f"--pipeline {pipeline_arg} && "
            f"scanpipe execute --project {quoted_project}"
        )

        print("Running ScanCode pipeline:")
        safe_run(
            _scanpipe_docker_cmd(docker_bin, db_url, pipeline_cmd),
            capture_output=False,
        )

        output_cmd = (
            f"scanpipe output --project {quoted_project} --format json --print"
        )
        result = safe_run(
            _scanpipe_docker_cmd(docker_bin, db_url, output_cmd),
            capture_output=True,
        )

        with open(args.output, "w") as f:
            f.write(result.stdout)

    finally:
        # Best-effort cleanup: check=False so a failed removal never masks
        # the original error.
        subprocess.run([docker_bin, "rm", "-f", db_container_name], check=False)
230+
231+
232+
# Run the command-line entry point only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)