Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Break submit trace into two spans and add tracing to wrapper #592

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 30 additions & 13 deletions bin/jobsub_submit
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ from submit_support import (
)
from tarfiles import do_tarballs
from token_mods import get_job_scopes, use_token_copy
from tracing import as_span, log_host_time
from tracing import as_span, log_host_time, get_propagator_carrier
from utils import (
set_extras_n_fix_units,
cleanup,
Expand All @@ -70,17 +70,9 @@ from version import print_version, print_support_email
verbose = 0 # pylint: disable=invalid-name


# pylint: disable=too-many-branches,too-many-statements
@as_span("jobsub_submit", is_main=True)
@as_span("jobsub", is_main=True)
def main():
"""script mainline:
- parse args
- get credentials
- handle tarfile options
- set added values from environment, etc.
- convert/render template files to submission files
- launch
"""
"""main entrypoint, handles basic arg parsing and creates parent span for submission"""
global verbose # pylint: disable=global-statement,invalid-name
parser = get_parser()

Expand All @@ -104,8 +96,6 @@ def main():

verbose = args.verbose

log_host_time(verbose)

# if they were trying to pass LD_LIBRARY_PATH to the job, get it from HIDE_LD_LIBRARY_PATH
if "LD_LIBRARY_PATH" in args.environment and os.environ.get(
"HIDE_LD_LIBRARY_PATH", ""
Expand All @@ -115,6 +105,33 @@ def main():
for x in args.environment
]

# get tracing propagator traceparent id so we can use it in templates, etc.
if args.traceparent is None:
carrier = get_propagator_carrier()
if carrier and "traceparent" in carrier:
args.traceparent = carrier["traceparent"]
else:
args.traceparent = ""

if verbose > 0:
sys.stderr.write(f"Setting traceparent: {args.traceparent}\n")

submit(args)


# pylint: disable=too-many-branches,too-many-statements
@as_span("jobsub_submit")
def submit(args):
"""script mainline:
- get credentials
- handle tarfile options
- set added values from environment, etc.
- convert/render template files to submission files
- launch
"""

log_host_time(args.verbose)

if args.version:
print_version()

Expand Down
5 changes: 5 additions & 0 deletions lib/get_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,11 @@ def get_base_parser(add_condor_epilog: bool = False) -> argparse.ArgumentParser:
action=CheckIfValidSchedd,
help=argparse.SUPPRESS,
)
group.add_argument(
"--traceparent",
help="Trace context",
default=os.environ.get("TRACEPARENT", None),
)
return parser


Expand Down
13 changes: 0 additions & 13 deletions lib/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
import uuid

import classad # type: ignore # pylint: disable=import-error
from tracing import get_propagator_carrier
import token_mods

from creds import CredentialSet
Expand Down Expand Up @@ -199,18 +198,6 @@ def set_extras_n_fix_units(

set_some_extras(args, schedd_name, cred_set)

#
# get tracing propagator traceparent id so we can use it in templates, etc.
#
carrier = get_propagator_carrier()
if carrier and "traceparent" in carrier:
args["traceparent"] = carrier["traceparent"]
else:
args["traceparent"] = ""

if args["verbose"] > 0:
sys.stderr.write(f"Setting traceparent: {args['traceparent']}\n")

# Read in credentials
for cred_type, cred_path in vars(cred_set).items():
args[cred_type] = cred_path
Expand Down
37 changes: 37 additions & 0 deletions templates/simple/simple.sh
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,44 @@ redirect_output_finish(){
{%endif%}
}

trace_start(){
if ( is_set $TRACEPARENT ); then
version="00"
export JSB_TRACE_ID=`echo $TRACEPARENT | cut -d'-' -f 2`
export JSB_PARENT_SPAN_ID=`echo $TRACEPARENT | cut -d'-' -f 3`
sample=`echo $TRACEPARENT | cut -d'-' -f 4`
# 8-byte random span ID e.g. adce7acee6441ec74
export JSB_SPAN_ID=`head -c8 /dev/urandom | hexdump -e '"%02x"'`
Copy link
Contributor Author

@retzkek retzkek Dec 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any better portable ideas to generate a span ID?

uuidgen was suggested, which appears to be available alongside hexdump, but the output would need to be munged a bit - we need 8 bytes/16 chars, so could be the last two parts* which should be unique even if uuidgen falls back to time-based (which is probably unlikely). Is that better? ¯\_(ツ)_/¯

Apptainer> for x in {1..5}; do uuidgen | tr -d '-' | cut -c '17-'; done
98ea0d9203e32de4
a670a63510d929e5
a362b70ee7c45a2c
adbdcecf9ff8fe00
a003fc48358eb47b
Apptainer> for x in {1..5}; do uuidgen -t | tr -d '-' | cut -c '17-'; done
9604001a4af11e5f
9d58001a4af11e5f
a312001a4af11e5f
a2ed001a4af11e5f
b76a001a4af11e5f
  • the first 8 bytes will include the UUID version, but then maybe that's not bad really.

# epoch nanoseconds e.g. 1544712660000000000
export JSB_SPAN_START=`date +%s%N`
# create child span
export TRACEPARENT="$version-$JSB_TRACE_ID-$JSB_SPAN_ID-$sample"
fi
}

trace_finish(){
# TODO check some other env vars
if ( is_set $OTEL_EXPORTER_OTLP_ENDPOINT ); then
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="$OTEL_EXPORTER_OTLP_ENDPOINT/v1/traces"
fi
if (( is_set $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT ) && ( is_set $TRACEPARENT )); then
export JSB_SPAN_END=`date +%s%N`
# example trace JSON https://github.com/open-telemetry/opentelemetry-proto/blob/v1.3.2/examples/trace.json
payload=`cat << __HEREDOC__
{"resourceSpans":[{"resource":{"attributes":[{"key":"service.name","value":{"stringValue":"fife"}}]},
"scopeSpans":[{"scope":{"name":"jobsub.wrapper","version":"1.0.0","attributes":[]},
"spans":[{"traceId":"$JSB_TRACE_ID","spanId":"$JSB_SPAN_ID","parentSpanId":"$JSB_PARENT_SPAN_ID","name":"job","startTimeUnixNano":"$JSB_SPAN_START","endTimeUnixNano":"$JSB_SPAN_END","kind":1,
"attributes":[{"key":"hostname","value":{"stringValue":"$HOSTNAME"}},
{"key":"job.id","value":{"stringValue":"$JOBSUBJOBID"}}]}]}]}]}
__HEREDOC__`
echo $payload 1>&2
echo $payload | curl -s -k --data-binary @- -XPOST -H 'Content-Type: application/json' -H 'Accept: application/json' $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
fi
}


normal_exit(){
trace_finish
redirect_output_finish

# maybe don't cleanup so we can transfer files back...
Expand Down Expand Up @@ -202,6 +238,7 @@ touch .empty_file
args="$@"
set - ""
[[ "$JOBSUB_DEBUG" ]] && set_jobsub_debug
trace_start

export JSB_TMP=$_CONDOR_SCRATCH_DIR/jsb_tmp
mkdir -p $JSB_TMP
Expand Down