Skip to content

feat: extra simd kernel branches #115

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
220 changes: 219 additions & 1 deletion ext/DynamicExpressionsLoopVectorizationExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ import DynamicExpressions.EvaluateModule:
deg1_l1_ll0_eval,
deg2_l0_r0_eval,
deg2_l0_eval,
deg2_r0_eval
deg2_r0_eval,
deg2_l2_ll0_lr0_r0_eval,
deg2_l0_r2_rl0_rr0_eval
import DynamicExpressions.ExtensionInterfaceModule:
_is_loopvectorization_loaded, bumper_kern1!, bumper_kern2!

Expand Down Expand Up @@ -231,4 +233,220 @@ function bumper_kern2!(
return cumulator1
end

"""
    deg2_l2_ll0_lr0_r0_eval(tree, cX, op, op_l, eval_options::EvalOptions{true})

Fused `@turbo` kernel computing `op(op_l(ll, lr), r)` for every column `j` of `cX`,
where `ll = tree.l.l`, `lr = tree.l.r` and `r = tree.r` are read as leaves.

Each leaf contributes either its stored constant (`.val`, when `.constant` is true)
or the feature row `cX[leaf.feature, j]`. One branch exists per constant/feature
combination so that every loop body only touches the data it needs.

# Arguments
- `tree`: node whose left child is binary and whose three grandchildren/children
  listed above are leaves (not checked here; presumably guaranteed by the caller).
- `cX`: data matrix, indexed as `cX[feature, j]`.
- `op`: binary operator applied at `tree`.
- `op_l`: binary operator applied at `tree.l`.
- `eval_options`: evaluation options; its `buffer` is forwarded to
  `get_array`/`get_filled_array`.

# Returns
`ResultOk(array, true)` on the normal path. `@return_on_nonfinite_val` is defined
elsewhere and may return early for a non-finite constant (presumably only when
early exit is enabled; confirm against its definition).
"""
function deg2_l2_ll0_lr0_r0_eval(
    tree::AbstractExpressionNode{T},
    cX::AbstractMatrix{T},
    op::F,
    op_l::F2,
    eval_options::EvalOptions{true},
) where {T<:Number,F,F2}
    if tree.l.l.constant && tree.l.r.constant && tree.r.constant
        # Everything is constant: evaluate once and fill the output.
        val_ll = tree.l.l.val
        val_lr = tree.l.r.val
        val_r = tree.r.val
        @return_on_nonfinite_val(eval_options, val_ll, cX)
        @return_on_nonfinite_val(eval_options, val_lr, cX)
        @return_on_nonfinite_val(eval_options, val_r, cX)
        x_l = op_l(val_ll, val_lr)::T
        @return_on_nonfinite_val(eval_options, x_l, cX)
        x = op(x_l, val_r)::T
        @return_on_nonfinite_val(eval_options, x, cX)
        return ResultOk(get_filled_array(eval_options.buffer, x, cX, axes(cX, 2)), true)
    elseif tree.l.l.constant && tree.l.r.constant
        val_ll = tree.l.l.val
        val_lr = tree.l.r.val
        @return_on_nonfinite_val(eval_options, val_ll, cX)
        @return_on_nonfinite_val(eval_options, val_lr, cX)
        # The left subtree is loop-invariant: fold it once and validate the folded
        # value, as the all-constant branch does, before requesting the output array.
        x_l = op_l(val_ll, val_lr)::T
        @return_on_nonfinite_val(eval_options, x_l, cX)
        feature_r = tree.r.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x = op(x_l, cX[feature_r, j])
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.l.l.constant && tree.r.constant
        val_ll = tree.l.l.val
        val_r = tree.r.val
        @return_on_nonfinite_val(eval_options, val_ll, cX)
        @return_on_nonfinite_val(eval_options, val_r, cX)
        feature_lr = tree.l.r.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_l = op_l(val_ll, cX[feature_lr, j])
            x = op(x_l, val_r)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.l.r.constant && tree.r.constant
        val_lr = tree.l.r.val
        val_r = tree.r.val
        @return_on_nonfinite_val(eval_options, val_lr, cX)
        @return_on_nonfinite_val(eval_options, val_r, cX)
        feature_ll = tree.l.l.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_l = op_l(cX[feature_ll, j], val_lr)
            x = op(x_l, val_r)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.l.l.constant
        val_ll = tree.l.l.val
        @return_on_nonfinite_val(eval_options, val_ll, cX)
        feature_lr = tree.l.r.feature
        feature_r = tree.r.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_l = op_l(val_ll, cX[feature_lr, j])
            x = op(x_l, cX[feature_r, j])
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.l.r.constant
        val_lr = tree.l.r.val
        @return_on_nonfinite_val(eval_options, val_lr, cX)
        feature_ll = tree.l.l.feature
        feature_r = tree.r.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_l = op_l(cX[feature_ll, j], val_lr)
            x = op(x_l, cX[feature_r, j])
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.r.constant
        val_r = tree.r.val
        @return_on_nonfinite_val(eval_options, val_r, cX)
        feature_ll = tree.l.l.feature
        feature_lr = tree.l.r.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_l = op_l(cX[feature_ll, j], cX[feature_lr, j])
            x = op(x_l, val_r)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    else
        # No constants: all three operands come from feature rows of `cX`.
        feature_ll = tree.l.l.feature
        feature_lr = tree.l.r.feature
        feature_r = tree.r.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_l = op_l(cX[feature_ll, j], cX[feature_lr, j])
            x = op(x_l, cX[feature_r, j])
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    end
end

"""
    deg2_l0_r2_rl0_rr0_eval(tree, cX, op, op_r, eval_options::EvalOptions{true})

Fused `@turbo` kernel computing `op(l, op_r(rl, rr))` for every column `j` of `cX`,
where `l = tree.l`, `rl = tree.r.l` and `rr = tree.r.r` are read as leaves.

Each leaf contributes either its stored constant (`.val`, when `.constant` is true)
or the feature row `cX[leaf.feature, j]`. One branch exists per constant/feature
combination so that every loop body only touches the data it needs.

# Arguments
- `tree`: node whose right child is binary and whose three children/grandchildren
  listed above are leaves (not checked here; presumably guaranteed by the caller).
- `cX`: data matrix, indexed as `cX[feature, j]`.
- `op`: binary operator applied at `tree`.
- `op_r`: binary operator applied at `tree.r`.
- `eval_options`: evaluation options; its `buffer` is forwarded to
  `get_array`/`get_filled_array`.

# Returns
`ResultOk(array, true)` on the normal path. `@return_on_nonfinite_val` is defined
elsewhere and may return early for a non-finite constant (presumably only when
early exit is enabled; confirm against its definition).
"""
function deg2_l0_r2_rl0_rr0_eval(
    tree::AbstractExpressionNode{T},
    cX::AbstractMatrix{T},
    op::F,
    op_r::F2,
    eval_options::EvalOptions{true},
) where {T<:Number,F,F2}
    if tree.l.constant && tree.r.l.constant && tree.r.r.constant
        # Everything is constant: evaluate once and fill the output.
        val_l = tree.l.val
        val_rl = tree.r.l.val
        val_rr = tree.r.r.val
        @return_on_nonfinite_val(eval_options, val_l, cX)
        @return_on_nonfinite_val(eval_options, val_rl, cX)
        @return_on_nonfinite_val(eval_options, val_rr, cX)
        x_r = op_r(val_rl, val_rr)::T
        @return_on_nonfinite_val(eval_options, x_r, cX)
        x = op(val_l, x_r)::T
        @return_on_nonfinite_val(eval_options, x, cX)
        return ResultOk(get_filled_array(eval_options.buffer, x, cX, axes(cX, 2)), true)
    elseif tree.r.l.constant && tree.r.r.constant
        val_rl = tree.r.l.val
        val_rr = tree.r.r.val
        @return_on_nonfinite_val(eval_options, val_rl, cX)
        @return_on_nonfinite_val(eval_options, val_rr, cX)
        # The right subtree is loop-invariant: fold it once and validate the folded
        # value, as the all-constant branch does, before requesting the output array.
        x_r = op_r(val_rl, val_rr)::T
        @return_on_nonfinite_val(eval_options, x_r, cX)
        feature_l = tree.l.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x = op(cX[feature_l, j], x_r)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.l.constant && tree.r.r.constant
        val_l = tree.l.val
        val_rr = tree.r.r.val
        @return_on_nonfinite_val(eval_options, val_l, cX)
        @return_on_nonfinite_val(eval_options, val_rr, cX)
        feature_rl = tree.r.l.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_r = op_r(cX[feature_rl, j], val_rr)
            x = op(val_l, x_r)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.l.constant && tree.r.l.constant
        val_l = tree.l.val
        val_rl = tree.r.l.val
        @return_on_nonfinite_val(eval_options, val_l, cX)
        @return_on_nonfinite_val(eval_options, val_rl, cX)
        feature_rr = tree.r.r.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_r = op_r(val_rl, cX[feature_rr, j])
            x = op(val_l, x_r)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.l.constant
        val_l = tree.l.val
        @return_on_nonfinite_val(eval_options, val_l, cX)
        feature_rl = tree.r.l.feature
        feature_rr = tree.r.r.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_r = op_r(cX[feature_rl, j], cX[feature_rr, j])
            x = op(val_l, x_r)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.r.l.constant
        val_rl = tree.r.l.val
        @return_on_nonfinite_val(eval_options, val_rl, cX)
        feature_l = tree.l.feature
        feature_rr = tree.r.r.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_r = op_r(val_rl, cX[feature_rr, j])
            x = op(cX[feature_l, j], x_r)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    elseif tree.r.r.constant
        val_rr = tree.r.r.val
        @return_on_nonfinite_val(eval_options, val_rr, cX)
        feature_l = tree.l.feature
        feature_rl = tree.r.l.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_r = op_r(cX[feature_rl, j], val_rr)
            x = op(cX[feature_l, j], x_r)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    else
        # No constants: all three operands come from feature rows of `cX`.
        feature_l = tree.l.feature
        feature_rl = tree.r.l.feature
        feature_rr = tree.r.r.feature
        cumulator = get_array(eval_options.buffer, cX, axes(cX, 2))
        @turbo for j in axes(cX, 2)
            x_r = op_r(cX[feature_rl, j], cX[feature_rr, j])
            x = op(cX[feature_l, j], x_r)
            cumulator[j] = x
        end
        return ResultOk(cumulator, true)
    end
end

end
Loading
Loading