Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Re-organize intrinsic-test to enable seamless addition of behaviour testing for more architectures #1758

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
use super::format::Indentation;
use super::json_parser::ArgPrep;
use super::types::{IntrinsicType, TypeKind};
use crate::common::types::Language;
use std::ops::Range;

use crate::Language;
use crate::format::Indentation;
use crate::json_parser::ArgPrep;
use crate::types::{IntrinsicType, TypeKind};

/// An argument for the intrinsic.
#[derive(Debug, PartialEq, Clone)]
pub struct Argument {
Expand Down
34 changes: 34 additions & 0 deletions crates/intrinsic-test/src/arm/config.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
pub fn build_notices(line_prefix: &str) -> String {
format!(
"\
{line_prefix}This is a transient test file, not intended for distribution. Some aspects of the
{line_prefix}test are derived from a JSON specification, published under the same license as the
{line_prefix}`intrinsic-test` crate.\n
"
)
}

pub const POLY128_OSTREAM_DEF: &str = r#"std::ostream& operator<<(std::ostream& os, poly128_t value) {
std::stringstream temp;
do {
int n = value % 10;
value /= 10;
temp << n;
} while (value != 0);
std::string tempstr(temp.str());
std::string res(tempstr.rbegin(), tempstr.rend());
os << res;
return os;
}"#;

pub const AARCH_CONFIGURATIONS: &str = r#"
#![cfg_attr(target_arch = "arm", feature(stdarch_arm_neon_intrinsics))]
#![cfg_attr(target_arch = "arm", feature(stdarch_aarch32_crc32))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fcma))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_dotprod))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_i8mm))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sha3))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))]
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))]
#![feature(stdarch_neon_f16)]
"#;
277 changes: 277 additions & 0 deletions crates/intrinsic-test/src/arm/functions.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,277 @@
use super::argument::Argument;
use super::config::{AARCH_CONFIGURATIONS, POLY128_OSTREAM_DEF, build_notices};
use super::format::Indentation;
use super::intrinsic::Intrinsic;
use crate::common::gen_c::{compile_c, create_c_filenames, generate_c_program};
use crate::common::gen_rust::{compile_rust, create_rust_filenames, generate_rust_program};
use crate::common::write_file;
use itertools::Itertools;
use rayon::prelude::*;
use std::collections::BTreeMap;

// The number of times each intrinsic will be called.
const PASSES: u32 = 20;

fn gen_code_c(
indentation: Indentation,
intrinsic: &Intrinsic,
constraints: &[&Argument],
name: String,
target: &str,
) -> String {
if let Some((current, constraints)) = constraints.split_last() {
let range = current
.constraints
.iter()
.map(|c| c.to_range())
.flat_map(|r| r.into_iter());

let body_indentation = indentation.nested();
range
.map(|i| {
format!(
"{indentation}{{\n\
{body_indentation}{ty} {name} = {val};\n\
{pass}\n\
{indentation}}}",
name = current.name,
ty = current.ty.c_type(),
val = i,
pass = gen_code_c(
body_indentation,
intrinsic,
constraints,
format!("{name}-{i}"),
target,
)
)
})
.join("\n")
} else {
intrinsic.generate_loop_c(indentation, &name, PASSES, target)
}
}

fn generate_c_program_arm(header_files: &[&str], intrinsic: &Intrinsic, target: &str) -> String {
let constraints = intrinsic
.arguments
.iter()
.filter(|i| i.has_constraint())
.collect_vec();

let indentation = Indentation::default();
generate_c_program(
build_notices("// ").as_str(),
header_files,
"aarch64",
&[POLY128_OSTREAM_DEF],
intrinsic
.arguments
.gen_arglists_c(indentation, PASSES)
.as_str(),
gen_code_c(
indentation.nested(),
intrinsic,
constraints.as_slice(),
Default::default(),
target,
)
.as_str(),
)
}

fn gen_code_rust(
indentation: Indentation,
intrinsic: &Intrinsic,
constraints: &[&Argument],
name: String,
) -> String {
if let Some((current, constraints)) = constraints.split_last() {
let range = current
.constraints
.iter()
.map(|c| c.to_range())
.flat_map(|r| r.into_iter());

let body_indentation = indentation.nested();
range
.map(|i| {
format!(
"{indentation}{{\n\
{body_indentation}const {name}: {ty} = {val};\n\
{pass}\n\
{indentation}}}",
name = current.name,
ty = current.ty.rust_type(),
val = i,
pass = gen_code_rust(
body_indentation,
intrinsic,
constraints,
format!("{name}-{i}")
)
)
})
.join("\n")
} else {
intrinsic.generate_loop_rust(indentation, &name, PASSES)
}
}

fn generate_rust_program_arm(intrinsic: &Intrinsic, target: &str) -> String {
let constraints = intrinsic
.arguments
.iter()
.filter(|i| i.has_constraint())
.collect_vec();

let indentation = Indentation::default();
let final_target = if target.contains("v7") {
"arm"
} else {
"aarch64"
};
generate_rust_program(
build_notices("// ").as_str(),
AARCH_CONFIGURATIONS,
final_target,
intrinsic
.arguments
.gen_arglists_rust(indentation.nested(), PASSES)
.as_str(),
gen_code_rust(
indentation.nested(),
intrinsic,
&constraints,
Default::default(),
)
.as_str(),
)
}

fn compile_c_arm(
intrinsics_name_list: &Vec<String>,
filename_mapping: BTreeMap<&String, String>,
compiler: &str,
target: &str,
cxx_toolchain_dir: Option<&str>,
) -> bool {
let compiler_commands = intrinsics_name_list.iter().map(|intrinsic_name| {
let c_filename = filename_mapping.get(intrinsic_name).unwrap();
let flags = std::env::var("CPPFLAGS").unwrap_or("".into());
let arch_flags = if target.contains("v7") {
"-march=armv8.6-a+crypto+crc+dotprod+fp16"
} else {
"-march=armv8.6-a+crypto+sha3+crc+dotprod+fp16+faminmax+lut"
};

let compiler_command = if target == "aarch64_be-unknown-linux-gnu" {
let Some(cxx_toolchain_dir) = cxx_toolchain_dir else {
panic!(
"When setting `--target aarch64_be-unknown-linux-gnu` the C++ compilers toolchain directory must be set with `--cxx-toolchain-dir <dest>`"
);
};

/* clang++ cannot link an aarch64_be object file, so we invoke
* aarch64_be-unknown-linux-gnu's C++ linker. This ensures that we
* are testing the intrinsics against LLVM.
*
* Note: setting `--sysroot=<...>` which is the obvious thing to do
* does not work as it gets caught up with `#include_next <stdlib.h>`
* not existing... */
format!(
Copy link
Author

@madhav-madhusoodanan madhav-madhusoodanan Apr 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be a good idea to make the compilation commands part common too?

Parts like:

  1. Include-paths, target
  2. Output binary
  3. Linking steps
  4. Optional cleanup step, etc

could be organized within a struct, post which we could add in functionality (in common module) to convert the struct into the commands.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes it's a good idea. In general there will be 2 cases to consider:

  • When running natively on target hardware, we can just use the host toolchain. This is what currently happens for the aarch64 tests which run on an aarch64 Github runner.
  • When running emulated using qemu, we need to use a custom toolchain. We still need to use clang, but we need to point it to the C headers and libraries of the cross-compilation toolchain.

All architectures can be generalized to one of these 2 cases.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see
Awesome, I'll do that too.

"{compiler} {flags} {arch_flags} \
-ffp-contract=off \
-Wno-narrowing \
-O2 \
--target=aarch64_be-unknown-linux-gnu \
-I{cxx_toolchain_dir}/include \
-I{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include \
-I{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.2.1 \
-I{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.2.1/aarch64_be-none-linux-gnu \
-I{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.2.1/backward \
-I{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/libc/usr/include \
-c {c_filename} \
-o c_programs/{intrinsic_name}.o && \
{cxx_toolchain_dir}/bin/aarch64_be-none-linux-gnu-g++ c_programs/{intrinsic_name}.o -o c_programs/{intrinsic_name} && \
rm c_programs/{intrinsic_name}.o",
)
} else {
// -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations
let base_compiler_command = format!(
"{compiler} {flags} {arch_flags} -o c_programs/{intrinsic_name} {c_filename} -ffp-contract=off -Wno-narrowing -O2"
);

/* `-target` can be passed to some c++ compilers, however if we want to
* use a c++ compiler does not support this flag we do not want to pass
* the flag. */
if compiler.contains("clang") {
format!("{base_compiler_command} -target {target}")
} else {
format!("{base_compiler_command} -flax-vector-conversions")
}
};

compiler_command
})
.collect::<Vec<_>>();

compile_c(&compiler_commands)
}

pub fn build_c(
intrinsics: &Vec<Intrinsic>,
compiler: Option<&str>,
target: &str,
cxx_toolchain_dir: Option<&str>,
) -> bool {
let intrinsics_name_list = intrinsics
.par_iter()
.map(|i| i.name.clone())
.collect::<Vec<_>>();
let filename_mapping = create_c_filenames(&intrinsics_name_list);

intrinsics.par_iter().for_each(|i| {
let c_code = generate_c_program_arm(&["arm_neon.h", "arm_acle.h", "arm_fp16.h"], i, target);
match filename_mapping.get(&i.name) {
Some(filename) => write_file(filename, c_code),
None => {}
};
});

match compiler {
None => true,
Some(compiler) => compile_c_arm(
&intrinsics_name_list,
filename_mapping,
compiler,
target,
cxx_toolchain_dir,
),
}
}

pub fn build_rust(
intrinsics: &[Intrinsic],
toolchain: Option<&str>,
target: &str,
linker: Option<&str>,
) -> bool {
let intrinsics_name_list = intrinsics
.par_iter()
.map(|i| i.name.clone())
.collect::<Vec<_>>();
let filename_mapping = create_rust_filenames(&intrinsics_name_list);

intrinsics.par_iter().for_each(|i| {
let rust_code = generate_rust_program_arm(i, target);
match filename_mapping.get(&i.name) {
Some(filename) => write_file(filename, rust_code),
None => {}
}
});

let intrinsics_name_list = intrinsics.iter().map(|i| i.name.as_str()).collect_vec();

compile_rust(&intrinsics_name_list, toolchain, target, linker)
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use crate::format::Indentation;
use crate::types::{IntrinsicType, TypeKind};

use super::argument::ArgumentList;
use super::format::Indentation;
use super::types::{IntrinsicType, TypeKind};

/// An intrinsic
#[derive(Debug, PartialEq, Clone)]
Expand Down Expand Up @@ -82,8 +81,6 @@ impl Intrinsic {
String::from("")
},
close = if self.results.is_simd() { ")" } else { "" },
lanes = lanes,
additional = additional,
)
}

Expand Down Expand Up @@ -135,7 +132,6 @@ impl Intrinsic {
intrinsic_call = self.name,
const = constraints,
args = self.arguments.as_call_param_rust(),
additional = additional,
)
}
}
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
use super::argument::{Argument, ArgumentList};
use super::intrinsic::Intrinsic;
use super::types::IntrinsicType;
use serde::Deserialize;
use std::collections::HashMap;
use std::path::Path;

use serde::Deserialize;

use crate::argument::{Argument, ArgumentList};
use crate::intrinsic::Intrinsic;
use crate::types::IntrinsicType;

#[derive(Deserialize, Debug)]
#[serde(deny_unknown_fields)]
struct ReturnType {
Expand Down
Loading