Skip to content

Commit 36ab9fe

Browse files
authored
Merge branch 'openjdk:code-reflection' into array-view
2 parents 9739e6f + 02b2059 commit 36ab9fe

315 files changed

Lines changed: 8494 additions & 6135 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,12 @@ jtreg -jdk:./build/macosx-x86_64-server-release/jdk/ -ea -esa -avm -va test/lang
3131
Specific runtime tests can be executed using `jtreg`, for example:
3232

3333
```
34-
jtreg -jdk:./build/macosx-x86_64-server-release/jdk/ -ea -esa -avm -va test/jdk/java/lang/reflect/code/
34+
jtreg -jdk:./build/macosx-x86_64-server-release/jdk/ -ea -esa -avm -va test/jdk/jdk/incubator/code/
3535
```
3636

3737
In addition, the runtime tests can be executed using make with the test group
38-
`jdk_lang_reflect_code` as follows:
38+
`jdk_incubator_code` as follows:
3939

4040
```
41-
make test TEST=jdk_lang_reflect_code
41+
make test TEST=jdk_incubator_code
4242
```

cr-examples/onnx/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ sh setup.sh path/to/cloned/onnxruntime
4747

4848
Setup:
4949
- Download [onnxruntime-genai](https://github.com/microsoft/onnxruntime-genai/releases) native library corresponding to your system/architecture, unzip and put it into `cr-examples/onnx/lib` folder.
50-
- Download `model.onnx.data`, `tokenizer.json` and `tokenizer_config.json` data files from [Llama-3.2-1B-Instruct-ONNX](https://huggingface.co/onnx-community/Llama-3.2-1B-Instruct-ONNX/tree/main/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4) and put them into `cr-examples/onnx/src/test/resources/oracle/code/onnx/llm` folder.
50+
- Download `model_q4.onnx_data`, `tokenizer.json` and `tokenizer_config.json` data files from [Llama-3.2-1B-Instruct-ONNX](https://huggingface.co/onnx-community/Llama-3.2-1B-Instruct-ONNX/tree/main) and put them into `cr-examples/onnx/src/test/resources/oracle/code/onnx/llm` folder.
5151

5252
Running the Llama demo:
5353
```

cr-examples/onnx/src/test/java/oracle/code/onnx/llm/LlamaModel.java

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2025, 2026, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -43,32 +43,36 @@ public final class LlamaModel {
4343
VOCAB_SIZE = 128256,
4444
HEAD_SIZE = 64,
4545
HIDEN_SIZE = 2048,
46+
KV_HIDEN_SIZE = 512,
4647
CONTEXT_SIZE = 131072,
4748
INTERMEDIATE_SIZE = 8192,
4849
ATTN_WEIGHTS_SIZE = 3072;
4950
public static final float EPSILON = 1.0E-5f,
5051
SCALE = 0.125f;
5152

5253
public final Tensor<Long> flat1, scalar1;
53-
public final Tensor<Float> tokensWeights, initWeight, cosCache, sinCache, headScales;
54+
public final Tensor<Float> tokensWeights, initWeight, cosCache, sinCache;
5455
public final Tensor<Float>[] postAttentionWeights = new Tensor[LAYERS],
5556
inputWeights = new Tensor[LAYERS],
56-
attnQkvScales = new Tensor[LAYERS],
57+
attnQScales = new Tensor[LAYERS],
58+
attnKScales = new Tensor[LAYERS],
59+
attnVScales = new Tensor[LAYERS],
5760
attnOScales = new Tensor[LAYERS],
5861
mlpGateScales = new Tensor[LAYERS],
5962
mlpUpScales = new Tensor[LAYERS],
6063
mlpDownScales = new Tensor[LAYERS];
61-
public final Tensor<Byte>[] attnQkvWeight = new Tensor[LAYERS],
64+
public final Tensor<Byte>[] attnQWeight = new Tensor[LAYERS],
65+
attnKWeight = new Tensor[LAYERS],
66+
attnVWeight = new Tensor[LAYERS],
6267
attnOWeight = new Tensor[LAYERS],
6368
mlpGateWeight = new Tensor[LAYERS],
6469
mlpUpWeight = new Tensor[LAYERS],
6570
mlpDownWeight = new Tensor[LAYERS];
66-
public final Tensor<Byte> headWeight;
6771

6872
public LlamaModel(Arena arena) throws IOException {
6973
flat1 = Tensor.ofFlat(arena, 1l);
7074
scalar1 = Tensor.ofScalar(arena, 1l);
71-
var modelData = new TensorDataStream(arena, LlamaModel.class.getResource("model.onnx.data").getPath());
75+
var modelData = new TensorDataStream(arena, LlamaModel.class.getResource("model_q4.onnx_data").getPath());
7276
tokensWeights = modelData.nextTensor(FLOAT, VOCAB_SIZE, HIDEN_SIZE);
7377
initWeight = modelData.nextTensor(FLOAT, HIDEN_SIZE);
7478
cosCache = modelData.nextTensor(FLOAT, CONTEXT_SIZE, HEAD_SIZE / 2);
@@ -78,19 +82,21 @@ public LlamaModel(Arena arena) throws IOException {
7882
inputWeights[i] = modelData.nextTensor(FLOAT, HIDEN_SIZE);
7983
}
8084
for (int i = 0; i < LAYERS; i++) {
81-
attnQkvWeight[i] = modelData.nextTensor(UINT8, ATTN_WEIGHTS_SIZE, HEAD_SIZE, 16);
82-
attnQkvScales[i] = modelData.nextTensor(FLOAT, ATTN_WEIGHTS_SIZE * HEAD_SIZE);
85+
attnQWeight[i] = modelData.nextTensor(UINT8, HIDEN_SIZE, HEAD_SIZE, 16);
86+
attnQScales[i] = modelData.nextTensor(FLOAT, HIDEN_SIZE, HEAD_SIZE);
87+
attnKWeight[i] = modelData.nextTensor(UINT8, KV_HIDEN_SIZE, HEAD_SIZE, 16);
88+
attnKScales[i] = modelData.nextTensor(FLOAT, KV_HIDEN_SIZE, HEAD_SIZE);
89+
attnVWeight[i] = modelData.nextTensor(UINT8, KV_HIDEN_SIZE, HEAD_SIZE, 16);
90+
attnVScales[i] = modelData.nextTensor(FLOAT, KV_HIDEN_SIZE, HEAD_SIZE);
8391
attnOWeight[i] = modelData.nextTensor(UINT8, HIDEN_SIZE, HEAD_SIZE, 16);
84-
attnOScales[i] = modelData.nextTensor(FLOAT, HIDEN_SIZE * HEAD_SIZE);
92+
attnOScales[i] = modelData.nextTensor(FLOAT, HIDEN_SIZE, HEAD_SIZE);
8593
mlpGateWeight[i] = modelData.nextTensor(UINT8, INTERMEDIATE_SIZE, HEAD_SIZE, 16);
86-
mlpGateScales[i] = modelData.nextTensor(FLOAT, INTERMEDIATE_SIZE * HEAD_SIZE);
94+
mlpGateScales[i] = modelData.nextTensor(FLOAT, INTERMEDIATE_SIZE, HEAD_SIZE);
8795
mlpUpWeight[i] = modelData.nextTensor(UINT8, INTERMEDIATE_SIZE, HEAD_SIZE, 16);
88-
mlpUpScales[i] = modelData.nextTensor(FLOAT, INTERMEDIATE_SIZE * HEAD_SIZE);
96+
mlpUpScales[i] = modelData.nextTensor(FLOAT, INTERMEDIATE_SIZE, HEAD_SIZE);
8997
mlpDownWeight[i] = modelData.nextTensor(UINT8, HIDEN_SIZE, 256, 16);
90-
mlpDownScales[i] = modelData.nextTensor(FLOAT, INTERMEDIATE_SIZE * HEAD_SIZE);
98+
mlpDownScales[i] = modelData.nextTensor(FLOAT, HIDEN_SIZE, 256);
9199
}
92-
headWeight = modelData.nextTensor(UINT8, VOCAB_SIZE, HEAD_SIZE, 16);
93-
headScales = modelData.nextTensor(FLOAT, VOCAB_SIZE * HEAD_SIZE);
94100
}
95101

96102
public record ForwardResponse(Tensor<Float> logits,
@@ -110,12 +116,15 @@ public ForwardResponse forward(Tensor<Long> inputIds, Tensor<Long> attentionMask
110116
Tensor<Float>[] presentValues = new Tensor[LAYERS];
111117

112118
for (int i = 0; i < LAYERS; i++) {
113-
GroupQueryAttention<Float> attn = GroupQueryAttention(
114-
MatMulNBits(input,
115-
attnQkvWeight[i],
116-
attnQkvScales[i], empty(), empty(), empty(), HIDEN_SIZE, ATTN_WEIGHTS_SIZE, of(ACCURACY_LEVEL), BITS, BLOCK_SIZE),
117-
empty(),
118-
empty(),
119+
GroupQueryAttention<Float> attn = GroupQueryAttention(MatMulNBits(input,
120+
attnQWeight[i],
121+
attnQScales[i], empty(), empty(), empty(), HIDEN_SIZE, HIDEN_SIZE, of(ACCURACY_LEVEL), BITS, BLOCK_SIZE),
122+
of(MatMulNBits(input,
123+
attnKWeight[i],
124+
attnKScales[i], empty(), empty(), empty(), HIDEN_SIZE, KV_HIDEN_SIZE, of(ACCURACY_LEVEL), BITS, BLOCK_SIZE)),
125+
of(MatMulNBits(input,
126+
attnVWeight[i],
127+
attnVScales[i], empty(), empty(), empty(), HIDEN_SIZE, KV_HIDEN_SIZE, of(ACCURACY_LEVEL), BITS, BLOCK_SIZE)),
119128
of(pastKey[i]),
120129
of(pastValue[i]),
121130
amSL,
@@ -150,9 +159,7 @@ mlpDownScales[i], empty(), empty(), empty(), INTERMEDIATE_SIZE, HIDEN_SIZE, of(A
150159
presentValues[i] = attn.present_value();
151160
}
152161

153-
Tensor<Float> logits = MatMulNBits(input,
154-
headWeight,
155-
headScales, empty(), empty(), empty(), HIDEN_SIZE, VOCAB_SIZE, of(ACCURACY_LEVEL), BITS, BLOCK_SIZE);
162+
Tensor<Float> logits = MatMul(input, Transpose(tokensWeights, of(new long[] {1L, 0L})));
156163

157164
return new ForwardResponse(logits, presentKeys, presentValues);
158165
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
/model.onnx.data
2+
/model_q4.onnx_data
23
/tokenizer_config.json
34
/tokenizer.json
45

hat/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,4 @@ stage/
2727
compile_flags.txt
2828
remoteTesting.conf
2929
test_report.txt
30+
var/*

0 commit comments

Comments
 (0)