Compare commits

...

2 Commits

Author SHA1 Message Date
Mel Henning
b9275b54a1 nak/sm70_encode: Remove unused has_mod parameter
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34750>
2025-04-29 18:23:43 +00:00
Mel Henning
28d077838f nak/sm70_encode: Encode fneg/fabs for hfma2 src 2
and also stop legalizing away src 1 modifiers. Both of these are present;
they just move to a different place in the encoding.

Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34750>
2025-04-29 18:23:43 +00:00
2 changed files with 56 additions and 66 deletions

View File

@@ -547,3 +547,48 @@ pub fn test_lea() {
c.check(sm);
}
}
#[test]
/// Exercises `OpHFma2` with every combination of the four float source
/// modifiers on each of its three sources, for every entry of `SM_LIST`.
///
/// Each instruction is queued on a `DisasmCheck` together with the text
/// `hfma2 r0, <src0>, <src1>, <src2>;` built from the `Display` output of
/// its sources, and the queued batch is checked once per SM.
pub fn test_hfma2() {
    let dst_reg = RegRef::new(RegFile::GPR, 0, 1);
    let reg_a = RegRef::new(RegFile::GPR, 1, 1);
    let reg_b = RegRef::new(RegFile::GPR, 2, 1);
    let reg_c = RegRef::new(RegFile::GPR, 3, 1);

    let src_mods = [SrcMod::None, SrcMod::FAbs, SrcMod::FNeg, SrcMod::FNegAbs];

    for sm in SM_LIST {
        let mut checker = DisasmCheck::new();

        // 4 * 4 * 4 modifier combinations, with src0 varying slowest.
        for a_mod in src_mods {
            for b_mod in src_mods {
                for c_mod in src_mods {
                    let mut instr = OpHFma2 {
                        dst: Dst::Reg(dst_reg),
                        srcs: [
                            SrcRef::Reg(reg_a).into(),
                            SrcRef::Reg(reg_b).into(),
                            SrcRef::Reg(reg_c).into(),
                        ],
                        saturate: false,
                        ftz: false,
                        dnz: false,
                        f32: false,
                    };

                    // Apply one modifier per source, in source order.
                    let mods = [a_mod, b_mod, c_mod];
                    for (src, src_mod) in instr.srcs.iter_mut().zip(mods) {
                        src.src_mod = src_mod;
                    }

                    // The expected text is derived from the sources
                    // themselves, so it follows their modifier formatting.
                    let expected = {
                        let [s0, s1, s2] = &instr.srcs;
                        format!("hfma2 r0, {}, {}, {};", s0, s1, s2)
                    };

                    checker.push(instr, expected);
                }
            }
        }

        checker.check(sm);
    }
}

View File

@@ -326,14 +326,6 @@ impl ALUSrc {
_ => panic!("Invalid ALU source"),
}
}
/// Returns `true` when this source carries an `abs` or `neg` flag.
///
/// Only the `Reg`, `UReg` and `CBuf` variants are inspected; every other
/// variant yields `false`.
pub fn has_src_mod(&self) -> bool {
    let (abs, neg) = match self {
        ALUSrc::Reg(r) | ALUSrc::UReg(r) => (r.abs, r.neg),
        ALUSrc::CBuf(c) => (c.abs, c.neg),
        // Remaining variants have no modifier flags to look at here.
        _ => return false,
    };
    abs || neg
}
}
impl SM70Encoder<'_> {
@@ -358,7 +350,6 @@ impl SM70Encoder<'_> {
swizzle_range: Range<usize>,
file: RegFile,
is_fp16_alu: bool,
has_mod: bool,
reg: &ALURegRef,
) {
match file {
@@ -367,12 +358,8 @@ impl SM70Encoder<'_> {
_ => panic!("Invalid ALU src register file"),
}
if has_mod {
self.set_bit(abs_bit, reg.abs);
self.set_bit(neg_bit, reg.neg);
} else {
assert!(!reg.abs && !reg.neg);
}
self.set_bit(abs_bit, reg.abs);
self.set_bit(neg_bit, reg.neg);
if is_fp16_alu {
self.set_swizzle(swizzle_range, reg.swizzle);
@@ -392,7 +379,7 @@ impl SM70Encoder<'_> {
ALUSrc::Reg(reg) => reg,
_ => panic!("Invalid ALU src"),
};
self.set_alu_reg(24..32, 73, 72, 74..76, file, is_fp16_alu, true, reg);
self.set_alu_reg(24..32, 73, 72, 74..76, file, is_fp16_alu, reg);
}
fn encode_alu_src2(
@@ -400,7 +387,6 @@ impl SM70Encoder<'_> {
src: &ALUSrc,
file: RegFile,
is_fp16_alu: bool,
bit74_75_are_mod: bool,
) {
let reg = match src {
ALUSrc::None => return,
@@ -409,12 +395,11 @@ impl SM70Encoder<'_> {
};
self.set_alu_reg(
64..72,
74,
75,
if is_fp16_alu { 83 } else { 74 },
if is_fp16_alu { 84 } else { 75 },
81..83,
file,
is_fp16_alu,
bit74_75_are_mod,
reg,
);
}
@@ -427,7 +412,6 @@ impl SM70Encoder<'_> {
60..62,
RegFile::GPR,
is_fp16_alu,
true,
reg,
);
}
@@ -479,25 +463,11 @@ impl SM70Encoder<'_> {
let src1 = ALUSrc::from_src(self, src1, false);
let src2 = ALUSrc::from_src(self, src2, false);
// Bits 74..76 are used both for the swizzle on src0 and for the source
// modifier for the register source of src1 and src2. When both are
// registers, it's used for src2. The hardware elects to always support
// a swizzle and not support source modifiers in that case.
let bit74_75_are_mod = !is_fp16_alu
|| matches!(src1, ALUSrc::None)
|| matches!(src2, ALUSrc::None);
debug_assert!(bit74_75_are_mod || !src2.has_src_mod());
self.encode_alu_src0(&src0, RegFile::GPR, is_fp16_alu);
let form = match &src2 {
ALUSrc::None | ALUSrc::Reg(_) => {
self.encode_alu_src2(
&src2,
RegFile::GPR,
is_fp16_alu,
bit74_75_are_mod,
);
self.encode_alu_src2(&src2, RegFile::GPR, is_fp16_alu);
match &src1 {
ALUSrc::None => 1_u8, // form
ALUSrc::Reg(reg1) => {
@@ -520,33 +490,18 @@ impl SM70Encoder<'_> {
}
ALUSrc::UReg(reg2) => {
self.encode_alu_ureg(reg2, is_fp16_alu);
self.encode_alu_src2(
&src1,
RegFile::GPR,
is_fp16_alu,
bit74_75_are_mod,
);
self.encode_alu_src2(&src1, RegFile::GPR, is_fp16_alu);
7_u8 // form
}
ALUSrc::Imm32(imm2) => {
self.encode_alu_imm(imm2);
self.encode_alu_src2(
&src1,
RegFile::GPR,
is_fp16_alu,
bit74_75_are_mod,
);
self.encode_alu_src2(&src1, RegFile::GPR, is_fp16_alu);
2_u8 // form
}
ALUSrc::CBuf(cb2) => {
// TODO set_src_cx
self.encode_alu_cb(cb2, is_fp16_alu);
self.encode_alu_src2(
&src1,
RegFile::GPR,
is_fp16_alu,
bit74_75_are_mod,
);
self.encode_alu_src2(&src1, RegFile::GPR, is_fp16_alu);
3_u8 // form
}
};
@@ -599,7 +554,7 @@ impl SM70Encoder<'_> {
self.encode_alu_src0(&src0, RegFile::UGPR, false);
let form = match &src2 {
ALUSrc::None | ALUSrc::Reg(_) => {
self.encode_alu_src2(&src2, RegFile::UGPR, false, true);
self.encode_alu_src2(&src2, RegFile::UGPR, false);
match &src1 {
ALUSrc::None => 1_u8, // form
ALUSrc::Reg(reg1) => {
@@ -617,7 +572,7 @@ impl SM70Encoder<'_> {
ALUSrc::UReg(_) => panic!("UALU never has UReg"),
ALUSrc::Imm32(imm2) => {
self.encode_alu_imm(imm2);
self.encode_alu_src2(&src1, RegFile::UGPR, false, true);
self.encode_alu_src2(&src1, RegFile::UGPR, false);
2_u8 // form
}
ALUSrc::CBuf(_) => panic!("UALU does not support cbufs"),
@@ -1112,19 +1067,9 @@ impl SM70Op for OpHFma2 {
b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F16v2);
b.copy_alu_src_if_not_reg(src1, gpr, SrcType::F16v2);
b.copy_alu_src_if_both_not_reg(src1, src2, gpr, SrcType::F16v2);
if !src1.src_mod.is_none() {
b.copy_alu_src_and_lower_fmod(src1, gpr, SrcType::F16v2);
}
if !src2.src_mod.is_none() {
b.copy_alu_src_and_lower_fmod(src2, gpr, SrcType::F16v2);
}
}
fn encode(&self, e: &mut SM70Encoder<'_>) {
assert!(self.srcs[1].src_mod.is_none());
assert!(self.srcs[2].src_mod.is_none());
e.encode_fp16_alu(
0x031,
Some(&self.dst),