From 638e47c04dca31c3434f88fccef8cc61315b6adf Mon Sep 17 00:00:00 2001 From: aroulin Date: Sun, 16 Aug 2015 11:51:21 +0200 Subject: [PATCH 1/2] Shader: implement EX2 and LG2 in interpreter --- src/video_core/shader/shader_interpreter.cpp | 36 ++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index e14de07685..646171a195 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -334,6 +334,42 @@ void RunInterpreter(UnitState& state) { Record(state.debug, iteration, state.conditional_code); break; + case OpCode::Id::EX2: + { + Record(state.debug, iteration, src1); + Record(state.debug, iteration, dest); + + // EX2 only takes first component exp2 and writes it to all dest components + float24 ex2_res = float24::FromFloat32(std::exp2(src1[0].ToFloat32())); + for (int i = 0; i < 4; ++i) { + if (!swizzle.DestComponentEnabled(i)) + continue; + + dest[i] = ex2_res; + } + + Record(state.debug, iteration, dest); + break; + } + + case OpCode::Id::LG2: + { + Record(state.debug, iteration, src1); + Record(state.debug, iteration, dest); + + // LG2 only takes the first component log2 and writes it to all dest components + float24 lg2_res = float24::FromFloat32(std::log2(src1[0].ToFloat32())); + for (int i = 0; i < 4; ++i) { + if (!swizzle.DestComponentEnabled(i)) + continue; + + dest[i] = lg2_res; + } + + Record(state.debug, iteration, dest); + break; + } + default: LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); From 7d3a6016d64eea0e523fe35bb61070a0268900f7 Mon Sep 17 00:00:00 2001 From: aroulin Date: Sun, 16 Aug 2015 17:22:49 +0200 Subject: [PATCH 2/2] Shader: implement EX2 and LG2 in JIT --- src/video_core/shader/shader_jit_x64.cpp | 22 ++++++++++++++++++++-- src/video_core/shader/shader_jit_x64.h 
| 2 ++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 836942c6bb..93f608584f 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -25,8 +25,8 @@ const JitFunction instr_table[64] = { &JitCompiler::Compile_DP4, // dp4 nullptr, // dph nullptr, // unknown - nullptr, // ex2 - nullptr, // lg2 + &JitCompiler::Compile_EX2, // ex2 + &JitCompiler::Compile_LG2, // lg2 nullptr, // unknown &JitCompiler::Compile_MUL, // mul nullptr, // lge @@ -331,6 +331,24 @@ void JitCompiler::Compile_DP4(Instruction instr) { Compile_DestEnable(instr, SRC1); } +void JitCompiler::Compile_EX2(Instruction instr) { + Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); + MOVSS(XMM0, R(SRC1)); + ABI_CallFunction(reinterpret_cast<const void*>(exp2f)); + SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); + MOVAPS(SRC1, R(XMM0)); + Compile_DestEnable(instr, SRC1); +} + +void JitCompiler::Compile_LG2(Instruction instr) { + Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); + MOVSS(XMM0, R(SRC1)); + ABI_CallFunction(reinterpret_cast<const void*>(log2f)); + SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); + MOVAPS(SRC1, R(XMM0)); + Compile_DestEnable(instr, SRC1); +} + void JitCompiler::Compile_MUL(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index b88f2a0d24..104f9f4668 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -37,6 +37,8 @@ public: void Compile_ADD(Instruction instr); void Compile_DP3(Instruction instr); void Compile_DP4(Instruction instr); + void Compile_EX2(Instruction instr); + void Compile_LG2(Instruction instr); void Compile_MUL(Instruction instr); void Compile_FLR(Instruction instr); void Compile_MAX(Instruction instr);