/*
 * libbinrec: a recompiling translator for machine code
 * Copyright (c) 2016 Andrew Church
 *
 * This software may be copied and redistributed under certain conditions;
 * see the file "COPYING" in the source code distribution for details.
 * NO WARRANTY is provided with this software.
 */

#include "src/bitutils.h"
#include "src/common.h"
#include "src/endian.h"
#include "src/guest-ppc/guest-ppc-decode.h"
#include "src/guest-ppc/guest-ppc-internal.h"
#include "src/rtl.h"
#include "src/rtl-internal.h"

/*************************************************************************/
/********************** Low-level Utility routines ***********************/
/*************************************************************************/

/**
 * rtl_imm32:  Allocate a fresh INT32 RTL register and emit a LOAD_IMM
 * instruction which loads the given immediate value into it.
 *
 * [Parameters]
 *     unit: RTLUnit to operate on.
 *     value: Immediate value to load.
 * [Return value]
 *     RTL register holding the immediate value.
 */
static inline int rtl_imm32(RTLUnit * const unit, uint32_t value)
{
    const int imm_reg = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_LOAD_IMM, imm_reg, 0, 0, value);
    return imm_reg;
}

/*-----------------------------------------------------------------------*/

/**
 * rtl_imm64:  Allocate a fresh INT64 RTL register and emit a LOAD_IMM
 * instruction which loads the given immediate value into it.
 *
 * [Parameters]
 *     unit: RTLUnit to operate on.
 *     value: Immediate value to load.
 * [Return value]
 *     RTL register holding the immediate value.
 */
static inline int rtl_imm64(RTLUnit * const unit, uint64_t value)
{
    const int imm_reg = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_LOAD_IMM, imm_reg, 0, 0, value);
    return imm_reg;
}

/*-----------------------------------------------------------------------*/

/**
 * fcast_32to64:  Convert an RTL register from FLOAT32 to FLOAT64,
 * optionally checking for SNaNs and preserving their non-quiet state.
 *
 * [Parameters]
 *     unit: RTLUnit to operate on.
 *     reg: RTL register to convert.
 *     check_snan: True to check for SNaN inputs.
 * [Return value]
 *     RTL register holding the converted value.
*/
static int fcast_32to64(RTLUnit *unit, int reg, bool check_snan)
{
    /* Without SNaN handling this is a single conversion instruction. */
    if (!check_snan) {
        const int plain = rtl_alloc_register(unit, RTLTYPE_FLOAT64);
        rtl_add_insn(unit, RTLOP_FCVT, plain, reg, 0, 0);
        return plain;
    }

    /* An unordered self-comparison is true exactly when the input is a
     * NaN.  This is evaluated before the conversion itself. */
    const int nan_flag = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_FCMP, nan_flag, reg, reg, RTLFCMP_UN);

    const int widened = rtl_alloc_register(unit, RTLTYPE_FLOAT64);
    rtl_add_insn(unit, RTLOP_FCVT, widened, reg, 0, 0);

    /* The result travels through an alias so that the conditional fixup
     * code below can override it. */
    const int result_alias = rtl_alloc_alias_register(unit, RTLTYPE_FLOAT64);
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, widened, 0, result_alias);
    const int label_done = rtl_alloc_label(unit);
    rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, nan_flag, 0, label_done);

    /* The input is a NaN: skip the fixup if its quiet bit (bit 22 of the
     * 32-bit representation) is already set. */
    const int src_bits = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_BITCAST, src_bits, reg, 0, 0);
    const int quiet_flag = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_ANDI, quiet_flag, src_bits, 0, 1<<22);
    rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, quiet_flag, 0, label_done);

    /* Signaling NaN: toggle the quiet bit (bit 51) of the converted
     * value and store the result in place of the plain conversion. */
    const int wide_bits = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_BITCAST, wide_bits, widened, 0, 0);
    const int quiet_mask = rtl_imm64(unit, UINT64_C(1)<<51);
    const int fixed_bits = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_XOR, fixed_bits, wide_bits, quiet_mask, 0);
    const int fixed_val = rtl_alloc_register(unit, RTLTYPE_FLOAT64);
    rtl_add_insn(unit, RTLOP_BITCAST, fixed_val, fixed_bits, 0, 0);
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, fixed_val, 0, result_alias);

    rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_done);
    const int result = rtl_alloc_register(unit, RTLTYPE_FLOAT64);
    rtl_add_insn(unit, RTLOP_GET_ALIAS, result, 0, 0, result_alias);
    return result;
}

/*-----------------------------------------------------------------------*/

/**
 * fcast_64to32:  Convert an RTL register from FLOAT64 to FLOAT32,
 * optionally checking for SNaNs and preserving their non-quiet state.
 *
 * [Parameters]
 *     unit: RTLUnit to operate on.
 *     reg: RTL register to convert.
 *     check_snan: True to check for SNaN inputs.
 * [Return value]
 *     RTL register holding the converted value.
*/
static int fcast_64to32(RTLUnit *unit, int reg, bool check_snan)
{
    /* Without SNaN handling this is a single conversion instruction. */
    if (!check_snan) {
        const int plain = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
        rtl_add_insn(unit, RTLOP_FCVT, plain, reg, 0, 0);
        return plain;
    }

    /* An unordered self-comparison is true exactly when the input is a
     * NaN.  This is evaluated before the conversion itself. */
    const int nan_flag = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_FCMP, nan_flag, reg, reg, RTLFCMP_UN);

    const int narrowed = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
    rtl_add_insn(unit, RTLOP_FCVT, narrowed, reg, 0, 0);

    /* The result travels through an alias so that the conditional fixup
     * code below can override it. */
    const int result_alias = rtl_alloc_alias_register(unit, RTLTYPE_FLOAT32);
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, narrowed, 0, result_alias);
    const int label_done = rtl_alloc_label(unit);
    rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, nan_flag, 0, label_done);

    /* The input is a NaN: skip the fixup if its quiet bit (the 1-bit
     * field at bit 51 of the 64-bit representation) is already set. */
    const int src_bits = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_BITCAST, src_bits, reg, 0, 0);
    const int quiet_flag = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_BFEXT, quiet_flag, src_bits, 0, 51 | 1<<8);
    rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, quiet_flag, 0, label_done);

    /* Signaling NaN: toggle the quiet bit (bit 22) of the converted
     * value and store the result in place of the plain conversion. */
    const int narrow_bits = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_BITCAST, narrow_bits, narrowed, 0, 0);
    const int fixed_bits = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_XORI, fixed_bits, narrow_bits, 0, 1<<22);
    const int fixed_val = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
    rtl_add_insn(unit, RTLOP_BITCAST, fixed_val, fixed_bits, 0, 0);
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, fixed_val, 0, result_alias);

    rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_done);
    const int result = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
    rtl_add_insn(unit, RTLOP_GET_ALIAS, result, 0, 0, result_alias);
    return result;
}

/*-----------------------------------------------------------------------*/

/**
 * vfcast_32to64:  Convert an RTL register from V2_FLOAT32 to V2_FLOAT64,
 * optionally checking for SNaNs and preserving their non-quiet state.
 *
 * [Parameters]
 *     unit: RTLUnit to operate on.
 *     reg: RTL register to convert.
 *     check_snan: True to check for SNaN inputs.
 * [Return value]
 *     RTL register holding the converted value.
*/
static int vfcast_32to64(RTLUnit *unit, int reg, bool check_snan)
{
    /* Without SNaN handling this is a single conversion instruction. */
    if (!check_snan) {
        const int plain = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64);
        rtl_add_insn(unit, RTLOP_VFCVT, plain, reg, 0, 0);
        return plain;
    }

    /* Unordered self-comparison of both lanes; the result is nonzero if
     * either lane is a NaN. */
    const int nan_mask = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_VFCMP, nan_mask, reg, reg, RTLFCMP_UN);

    const int widened = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64);
    rtl_add_insn(unit, RTLOP_VFCVT, widened, reg, 0, 0);

    /* The result travels through an alias so that the conditional fixup
     * code below can override it. */
    const int result_alias =
        rtl_alloc_alias_register(unit, RTLTYPE_V2_FLOAT64);
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, widened, 0, result_alias);
    const int label_done = rtl_alloc_label(unit);
    rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, nan_mask, 0, label_done);

    // FIXME: This could be done more efficiently if we had integer
    // vector types and an AND-complement instruction.
    const int src0 = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
    rtl_add_insn(unit, RTLOP_VEXTRACT, src0, reg, 0, 0);
    const int src1 = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
    rtl_add_insn(unit, RTLOP_VEXTRACT, src1, reg, 0, 1);

    /* Per-lane NaN masks, each ANDed below with a value that has at most
     * bit 51 set.  NOTE(review): this presumably relies on VFCMP putting
     * the lane 0 result in the low 32 bits and the lane 1 result in the
     * high 32 bits, so that lane 0 needs the shift and lane 1 does not --
     * confirm against the RTL specification. */
    const int lane0_nan = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_SLLI, lane0_nan, nan_mask, 0, 32);
    const int lane1_nan = nan_mask;

    /* Invert the source bits and isolate bit 22 (the 32-bit quiet bit),
     * which leaves that bit set exactly when the source's quiet bit is
     * clear. */
    const int src_bits0 = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_BITCAST, src_bits0, src0, 0, 0);
    const int src_bits1 = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_BITCAST, src_bits1, src1, 0, 0);
    const int inverted0 = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_NOT, inverted0, src_bits0, 0, 0);
    const int inverted1 = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_NOT, inverted1, src_bits1, 0, 0);
    const int unquiet32_0 = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_ANDI, unquiet32_0, inverted0, 0, 1<<22);
    const int unquiet32_1 = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_ANDI, unquiet32_1, inverted1, 0, 1<<22);

    const int conv0 = rtl_alloc_register(unit, RTLTYPE_FLOAT64);
    rtl_add_insn(unit, RTLOP_VEXTRACT, conv0, widened, 0, 0);
    const int conv1 = rtl_alloc_register(unit, RTLTYPE_FLOAT64);
    rtl_add_insn(unit, RTLOP_VEXTRACT, conv1, widened, 0, 1);

    /* Move the flag from bit 22 to bit 51 (the 64-bit quiet bit). */
    const int extended0 = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_ZCAST, extended0, unquiet32_0, 0, 0);
    const int extended1 = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_ZCAST, extended1, unquiet32_1, 0, 0);
    const int unquiet64_0 = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_SLLI, unquiet64_0, extended0, 0, 29);
    const int unquiet64_1 = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_SLLI, unquiet64_1, extended1, 0, 29);

    /* Toggle the quiet bit of each converted lane whose mask and flag
     * are both set. */
    const int conv_bits0 = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_BITCAST, conv_bits0, conv0, 0, 0);
    const int conv_bits1 = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_BITCAST, conv_bits1, conv1, 0, 0);
    const int toggle0 = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_AND, toggle0, unquiet64_0, lane0_nan, 0);
    const int toggle1 = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_AND, toggle1, unquiet64_1, lane1_nan, 0);
    const int fixed_bits0 = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_XOR, fixed_bits0, conv_bits0, toggle0, 0);
    const int fixed_bits1 = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_XOR, fixed_bits1, conv_bits1, toggle1, 0);

    /* Reassemble the vector and store it in place of the plain result. */
    const int fixed0 = rtl_alloc_register(unit, RTLTYPE_FLOAT64);
    rtl_add_insn(unit, RTLOP_BITCAST, fixed0, fixed_bits0, 0, 0);
    const int fixed1 = rtl_alloc_register(unit, RTLTYPE_FLOAT64);
    rtl_add_insn(unit, RTLOP_BITCAST, fixed1, fixed_bits1, 0, 0);
    const int fixed_pair = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64);
    rtl_add_insn(unit, RTLOP_VBUILD2, fixed_pair, fixed0, fixed1, 0);
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, fixed_pair, 0, result_alias);

    rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_done);
    const int result = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64);
    rtl_add_insn(unit, RTLOP_GET_ALIAS, result, 0, 0, result_alias);
    return result;
}

/*-----------------------------------------------------------------------*/

/**
 * vfcast_64to32:  Convert an RTL register from V2_FLOAT64 to V2_FLOAT32,
 * optionally checking for SNaNs and preserving their non-quiet state.
 *
 * [Parameters]
 *     unit: RTLUnit to operate on.
 *     reg: RTL register to convert.
 *     check_snan: True to check for SNaN inputs.
 * [Return value]
 *     RTL register holding the converted value.
 */
static int vfcast_64to32(RTLUnit *unit, int reg, bool check_snan)
{
    /* Without SNaN handling this is a single conversion instruction. */
    if (!check_snan) {
        const int plain = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32);
        rtl_add_insn(unit, RTLOP_VFCVT, plain, reg, 0, 0);
        return plain;
    }

    /* Unordered self-comparison of both lanes; the result is nonzero if
     * either lane is a NaN. */
    const int nan_mask = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_VFCMP, nan_mask, reg, reg, RTLFCMP_UN);

    const int narrowed = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32);
    rtl_add_insn(unit, RTLOP_VFCVT, narrowed, reg, 0, 0);

    /* The result travels through an alias so that the conditional fixup
     * code below can override it. */
    const int result_alias =
        rtl_alloc_alias_register(unit, RTLTYPE_V2_FLOAT32);
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, narrowed, 0, result_alias);
    const int label_done = rtl_alloc_label(unit);
    rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, nan_mask, 0, label_done);

    // FIXME: This could be done more efficiently if we had integer
    // vector types and an AND-complement instruction.
    const int src0 = rtl_alloc_register(unit, RTLTYPE_FLOAT64);
    rtl_add_insn(unit, RTLOP_VEXTRACT, src0, reg, 0, 0);
    const int src1 = rtl_alloc_register(unit, RTLTYPE_FLOAT64);
    rtl_add_insn(unit, RTLOP_VEXTRACT, src1, reg, 0, 1);

    /* Split the comparison result into its low and high 32-bit halves
     * (used as the lane 0 and lane 1 masks respectively) and narrow each
     * to INT32. */
    const int lane0_nan64 = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_BFEXT, lane0_nan64, nan_mask, 0, 0 | 32<<8);
    const int lane1_nan64 = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_BFEXT, lane1_nan64, nan_mask, 0, 32 | 32<<8);
    const int lane0_nan = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_ZCAST, lane0_nan, lane0_nan64, 0, 0);
    const int lane1_nan = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_ZCAST, lane1_nan, lane1_nan64, 0, 0);

    /* Invert the source bits and isolate bit 51 (the 64-bit quiet bit),
     * which leaves that bit set exactly when the source's quiet bit is
     * clear. */
    const int src_bits0 = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_BITCAST, src_bits0, src0, 0, 0);
    const int src_bits1 = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_BITCAST, src_bits1, src1, 0, 0);
    const int inverted0 = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_NOT, inverted0, src_bits0, 0, 0);
    const int inverted1 = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_NOT, inverted1, src_bits1, 0, 0);
    const int quiet_mask = rtl_imm64(unit, UINT64_C(1)<<51);
    const int unquiet64_0 = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_AND, unquiet64_0, inverted0, quiet_mask, 0);
    const int unquiet64_1 = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_AND, unquiet64_1, inverted1, quiet_mask, 0);

    const int conv0 = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
    rtl_add_insn(unit, RTLOP_VEXTRACT, conv0, narrowed, 0, 0);
    const int conv1 = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
    rtl_add_insn(unit, RTLOP_VEXTRACT, conv1, narrowed, 0, 1);

    /* Move the flag from bit 51 to bit 22 (the 32-bit quiet bit). */
    const int shifted0 = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_SRLI, shifted0, unquiet64_0, 0, 29);
    const int shifted1 = rtl_alloc_register(unit, RTLTYPE_INT64);
    rtl_add_insn(unit, RTLOP_SRLI, shifted1, unquiet64_1, 0, 29);
    const int unquiet32_0 = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_ZCAST, unquiet32_0, shifted0, 0, 0);
    const int unquiet32_1 = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_ZCAST, unquiet32_1, shifted1, 0, 0);

    /* Toggle the quiet bit of each converted lane whose mask and flag
     * are both set. */
    const int conv_bits0 = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_BITCAST, conv_bits0, conv0, 0, 0);
    const int conv_bits1 = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_BITCAST, conv_bits1, conv1, 0, 0);
    const int toggle0 = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_AND, toggle0, unquiet32_0, lane0_nan, 0);
    const int toggle1 = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_AND, toggle1, unquiet32_1, lane1_nan, 0);
    const int fixed_bits0 = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_XOR, fixed_bits0, conv_bits0, toggle0, 0);
    const int fixed_bits1 = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_XOR, fixed_bits1, conv_bits1, toggle1, 0);

    /* Reassemble the vector and store it in place of the plain result. */
    const int fixed0 = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
    rtl_add_insn(unit, RTLOP_BITCAST, fixed0, fixed_bits0, 0, 0);
    const int fixed1 = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
    rtl_add_insn(unit, RTLOP_BITCAST, fixed1, fixed_bits1, 0, 0);
    const int fixed_pair = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32);
    rtl_add_insn(unit, RTLOP_VBUILD2, fixed_pair, fixed0, fixed1, 0);
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, fixed_pair, 0, result_alias);

    rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_done);
    const int result = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32);
    rtl_add_insn(unit, RTLOP_GET_ALIAS, result, 0, 0, result_alias);
    return result;
}

/*-----------------------------------------------------------------------*/

/**
 * crm_to_mask:  Return a 32-bit mask corresponding to an 8-bit CRM field
 * for mtcrf or mtfsf.
*/
static inline CONST_FUNCTION uint32_t crm_to_mask(uint8_t crm)
{
    /* Each bit of the CRM value selects one 4-bit field of the mask:
     * bit 0 selects the lowest nibble and bit 7 the highest. */
    uint32_t mask = 0;
    for (int field = 0; field < 8; field++) {
        if (crm & (1u << field)) {
            mask |= UINT32_C(0xF) << (4 * field);
        }
    }
    return mask;
}

/*************************************************************************/
/********************* Translation utility routines **********************/
/*************************************************************************/

/**
 * convert_fpr:  Convert a floating-point value from one type to another,
 * and return an RTL register containing the converted value.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     index: FPR index.
 *     reg: RTL register containing FPR's value.
 *     old_type: Type of "reg".
 *     new_type: Type to convert value to.  Must be different from old_type.
 *     snan_safe: True if the value is known not to be a signaling NaN.
 * [Return value]
 *     Index of RTL register containing converted value.
*/ static int convert_fpr(GuestPPCContext *ctx, int index, int reg, RTLDataType old_type, RTLDataType new_type, bool snan_safe) { if (UNLIKELY(!reg)) { return 0; // Don't ASSERT() over an error that already occurred. } ASSERT(reg == ctx->live.fpr[index]); ASSERT(old_type != new_type); ASSERT(rtl_type_is_float(old_type) || (rtl_type_is_vector(old_type) && rtl_vector_length(old_type) == 2 && rtl_type_is_float(rtl_vector_element_type(old_type)))); ASSERT(rtl_type_is_float(new_type) || (rtl_type_is_vector(new_type) && rtl_vector_length(new_type) == 2 && rtl_type_is_float(rtl_vector_element_type(new_type)))); const bool need_snan_check = !snan_safe && !(ctx->handle->guest_opt & BINREC_OPT_G_PPC_ASSUME_NO_SNAN); RTLUnit * const unit = ctx->unit; int new_reg; /* If converting between 32-bit and 64-bit formats, save and restore * floating-point state to avoid leaving stale exceptions, unless * NO_FPSCR_STATE is disabled (in which case we don't care about host * exceptions in the first place). But if converting from FLOAT32 to * FLOAT64 (scalar or vector) and not checking for SNaNs, we don't * need to bother because no exceptions can be raised in that case. */ int fpstate = 0; if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) { const RTLDataType old_stype = (rtl_type_is_vector(old_type) ? rtl_vector_element_type(old_type) : old_type); const RTLDataType new_stype = (rtl_type_is_vector(new_type) ? 
rtl_vector_element_type(new_type) : new_type); if (old_stype != new_stype && !(new_stype == RTLTYPE_FLOAT64 && !need_snan_check)) { fpstate = rtl_alloc_register(unit, RTLTYPE_FPSTATE); rtl_add_insn(unit, RTLOP_FGETSTATE, fpstate, 0, 0, 0); } } if (old_type == RTLTYPE_V2_FLOAT64) { if (new_type == RTLTYPE_V2_FLOAT32) { new_reg = vfcast_64to32(unit, reg, need_snan_check); } else { const int f64 = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_VEXTRACT, f64, reg, 0, 0); if (new_type == RTLTYPE_FLOAT64) { new_reg = f64; } else { ASSERT(new_type == RTLTYPE_FLOAT32); new_reg = fcast_64to32(unit, f64, need_snan_check); } } } else if (old_type == RTLTYPE_V2_FLOAT32) { if (new_type == RTLTYPE_FLOAT32) { new_reg = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_VEXTRACT, new_reg, reg, 0, 0); } else { const int f64x2 = vfcast_32to64(unit, reg, need_snan_check); if (new_type == RTLTYPE_V2_FLOAT64) { new_reg = f64x2; } else { ASSERT(new_type == RTLTYPE_FLOAT64); ASSERT(unit->aliases[ctx->alias.fpr[index]].type == RTLTYPE_V2_FLOAT64); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, f64x2, 0, ctx->alias.fpr[index]); ctx->live.fpr[index] = f64x2; ctx->fpr_raw[index] = 0; new_reg = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_VEXTRACT, new_reg, f64x2, 0, 0); } } } else if (old_type == RTLTYPE_FLOAT64) { if (new_type == RTLTYPE_FLOAT32) { new_reg = fcast_64to32(unit, reg, need_snan_check); } else { ASSERT(unit->aliases[ctx->alias.fpr[index]].type == RTLTYPE_V2_FLOAT64); const int old_f64x2 = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64); rtl_add_insn(unit, RTLOP_GET_ALIAS, old_f64x2, 0, 0, ctx->alias.fpr[index]); const int f64x2 = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64); rtl_add_insn(unit, RTLOP_VINSERT, f64x2, old_f64x2, reg, 0); if (new_type == RTLTYPE_V2_FLOAT64) { new_reg = f64x2; } else { ASSERT(new_type == RTLTYPE_V2_FLOAT32); new_reg = vfcast_64to32(unit, f64x2, need_snan_check); } } } else { ASSERT(old_type == 
RTLTYPE_FLOAT32); if (new_type == RTLTYPE_V2_FLOAT32) { new_reg = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32); rtl_add_insn(unit, RTLOP_VBROADCAST, new_reg, reg, 0, 0); } else { const int f64 = fcast_32to64(unit, reg, need_snan_check); if (new_type == RTLTYPE_V2_FLOAT64) { new_reg = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64); rtl_add_insn(unit, RTLOP_VBROADCAST, new_reg, f64, 0, 0); } else { ASSERT(new_type == RTLTYPE_FLOAT64); new_reg = f64; } } } if (fpstate) { rtl_add_insn(unit, RTLOP_FSETSTATE, 0, fpstate, 0, 0); } return new_reg; } /*-----------------------------------------------------------------------*/ /** * get_gpr, get_fpr, get_cr, get_crb, get_lr, get_ctr, get_xer, get_xer_so, * get_fpscr, get_fr_fi_fprf: Return an RTL register containing the value * of the given PowerPC register or register field. This will either be * the register last used in a corresponding set or get operation, or a * newly allocated register (in which case an appropriate GET_ALIAS * instruction will also be added). * * For get_crb(), get_xer_so(), and get_fr_fi_fprf(), if the USE_SPLIT_FIELDS * optimization is not enabled, the value will be extracted from CR/XER/FPSCR * respectively. * * [Parameters] * ctx: Translation context. * index: PowerPC register index (get_gpr(), get_fpr()) or CR bit index * (get_crb()). * [Return value] * RTL register index. */ static inline int get_gpr(GuestPPCContext * const ctx, int index) { if (ctx->live.gpr[index]) { return ctx->live.gpr[index]; } else { RTLUnit * const unit = ctx->unit; const int reg = rtl_alloc_register(unit, RTLTYPE_INT32); ASSERT(ctx->alias.gpr[index]); rtl_add_insn(unit, RTLOP_GET_ALIAS, reg, 0, 0, ctx->alias.gpr[index]); ctx->live.gpr[index] = reg; return reg; } } static inline int get_fpr(GuestPPCContext * const ctx, int index) { if (ctx->live.fpr[index]) { return ctx->live.fpr[index]; } else { RTLUnit * const unit = ctx->unit; const RTLDataType base_type = (ctx->fpr_is_ps & (1 << index) ? 
RTLTYPE_V2_FLOAT64 : RTLTYPE_FLOAT64); const int reg = rtl_alloc_register(unit, base_type); ASSERT(ctx->alias.fpr[index]); rtl_add_insn(unit, RTLOP_GET_ALIAS, reg, 0, 0, ctx->alias.fpr[index]); ctx->live.fpr[index] = reg; return reg; } } static inline int get_cr(GuestPPCContext * const ctx) { if (ctx->live.cr) { return ctx->live.cr; } else { RTLUnit * const unit = ctx->unit; const int reg = rtl_alloc_register(unit, RTLTYPE_INT32); ASSERT(ctx->alias.cr); rtl_add_insn(unit, RTLOP_GET_ALIAS, reg, 0, 0, ctx->alias.cr); ctx->live.cr = reg; return reg; } } static inline int get_crb(GuestPPCContext * const ctx, int index) { if (ctx->live.crb[index]) { return ctx->live.crb[index]; } else { RTLUnit * const unit = ctx->unit; int reg; if (ctx->crb_changed_bitrev & (0x80000000 >> index)) { ASSERT(ctx->alias.crb[index]); reg = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_GET_ALIAS, reg, 0, 0, ctx->alias.crb[index]); } else { const int cr = get_cr(ctx); reg = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFEXT, reg, cr, 0, (31-index) | (1<<8)); } if (ctx->use_split_fields) { ctx->live.crb[index] = reg; } return reg; } } static inline int get_lr(GuestPPCContext * const ctx) { if (ctx->live.lr) { return ctx->live.lr; } else { RTLUnit * const unit = ctx->unit; const int reg = rtl_alloc_register(unit, RTLTYPE_INT32); ASSERT(ctx->alias.lr); rtl_add_insn(unit, RTLOP_GET_ALIAS, reg, 0, 0, ctx->alias.lr); ctx->live.lr = reg; return reg; } } static inline int get_ctr(GuestPPCContext * const ctx) { if (ctx->live.ctr) { return ctx->live.ctr; } else { RTLUnit * const unit = ctx->unit; const int reg = rtl_alloc_register(unit, RTLTYPE_INT32); ASSERT(ctx->alias.ctr); rtl_add_insn(unit, RTLOP_GET_ALIAS, reg, 0, 0, ctx->alias.ctr); ctx->live.ctr = reg; return reg; } } static inline int get_xer(GuestPPCContext * const ctx) { if (ctx->live.xer) { return ctx->live.xer; } else { RTLUnit * const unit = ctx->unit; const int reg = rtl_alloc_register(unit, 
RTLTYPE_INT32); ASSERT(ctx->alias.xer); rtl_add_insn(unit, RTLOP_GET_ALIAS, reg, 0, 0, ctx->alias.xer); ctx->live.xer = reg; return reg; } } static inline int get_fpscr(GuestPPCContext * const ctx) { if (ctx->live.fpscr) { return ctx->live.fpscr; } else { RTLUnit * const unit = ctx->unit; const int reg = rtl_alloc_register(unit, RTLTYPE_INT32); ASSERT(ctx->alias.fpscr); rtl_add_insn(unit, RTLOP_GET_ALIAS, reg, 0, 0, ctx->alias.fpscr); ctx->live.fpscr = reg; return reg; } } static inline int get_fr_fi_fprf(GuestPPCContext * const ctx) { if (ctx->live.fr_fi_fprf) { return ctx->live.fr_fi_fprf; } else { RTLUnit * const unit = ctx->unit; int reg; if (ctx->use_split_fields) { ASSERT(ctx->alias.fr_fi_fprf); reg = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_GET_ALIAS, reg, 0, 0, ctx->alias.fr_fi_fprf); ctx->live.fr_fi_fprf = reg; } else { int fpscr; if (ctx->live.fpscr) { fpscr = ctx->live.fpscr; } else { /* If FPSCR was not already live, it's generally not safe * to make it live here (because so many code paths set * FPSCR/FPRF conditionally). 
*/ fpscr = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_GET_ALIAS, fpscr, 0, 0, ctx->alias.fpscr); } reg = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFEXT, reg, fpscr, 0, FPSCR_FPRF_SHIFT | 7<<8); } return reg; } } static inline int get_xer_so(GuestPPCContext * const ctx) { if (ctx->live.xer_so) { return ctx->live.xer_so; } else { RTLUnit * const unit = ctx->unit; int reg; if (ctx->use_split_fields) { ASSERT(ctx->alias.xer_so); reg = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_GET_ALIAS, reg, 0, 0, ctx->alias.xer_so); } else { const int xer = get_xer(ctx); reg = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFEXT, reg, xer, 0, XER_SO_SHIFT | 1<<8); } ctx->live.xer_so = reg; return reg; } } /*-----------------------------------------------------------------------*/ /** * get_fpr_as_type: Return the value of the given floating-point register * converted to the given type. * * [Parameters] * ctx: Translation context. * index: CR bit index. * type: Type of value to return. * [Return value] * RTL register index. */ static inline int get_fpr_as_type(GuestPPCContext * const ctx, int index, RTLDataType type) { RTLUnit * const unit = ctx->unit; int reg = get_fpr(ctx, index); const RTLDataType current_type = unit->regs[reg].type; if (type != current_type) { uint32_t safe_set = ctx->fpr_is_safe; if (rtl_type_is_vector(type)) { safe_set &= ctx->ps1_is_safe; } const bool snan_safe = (safe_set & (1 << index)) != 0; reg = convert_fpr(ctx, index, reg, current_type, type, snan_safe); } return reg; } /*-----------------------------------------------------------------------*/ /** * get_ps1: Return the value in the second paired-single slot of a * floating-point register. * * [Parameters] * ctx: Translation context. * index: CR bit index. * type: Type of value to return (either RTLTYPE_FLOAT32 or * RTLTYPE_FLOAT64). * [Return value] * RTL register index. 
*/
static inline int get_ps1(GuestPPCContext * const ctx, int index,
                          RTLDataType type)
{
    RTLUnit * const unit = ctx->unit;

    int reg;
    if (ctx->live.fpr[index]) {
        const RTLDataType live_type = unit->regs[ctx->live.fpr[index]].type;
        if (rtl_type_is_vector(live_type)) {
            /* Take element 1 of the live vector, in its element type. */
            reg = rtl_alloc_register(unit,
                                     rtl_vector_element_type(live_type));
            rtl_add_insn(unit, RTLOP_VEXTRACT,
                         reg, ctx->live.fpr[index], 0, 1);
        } else {
            /* We never write a scalar FLOAT64 to a paired-single register. */
            ASSERT(live_type == RTLTYPE_FLOAT32);
            reg = ctx->live.fpr[index];
        }
    } else {
        /* Nothing live yet: load the register as a V2_FLOAT64 pair, make
         * that the live value, and take element 1. */
        const int pair = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64);
        rtl_add_insn(unit, RTLOP_GET_ALIAS,
                     pair, 0, 0, ctx->alias.fpr[index]);
        ctx->live.fpr[index] = pair;
        reg = rtl_alloc_register(unit, RTLTYPE_FLOAT64);
        rtl_add_insn(unit, RTLOP_VEXTRACT, reg, pair, 0, 1);
    }

    const RTLDataType current_type = unit->regs[reg].type;
    if (current_type != type) {
        /* Unsigned shift: index 31 would overflow a signed int. */
        const bool need_snan_check =
            !(ctx->ps1_is_safe & (1u << index))
            && !(ctx->handle->guest_opt & BINREC_OPT_G_PPC_ASSUME_NO_SNAN);
        /* Avoid raising exceptions, as in convert_fpr(). */
        int fpstate = 0;
        if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)
         && !(type == RTLTYPE_FLOAT64 && !need_snan_check)) {
            fpstate = rtl_alloc_register(unit, RTLTYPE_FPSTATE);
            rtl_add_insn(unit, RTLOP_FGETSTATE, fpstate, 0, 0, 0);
        }
        if (type == RTLTYPE_FLOAT64) {
            reg = fcast_32to64(unit, reg, need_snan_check);
        } else {
            reg = fcast_64to32(unit, reg, need_snan_check);
        }
        if (fpstate) {
            rtl_add_insn(unit, RTLOP_FSETSTATE, 0, fpstate, 0, 0);
        }
    }

    return reg;
}

/*-----------------------------------------------------------------------*/

/**
 * test_crb:  Return an RTL register containing a value which is nonzero if
 * the given CR bit is set and zero if it is clear.  This function does not
 * initialize the CR bit alias if it has not already been loaded.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     index: CR bit index.
 * [Return value]
 *     RTL register index.
*/
static inline int test_crb(GuestPPCContext * const ctx, int index)
{
    if (ctx->live.crb[index]) {
        return ctx->live.crb[index];
    }

    RTLUnit * const unit = ctx->unit;
    int reg;
    if (ctx->alias.crb[index]) {
        reg = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_GET_ALIAS,
                     reg, 0, 0, ctx->alias.crb[index]);
        ctx->live.crb[index] = reg;
    } else {
        /* No alias for this bit: mask it out of the full CR value in
         * place.  The result is nonzero, but not necessarily 1, when the
         * bit is set, and it is not recorded as live. */
        const int cr = get_cr(ctx);
        reg = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_ANDI, reg, cr, 0, 0x80000000 >> index);
    }
    return reg;
}

/*-----------------------------------------------------------------------*/

/**
 * set_gpr, set_fpr, set_cr, set_crb, set_lr, set_ctr, set_xer, set_fpscr,
 * set_fr_fi_fprf:  Store the given RTL register to the given PowerPC
 * register.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     index: PowerPC register index (set_gpr(), set_fpr()) or CR bit index
 *         (set_crb()).
 *     reg: Register to store.
 *     so: Register holding the value of XER[SO], 0 if not available, or
 *         -1 if XER[SO] has not changed (set_xer() only).
 */
static inline void set_gpr(GuestPPCContext * const ctx, int index, int reg)
{
    RTLUnit * const unit = ctx->unit;

    /* A previous store recorded in last_set is superseded by this one. */
    if (ctx->last_set.gpr[index] >= 0) {
        rtl_opt_kill_insn(unit, ctx->last_set.gpr[index], false, false);
    }
    ctx->last_set.gpr[index] = unit->num_insns;
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, reg, 0, ctx->alias.gpr[index]);
    ctx->live.gpr[index] = reg;
    ctx->gpr_raw[index] = 0;
}

static inline void set_fpr(GuestPPCContext * const ctx, int index, int reg)
{
    RTLUnit * const unit = ctx->unit;

    /* If overwriting a different type with FLOAT64, we need to store the
     * second half of the old value to the state block or alias. */
    if (ctx->live.fpr[index]
     && unit->regs[reg].type == RTLTYPE_FLOAT64
     && unit->regs[ctx->live.fpr[index]].type != RTLTYPE_FLOAT64) {
        const int old_reg = ctx->live.fpr[index];
        if (unit->regs[old_reg].type == RTLTYPE_V2_FLOAT64) {
            /* This typically happens if a register is used in paired-single
             * mode at a different point in the unit but is being used in
             * double-precision mode here.  Just insert the new value into
             * the old vector and use the updated vector as the current
             * FPR value. */
            const int merged = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64);
            rtl_add_insn(unit, RTLOP_VINSERT, merged, old_reg, reg, 0);
            reg = merged;
        } else {
            /* The old value is FLOAT32 or V2_FLOAT32: obtain its second
             * slot as a FLOAT32 and widen it to FLOAT64. */
            int ps1;
            if (unit->regs[old_reg].type == RTLTYPE_FLOAT32) {
                ps1 = old_reg;
            } else {
                ASSERT(unit->regs[old_reg].type == RTLTYPE_V2_FLOAT32);
                ps1 = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
                rtl_add_insn(unit, RTLOP_VEXTRACT, ps1, old_reg, 0, 1);
            }
            int fpstate = 0;
            if (!(ctx->handle->guest_opt
                  & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) {
                fpstate = rtl_alloc_register(unit, RTLTYPE_FPSTATE);
                rtl_add_insn(unit, RTLOP_FGETSTATE, fpstate, 0, 0, 0);
            }
            /* Unsigned shifts below: index 31 would overflow a signed
             * int. */
            const bool snan_safe =
                (ctx->ps1_is_safe & (1u << index))
                || (ctx->handle->guest_opt
                    & BINREC_OPT_G_PPC_ASSUME_NO_SNAN);
            const int ps1_64 = fcast_32to64(unit, ps1, !snan_safe);
            if (ctx->fpr_is_ps & (1u << index)) {
                const int pair = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64);
                rtl_add_insn(unit, RTLOP_VBUILD2, pair, reg, ps1_64, 1);
                reg = pair;
            } else {
                /* Store the widened second slot directly to the upper 8
                 * bytes of this FPR's 16-byte entry in the state block. */
                rtl_add_insn(
                    unit, RTLOP_STORE, 0, ctx->psb_reg, ps1_64,
                    ctx->handle->setup.state_offsets_ppc.fpr + index*16 + 8);
            }
            if (fpstate) {
                rtl_add_insn(unit, RTLOP_FSETSTATE, 0, fpstate, 0, 0);
            }
        }
    }

    /* No SET_ALIAS is emitted here; the register is only marked dirty. */
    ctx->live.fpr[index] = reg;
    ctx->fpr_dirty |= 1u << index;
    ctx->fpr_raw[index] = 0;
    ctx->ps_raw[index] = 0;
}

static inline void set_cr(GuestPPCContext * const ctx, int reg)
{
    RTLUnit * const unit = ctx->unit;

    if (ctx->last_set.cr >= 0) {
        rtl_opt_kill_insn(unit, ctx->last_set.cr, false, false);
    }
    ctx->last_set.cr = unit->num_insns;
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, reg, 0, ctx->alias.cr);
    ctx->live.cr = reg;
}

static inline void set_crb(GuestPPCContext * const ctx, int index, int reg)
{
    RTLUnit * const unit = ctx->unit;

    if (ctx->use_split_fields) {
        ASSERT(ctx->crb_changed_bitrev & (0x80000000 >> index));
        if (ctx->last_set.crb[index] >= 0) {
            rtl_opt_kill_insn(unit, ctx->last_set.crb[index], false, false);
        }
        ctx->last_set.crb[index] = unit->num_insns;
        rtl_add_insn(unit, RTLOP_SET_ALIAS,
                     0, reg, 0, ctx->alias.crb[index]);
        ctx->live.crb[index] = reg;
        /* Unsigned shift: index 31 would overflow a signed int. */
        ctx->crb_dirty |= 1u << index;
    } else {
        /* Without split fields, insert the bit into the full CR value. */
        const int old_cr = get_cr(ctx);
        const int new_cr = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_BFINS,
                     new_cr, old_cr, reg, (31-index) | 1<<8);
        set_cr(ctx, new_cr);
    }
}

static inline void set_lr(GuestPPCContext * const ctx, int reg)
{
    RTLUnit * const unit = ctx->unit;

    if (ctx->last_set.lr >= 0) {
        rtl_opt_kill_insn(unit, ctx->last_set.lr, false, false);
    }
    ctx->last_set.lr = unit->num_insns;
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, reg, 0, ctx->alias.lr);
    ctx->live.lr = reg;
}

static inline void set_ctr(GuestPPCContext * const ctx, int reg)
{
    RTLUnit * const unit = ctx->unit;

    if (ctx->last_set.ctr >= 0) {
        rtl_opt_kill_insn(unit, ctx->last_set.ctr, false, false);
    }
    ctx->last_set.ctr = unit->num_insns;
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, reg, 0, ctx->alias.ctr);
    ctx->live.ctr = reg;
}

static inline void set_xer(GuestPPCContext * const ctx, int reg, int so)
{
    RTLUnit * const unit = ctx->unit;

    if (ctx->last_set.xer >= 0) {
        rtl_opt_kill_insn(unit, ctx->last_set.xer, false, false);
    }
    ctx->last_set.xer = unit->num_insns;
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, reg, 0, ctx->alias.xer);
    ctx->live.xer = reg;

    if (so >= 0) {
        if (ctx->alias.xer_so) {
            if (ctx->last_set.xer_so >= 0) {
                rtl_opt_kill_insn(unit, ctx->last_set.xer_so, false, false);
            }
            if (!so) {
                so = rtl_alloc_register(unit, RTLTYPE_INT32);
                rtl_add_insn(unit, RTLOP_BFEXT,
                             so, reg, 0, XER_SO_SHIFT | 1<<8);
            }
            ctx->last_set.xer_so = unit->num_insns;
            rtl_add_insn(unit, RTLOP_SET_ALIAS, 0,
so, 0, ctx->alias.xer_so); } ctx->live.xer_so = so; } } static inline void set_fpscr(GuestPPCContext * const ctx, int reg) { RTLUnit * const unit = ctx->unit; if (ctx->last_set.fpscr >= 0) { rtl_opt_kill_insn(unit, ctx->last_set.fpscr, false, false); } ctx->last_set.fpscr = unit->num_insns; rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, reg, 0, ctx->alias.fpscr); ctx->live.fpscr = reg; } static inline void set_fr_fi_fprf(GuestPPCContext * const ctx, int reg) { RTLUnit * const unit = ctx->unit; if (ctx->use_split_fields) { if (ctx->last_set.fr_fi_fprf >= 0) { rtl_opt_kill_insn(unit, ctx->last_set.fr_fi_fprf, false, false); } ctx->last_set.fr_fi_fprf = unit->num_insns; rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, reg, 0, ctx->alias.fr_fi_fprf); ctx->live.fr_fi_fprf = reg; } else { const int old_fpscr = get_fpscr(ctx); const int new_fpscr = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFINS, new_fpscr, old_fpscr, reg, FPSCR_FPRF_SHIFT | 7<<8); set_fpscr(ctx, new_fpscr); } } /*-----------------------------------------------------------------------*/ /** * set_crf: Set the given CR 4-bit field from the given four registers, * each assumed to hold a 1-bit value. This generates more efficient code * than a sequence of calls to set_crb() if the USE_SPLIT_FIELDS * optimization is not enabled. 
*/
static inline void set_crf(GuestPPCContext * const ctx, int index,
                           int bit0, int bit1, int bit2, int bit3)
{
    if (ctx->use_split_fields) {
        /* Each CR bit is stored separately in this mode, so hand the
         * four bits to set_crb() one at a time. */
        set_crb(ctx, index*4+0, bit0);
        set_crb(ctx, index*4+1, bit1);
        set_crb(ctx, index*4+2, bit2);
        set_crb(ctx, index*4+3, bit3);
        return;
    }

    /* Assemble the 4-bit field (bit0 in the highest position) and insert
     * it into CR with a single BFINS. */
    RTLUnit * const unit = ctx->unit;
    const int cr_before = get_cr(ctx);

    const int b0_shifted = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SLLI, b0_shifted, bit0, 0, 3);
    const int b1_shifted = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SLLI, b1_shifted, bit1, 0, 2);
    const int b2_shifted = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SLLI, b2_shifted, bit2, 0, 1);

    const int upper_pair = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_OR, upper_pair, b0_shifted, b1_shifted, 0);
    const int lower_pair = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_OR, lower_pair, b2_shifted, bit3, 0);
    const int field = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_OR, field, upper_pair, lower_pair, 0);

    const int cr_after = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_BFINS,
                 cr_after, cr_before, field, ((7-index)*4) | 4<<8);
    set_cr(ctx, cr_after);
}

/*-----------------------------------------------------------------------*/

/**
 * set_fr_fi_fprf_and_flush:  Store the given value to fr_fi_fprf, and
 * immediately flush it out.
 */
static void set_fr_fi_fprf_and_flush(GuestPPCContext *ctx, int reg)
{
    set_fr_fi_fprf(ctx, reg);

    /* Forget the live register and the pending-store index for whichever
     * alias set_fr_fi_fprf() wrote: FPSCR itself when fields are not
     * split, or the dedicated fr_fi_fprf alias when they are. */
    if (!ctx->use_split_fields) {
        ctx->live.fpscr = 0;
        ctx->last_set.fpscr = -1;
    } else {
        ctx->live.fr_fi_fprf = 0;
        ctx->last_set.fr_fi_fprf = -1;
    }
}

/*-----------------------------------------------------------------------*/

/**
 * get_fpr_scalar_type:  Return the scalar type corresponding to the
 * current mode of the given floating-point register.
*/
static inline RTLDataType get_fpr_scalar_type(GuestPPCContext *ctx, int index)
{
    const int live_reg = ctx->live.fpr[index];
    if (!live_reg) {
        /* Registers which are not live are treated as FLOAT64. */
        return RTLTYPE_FLOAT64;
    }

    const RTLDataType live_type = ctx->unit->regs[live_reg].type;
    if (rtl_type_is_vector(live_type)) {
        return rtl_vector_element_type(live_type);
    }
    return live_type;
}

/*-----------------------------------------------------------------------*/

/**
 * get_fpscr_fex_vx:  Return RTL registers containing the FEX and VX bits
 * for the given value of FPSCR.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     fpscr: RTL register containing the FPSCR value.
 *     fex_ret: Pointer to variable to receive an RTL register containing
 *         the value of the FEX bit.
 *     vx_ret: Pointer to variable to receive an RTL register containing
 *         the value of the VX bit.
 */
static void get_fpscr_fex_vx(GuestPPCContext *ctx, int fpscr,
                             int *fex_ret, int *vx_ret)
{
    ASSERT(fex_ret);
    ASSERT(vx_ret);

    RTLUnit * const unit = ctx->unit;

    /* VX is 1 if any bit in FPSCR_ALL_VXFOO is set. */
    const int vx_bits = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_ANDI, vx_bits, fpscr, 0, FPSCR_ALL_VXFOO);
    const int vx_flag = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SGTUI, vx_flag, vx_bits, 0, 0);
    *vx_ret = vx_flag;

    /* FEX is 1 if any exception bit is set along with its enable bit.
     * Line the five exception bits (four extracted from FPSCR_XX_SHIFT,
     * plus VX placed at bit 4) up against FPSCR shifted right by
     * FPSCR_XE_SHIFT, AND them, and test the low five bits. */
    const int exc_bits = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_BFEXT,
                 exc_bits, fpscr, 0, FPSCR_XX_SHIFT | 4<<8);
    const int vx_at_bit4 = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SLLI, vx_at_bit4, vx_flag, 0, 4);
    const int enable_bits = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SRLI, enable_bits, fpscr, 0, FPSCR_XE_SHIFT);
    const int all_exc_bits = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_OR, all_exc_bits, exc_bits, vx_at_bit4, 0);
    const int enabled_exc = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_AND, enabled_exc, all_exc_bits, enable_bits, 0);
    const int low_five = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_ANDI, low_five, enabled_exc, 0, 31);
    const int fex_flag = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SGTUI, fex_flag, low_five, 0, 0);
    *fex_ret = fex_flag;
}

/*-----------------------------------------------------------------------*/

/**
 * get_ea_base:  Allocate and return a new RTL register of ADDRESS type
 * containing the host address for the base EA (without offset) in the
 * given D-form instruction.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
 * [Return value]
 *     RTL register containing the host address corresponding to (rA|0).
 */
static inline int get_ea_base(GuestPPCContext *ctx, uint32_t insn)
{
    if (!insn_rA(insn)) {
        /* (rA|0) is zero, so the host address is just the memory base;
         * no new register is allocated in this case. */
        return ctx->membase_reg;
    }

    RTLUnit * const unit = ctx->unit;
    const int base_gpr = get_gpr(ctx, insn_rA(insn));
    /* Zero-extend the guest address to ADDRESS width before adding. */
    const int guest_addr = rtl_alloc_register(unit, RTLTYPE_ADDRESS);
    rtl_add_insn(unit, RTLOP_ZCAST, guest_addr, base_gpr, 0, 0);
    const int host_addr = rtl_alloc_register(unit, RTLTYPE_ADDRESS);
    rtl_add_insn(unit, RTLOP_ADD,
                 host_addr, ctx->membase_reg, guest_addr, 0);
    return host_addr;
}

/*-----------------------------------------------------------------------*/

/**
 * get_ea_indexed:  Allocate and return a new RTL register of ADDRESS type
 * containing the host address for the EA in the given X-form instruction.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
 *     guest_ea_ret: Pointer to variable to receive the RTL register
 *         containing ((rA|0) + rB), or NULL if not needed.
 * [Return value]
 *     RTL register containing the host address corresponding to
 *     ((rA|0) + rB).
*/ static inline int get_ea_indexed(GuestPPCContext *ctx, uint32_t insn, int *guest_ea_ret) { RTLUnit * const unit = ctx->unit; int addr32; if (insn_rA(insn)) { const int rA = get_gpr(ctx, insn_rA(insn)); const int rB = get_gpr(ctx, insn_rB(insn)); addr32 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ADD, addr32, rA, rB, 0); } else { addr32 = get_gpr(ctx, insn_rB(insn)); } if (guest_ea_ret) { *guest_ea_ret = addr32; } const int address = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ZCAST, address, addr32, 0, 0); const int host_address = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ADD, host_address, ctx->membase_reg, address, 0); return host_address; } /*-----------------------------------------------------------------------*/ /** * gen_load_store_address: Generate the address for a load or store operation. * * [Parameters] * ctx: Translation context. * insn: Instruction word. * is_indexed: True if the access is an indexed access, false if not. * update: True if rA will be updated, false if not. * disp_ret: Pointer to variable to receive the access offset for the * load/store instruction. Always receives zero if is_indexed or * update is true. * ea_ret: Pointer to variable to receive the effective address to * write back to rA for an update operation. Not modified (and may * be NULL) if update is false. * [Return value] * RTL register containing the host address for the load/store instruction. */ static int gen_load_store_address(GuestPPCContext *ctx, uint32_t insn, bool is_indexed, bool update, int *disp_ret, int *ea_ret) { ASSERT(ctx); ASSERT(disp_ret); ASSERT(!update || ea_ret); RTLUnit * const unit = ctx->unit; int host_address; const int disp = insn_OPCD(insn) >= OPCD_PSQ_L ? 
insn_d12(insn) : insn_d(insn); if (update) { ASSERT(insn_rA(insn) != 0); if (is_indexed) { host_address = get_ea_indexed(ctx, insn, ea_ret); } else { int ea; const int rA = get_gpr(ctx, insn_rA(insn)); if (disp != 0) { ea = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ADDI, ea, rA, 0, disp); } else { ea = rA; } const int ea_zcast = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ZCAST, ea_zcast, ea, 0, 0); host_address = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ADD, host_address, ctx->membase_reg, ea_zcast, 0); *ea_ret = ea; } *disp_ret = 0; } else { if (is_indexed) { host_address = get_ea_indexed(ctx, insn, NULL); *disp_ret = 0; } else { if (insn_rA(insn) != 0 || disp >= 0) { host_address = get_ea_base(ctx, insn); *disp_ret = disp; } else { const int offset = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_LOAD_IMM, offset, 0, 0, (uint32_t)disp); host_address = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ADD, host_address, ctx->membase_reg, offset, 0); *disp_ret = 0; } } } return host_address; } /*-----------------------------------------------------------------------*/ /** * flush_gpr: Finalize any pending store for the given general-purpose * register. * * [Parameters] * ctx: Translation context. * index: GPR index. */ static void flush_gpr(GuestPPCContext *ctx, int index) { ctx->last_set.gpr[index] = -1; ctx->live.gpr[index] = 0; ctx->gpr_raw[index] = 0; } /*-----------------------------------------------------------------------*/ /** * flush_fpr: Finalize any pending store for the given floating-point * register. * * [Parameters] * ctx: Translation context. * index: FPR index. * clear_live: True to clear the live state of the register, false to * leave it live. 
*/
static void flush_fpr(GuestPPCContext *ctx, int index, bool clear_live)
{
    /* Build the per-register mask in unsigned arithmetic: a signed
     * (1 << 31) would be undefined behavior. */
    const uint32_t index_mask = UINT32_C(1) << index;

    if (ctx->fpr_dirty & index_mask) {
        int reg = ctx->live.fpr[index];
        /* The alias holds a V2_FLOAT64 for paired-single registers and a
         * FLOAT64 otherwise; convert the live value if it differs. */
        const RTLDataType base_type = (ctx->fpr_is_ps & index_mask
                                       ? RTLTYPE_V2_FLOAT64 : RTLTYPE_FLOAT64);
        const RTLDataType current_type = ctx->unit->regs[reg].type;
        if (current_type != base_type) {
            /* A vector value is only SNaN-safe if both halves are. */
            uint32_t safe_set = ctx->fpr_is_safe;
            if (rtl_type_is_vector(current_type)) {
                safe_set &= ctx->ps1_is_safe;
            }
            const bool snan_safe = (safe_set & index_mask) != 0;
            reg = convert_fpr(ctx, index, reg, current_type, base_type,
                              snan_safe);
            if (current_type==RTLTYPE_FLOAT32 && base_type==RTLTYPE_FLOAT64) {
                /* Also write the converted value to the second half of
                 * the register's slot in the state block. */
                rtl_add_insn(
                    ctx->unit, RTLOP_STORE, 0, ctx->psb_reg, reg,
                    ctx->handle->setup.state_offsets_ppc.fpr + 16*index + 8);
            }
        }
        rtl_add_insn(ctx->unit, RTLOP_SET_ALIAS,
                     0, reg, 0, ctx->alias.fpr[index]);
        ctx->fpr_dirty &= ~index_mask;
    }

    if (clear_live) {
        ctx->live.fpr[index] = 0;
        ctx->fpr_is_safe &= ~index_mask;
        ctx->ps1_is_safe &= ~index_mask;
    }
    ctx->fpr_raw[index] = 0;
    ctx->ps_raw[index] = 0;
}

/*-----------------------------------------------------------------------*/

/**
 * set_fpr_and_flush:  Store the given RTL register to the given PowerPC
 * floating-point register, and immediately flush its value out.  The value
 * is assumed to be safe for conversion.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     index: FPR index.
 *     reg: RTL register to store.
 *     snan_safe: True if the value to store is known not to contain SNaNs.
 */
static void set_fpr_and_flush(GuestPPCContext *ctx, int index, int reg,
                              bool snan_safe)
{
    set_fpr(ctx, index, reg);

    if (snan_safe) {
        /* Unsigned mask: a signed (1 << 31) would be undefined behavior. */
        const uint32_t index_mask = UINT32_C(1) << index;
        ctx->fpr_is_safe |= index_mask;
        /* Only mark the second half safe when the stored value was not a
         * scalar FLOAT64. */
        if (ctx->unit->regs[reg].type != RTLTYPE_FLOAT64) {
            ctx->ps1_is_safe |= index_mask;
        }
    }

    flush_fpr(ctx, index, true);
}

/*-----------------------------------------------------------------------*/

/**
 * flush_live_regs:  Finalize all pending stores of guest registers.
 * This does not handle merging split bitfields back to their primary
 * registers.
*
 * [Parameters]
 *     ctx: Translation context.
 *     clear: True to clear all live registers after flushing them; false
 *         to leave them live.
 */
static void flush_live_regs(GuestPPCContext *ctx, bool clear)
{
    /* Flush every dirty FPR, lowest index first.  Work on a copy of the
     * dirty set since flush_fpr() updates ctx->fpr_dirty itself. */
    uint32_t fpr_dirty = ctx->fpr_dirty;
    while (fpr_dirty) {
        const int index = ctz32(fpr_dirty);
        /* Unsigned shift: index can be 31, and a signed (1 << 31) would
         * be undefined behavior. */
        fpr_dirty ^= UINT32_C(1) << index;
        flush_fpr(ctx, index, false);
    }

    if (clear) {
        /* Setting every byte to 0xFF makes each int in last_set equal
         * to -1. */
        memset(&ctx->last_set, -1, sizeof(ctx->last_set));
        memset(&ctx->live, 0, sizeof(ctx->live));
        ctx->fpr_is_safe = 0;
        ctx->ps1_is_safe = 0;
        ctx->crb_dirty = 0;
    } else {
        /* Only clear last_set for registers which are mapped to the PSB. */
        memset(ctx->last_set.gpr, -1, sizeof(ctx->last_set.gpr));
        ctx->last_set.cr = -1;
        ctx->last_set.lr = -1;
        ctx->last_set.ctr = -1;
        ctx->last_set.xer = -1;
        ctx->last_set.fpscr = -1;
    }
}

/*-----------------------------------------------------------------------*/

/**
 * merge_cr:  Merge all CR bit aliases (and untouched CR bits from the
 * processor state block) into a 32-bit CR word.  Helper for
 * guest_ppc_flush_cr().
 *
 * [Parameters]
 *     ctx: Translation context.
 *     make_live: True to always leave CR live in its alias, false to not
 *         call get_cr() if CR is not live.
 * [Return value]
 *     RTL register containing merged value of CR.
 */
static int merge_cr(GuestPPCContext *ctx, bool make_live)
{
    ASSERT(ctx->use_split_fields);

    RTLUnit * const unit = ctx->unit;

    uint32_t crb_changed = ctx->crb_changed_bitrev;
    ASSERT(crb_changed != 0);  // We won't be called if nothing to merge.

    /* Start from the old CR with all changed bits masked out, or from
     * nothing (register 0) if every bit has changed. */
    int cr;
    if (crb_changed == ~UINT32_C(0)) {
        cr = 0;
    } else {
        int old_cr;
        if (make_live) {
            old_cr = get_cr(ctx);
        } else if (ctx->live.cr) {
            old_cr = ctx->live.cr;
        } else {
            /* Load CR without recording it as live. */
            old_cr = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_GET_ALIAS,
                         old_cr, 0, 0, ctx->alias.cr);
        }
        cr = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_ANDI, cr, old_cr, 0, ~crb_changed);
    }

    /* OR in each changed bit, from CR bit 0 (the most significant bit of
     * the mask) upward. */
    while (crb_changed) {
        const int bit = clz32(crb_changed);
        crb_changed ^= 0x80000000 >> bit;

        const int crbN = get_crb(ctx, bit);
        int shifted_crbN;
        if (bit == 31) {
            shifted_crbN = crbN;
        } else {
            shifted_crbN = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_SLLI, shifted_crbN, crbN, 0, 31 - bit);
        }

        if (cr) {
            const int new_cr = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_OR, new_cr, cr, shifted_crbN, 0);
            cr = new_cr;
        } else {
            cr = shifted_crbN;
        }
    }

    return cr;
}

/*-----------------------------------------------------------------------*/

/**
 * merge_fpscr:  Merge the FR/FI/FPRF alias into FPSCR and return an RTL
 * register containing the merged value.  Helper for guest_ppc_flush_fpscr()
 * and mffs.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     make_live: True to always leave FPSCR live in its alias, false to not
 *         call get_fpscr() if FPSCR is not live.
 * [Return value]
 *     RTL register containing merged value of FPSCR.
*/
static int merge_fpscr(GuestPPCContext *ctx, bool make_live)
{
    ASSERT(ctx->use_split_fields);

    RTLUnit * const unit = ctx->unit;

    /* Obtain the current FPSCR word, reusing a live register if any. */
    int base_fpscr;
    if (make_live) {
        base_fpscr = get_fpscr(ctx);
    } else if (ctx->live.fpscr) {
        base_fpscr = ctx->live.fpscr;
    } else {
        /* Load FPSCR without recording it as live. */
        base_fpscr = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_GET_ALIAS,
                     base_fpscr, 0, 0, ctx->alias.fpscr);
    }

    const int split_field = get_fr_fi_fprf(ctx);

    /* Clear the bits being replaced (FEX, VX and RESV20 are cleared as
     * well), then OR in the split field at its position. */
    const int cleared = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_ANDI, cleared, base_fpscr, 0,
                 ~(FPSCR_FEX | FPSCR_VX | FPSCR_FR | FPSCR_FI | FPSCR_FPRF
                   | FPSCR_RESV20));
    const int field_in_place = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SLLI,
                 field_in_place, split_field, 0, FPSCR_FPRF_SHIFT);
    const int merged = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_OR, merged, cleared, field_in_place, 0);

    return merged;
}

/*-----------------------------------------------------------------------*/

/**
 * post_insn_callback:  Add RTL to call the post-instruction callback if
 * one has been set.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     address: Instruction address to pass to the callback.
 */
static void post_insn_callback(GuestPPCContext *ctx, uint32_t address)
{
    if (!ctx->handle->post_insn_callback) {
        return;
    }

    /* Finalize pending stores before the call, keeping registers live. */
    flush_live_regs(ctx, false);

    RTLUnit * const unit = ctx->unit;
    const int callback = rtl_alloc_register(unit, RTLTYPE_ADDRESS);
    rtl_add_insn(unit, RTLOP_LOAD_IMM, callback, 0, 0,
                 (uintptr_t)ctx->handle->post_insn_callback);
    /* The callback receives the state block register and the address. */
    const int address_reg = rtl_imm32(unit, address);
    rtl_add_insn(unit, RTLOP_CALL_TRANSPARENT,
                 0, callback, ctx->psb_reg, address_reg);
}

/*-----------------------------------------------------------------------*/

/**
 * check_snan:  Check whether the given floating-point RTL register is a
 * signaling NaN, and branch to the given label if so.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     reg: Register to check (must be of scalar floating-point type).
 *     label: Label to branch to if the value is a SNaN.
*/
static void check_snan(GuestPPCContext *ctx, int reg, int label)
{
    RTLUnit * const unit = ctx->unit;

    /* Select the layout constants for the value's width.  The tested
     * field runs from the quiet bit (snan_start) up through the exponent
     * (snan_count bits in total). */
    const bool is_float32 = (unit->regs[reg].type == RTLTYPE_FLOAT32);
    const RTLDataType bits_type = is_float32 ? RTLTYPE_INT32 : RTLTYPE_INT64;
    const int type_size = is_float32 ? 32 : 64;
    const int snan_start = is_float32 ? 22 : 51;
    const int snan_count = is_float32 ? 9 : 12;
    /* All field bits set except the lowest one (the quiet bit). */
    const uint32_t snan_value = (1 << snan_count) - 2;

    const int skip_label = rtl_alloc_label(unit);

    const int raw_bits = rtl_alloc_register(unit, bits_type);
    rtl_add_insn(unit, RTLOP_BITCAST, raw_bits, reg, 0, 0);
    /* Shift out everything from the quiet bit upward, leaving only the
     * remaining mantissa bits; these must be nonzero for a NaN. */
    const int low_mantissa = rtl_alloc_register(unit, bits_type);
    rtl_add_insn(unit, RTLOP_SLLI,
                 low_mantissa, raw_bits, 0, type_size - snan_start);
    const int field = rtl_alloc_register(unit, bits_type);
    rtl_add_insn(unit, RTLOP_BFEXT,
                 field, raw_bits, 0, snan_start | snan_count<<8);
    const int field_matches = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SEQI, field_matches, field, 0, snan_value);

    /* Branch to the caller's label only if both conditions hold. */
    rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, field_matches, 0, skip_label);
    rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, low_mantissa, 0, label);
    rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, skip_label);
}

/*-----------------------------------------------------------------------*/

/**
 * flush_denormal:  Return a register containing the given input value,
 * or zero if that value is a denormal.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     reg: RTL register (must be of type FLOAT32).
 * [Return value]
 *     RTL register containing result.
*/
static int flush_denormal(GuestPPCContext *ctx, int reg)
{
    RTLUnit * const unit = ctx->unit;

    const int raw_bits = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_BITCAST, raw_bits, reg, 0, 0);
    /* The 8-bit exponent field is zero for zeros and denormals. */
    const int exponent = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_BFEXT, exponent, raw_bits, 0, 23 | 8<<8);
    /* Build a zero which carries the sign of the input. */
    const int sign_only = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_ANDI, sign_only, raw_bits, 0, 0x80000000);
    const int signed_zero = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
    rtl_add_insn(unit, RTLOP_BITCAST, signed_zero, sign_only, 0, 0);
    /* Keep the input if its exponent is nonzero, else use the zero. */
    const int output = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
    rtl_add_insn(unit, RTLOP_SELECT, output, reg, signed_zero, exponent);

    return output;
}

/*-----------------------------------------------------------------------*/

/**
 * fma_negate:  Negate the given floating-point value, but only if it is
 * not a NaN.  Helper for fused multiply-add implementations.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     reg: RTL register (must be of a scalar floating-point type).
 * [Return value]
 *     RTL register containing result.
 */
static int fma_negate(GuestPPCContext *ctx, int reg)
{
    RTLUnit * const unit = ctx->unit;
    const RTLDataType value_type = unit->regs[reg].type;

    /* An unordered comparison against zero is true only for a NaN input. */
    const int zero_const = rtl_alloc_register(unit, value_type);
    rtl_add_insn(unit, RTLOP_LOAD_IMM, zero_const, 0, 0, 0);
    const int input_is_nan = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_FCMP, input_is_nan, reg, zero_const, RTLFCMP_UN);

    /* Negate unconditionally, then pick the original for NaN inputs. */
    const int negated = rtl_alloc_register(unit, value_type);
    rtl_add_insn(unit, RTLOP_FNEG, negated, reg, 0, 0);
    const int chosen = rtl_alloc_register(unit, value_type);
    rtl_add_insn(unit, RTLOP_SELECT, chosen, reg, negated, input_is_nan);

    return chosen;
}

/*-----------------------------------------------------------------------*/

/**
 * fma_select_nan:  Select the appropriate NaN to return for a fused
 * multiply-add operation.  The returned value is unchanged if no operand
 * was a NaN.
 *
 * [Parameters]
 *     ctx: Translation context.
*     result: RTL register containing result (must be of a scalar
 *         floating-point type).
 *     frA: RTL register containing frA (of the same type as result).
 *     frB: RTL register containing frB (of the same type as result).
 *     frC: RTL register containing frC (of the same type as result).
 * [Return value]
 *     RTL register containing result.
 */
static int fma_select_nan(GuestPPCContext *ctx, int result, int frA, int frB,
                          int frC)
{
    RTLUnit * const unit = ctx->unit;
    const RTLDataType value_type = unit->regs[result].type;
    const bool is_single = (value_type == RTLTYPE_FLOAT32);

    /* We use a condition and SELECT instead of branches so we don't need
     * to mess around with temporary aliases.  This creates a fairly long
     * dependency chain even for the (much more common) case of not
     * changing the result, but if NATIVE_IEEE_NAN is disabled then speed
     * probably isn't important anyway. */

    /* The result is replaced by (quieted) frB exactly when frA is not a
     * NaN while both frC and frB are NaNs. */
    const int zero_const = rtl_alloc_register(unit, value_type);
    rtl_add_insn(unit, RTLOP_LOAD_IMM, zero_const, 0, 0, 0);
    const int a_ordered = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_FCMP, a_ordered, frA, zero_const, RTLFCMP_NUN);
    const int c_unordered = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_FCMP, c_unordered, frC, zero_const, RTLFCMP_UN);
    const int b_unordered = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_FCMP, b_unordered, frB, zero_const, RTLFCMP_UN);
    const int a_ok_c_nan = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_AND, a_ok_c_nan, a_ordered, c_unordered, 0);
    const int pick_frB = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_AND, pick_frB, a_ok_c_nan, b_unordered, 0);

    /* Produce frB with its quiet bit forced on. */
    const RTLDataType int_type = is_single ? RTLTYPE_INT32 : RTLTYPE_INT64;
    const int b_raw = rtl_alloc_register(unit, int_type);
    rtl_add_insn(unit, RTLOP_BITCAST, b_raw, frB, 0, 0);
    const int quiet_mask = rtl_alloc_register(unit, int_type);
    if (is_single) {
        rtl_add_insn(unit, RTLOP_LOAD_IMM, quiet_mask, 0, 0, 0x00400000);
    } else {
        rtl_add_insn(unit, RTLOP_LOAD_IMM, quiet_mask, 0, 0,
                     UINT64_C(0x0008000000000000));
    }
    const int b_quiet_raw = rtl_alloc_register(unit, int_type);
    rtl_add_insn(unit, RTLOP_OR, b_quiet_raw, b_raw, quiet_mask, 0);
    const int b_quiet = rtl_alloc_register(unit, value_type);
    rtl_add_insn(unit, RTLOP_BITCAST, b_quiet, b_quiet_raw, 0, 0);

    const int selected = rtl_alloc_register(unit, value_type);
    rtl_add_insn(unit, RTLOP_SELECT, selected, b_quiet, result, pick_frB);

    return selected;
}

/*-----------------------------------------------------------------------*/

/**
 * ps_dequantize:  Convert an integer value to floating-point for a
 * paired-single load instruction.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     intval: RTL register (of type INT32) containing the loaded value.
 *     scale: RTL register (of type FLOAT32) containing the scale factor
 *         (2^-gqr_scale), or 0 if the value does not need to be scaled.
 * [Return value]
 *     RTL register (of type FLOAT32) containing the converted value.
 */
static int ps_dequantize(GuestPPCContext *ctx, int intval, int scale)
{
    RTLUnit * const unit = ctx->unit;

    const int converted = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
    rtl_add_insn(unit, RTLOP_FSCAST, converted, intval, 0, 0);

    if (scale) {
        /* Apply the scale factor to the converted value. */
        const int scaled = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
        rtl_add_insn(unit, RTLOP_FMUL, scaled, converted, scale, 0);
        return scaled;
    }
    return converted;
}

/*-----------------------------------------------------------------------*/

/**
 * ps_quantize:  Convert a floating-point value to integer for a
 * paired-single store instruction.
 *
 * This function may raise host floating-point exceptions.
* * [Parameters] * ctx: Translation context. * floatval: RTL register (of type FLOAT32) containing the value to store. * scale: RTL register (of type FLOAT32) containing the scale factor * (2^gqr_scale), or 0 if the value does not need to be scaled. * min_val: RTL register containing the low bound of the result value. * max_val: RTL register containing the high bound of the result value. * [Return value] * RTL register (of type INT32) containing the converted value. */ static int ps_quantize(GuestPPCContext *ctx, int floatval, int scale, int min_val, int max_val) { RTLUnit * const unit = ctx->unit; /* Scale the input value and convert to integer. Also check (in * parallel, for hopefully simultaneous execution) for overflow. */ int scaled_val; if (scale) { scaled_val = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_FMUL, scaled_val, floatval, scale, 0); } else { scaled_val = floatval; } const int bits = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BITCAST, bits, scaled_val, 0, 0); const int scaled_int = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_FTRUNCI, scaled_int, scaled_val, 0, 0); const int bits_sll1 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, bits_sll1, bits, 0, 1); const int is_sign = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SRLI, is_sign, bits, 0, 31); const int overflow_val = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SELECT, overflow_val, min_val, max_val, is_sign); const int is_overflow = rtl_alloc_register(unit, RTLTYPE_INT32); /* For the overflow boundary, use a value which is both large enough * not to clip any valid output values and small enough so we're not * affected by host-defined saturation behavior. 
*/ rtl_add_insn(unit, RTLOP_SGTUI, is_overflow, bits_sll1, 0, (0x47800000<<1)-1); // 0x47800000 == 65536.0f const int intval = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SELECT, intval, overflow_val, scaled_int, is_overflow); /* Clamp the result to the given bounds and return the clamped value. */ const int over_max = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SGTS, over_max, intval, max_val, 0); const int temp = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SELECT, temp, max_val, intval, over_max); const int under_min = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLTS, under_min, intval, min_val, 0); const int result = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SELECT, result, min_val, temp, under_min); return result; } /*-----------------------------------------------------------------------*/ /** * set_fpscr_exceptions: Set the given exception bits in FPSCR, along with * the FX bit if any exception bit was not already set. * * [Parameters] * ctx: Translation context. * fpscr: RTL register containing current value of FPSCR, or 0 to * fetch it from the alias. * exceptions: Bitmask of exception bits to set. 
*/
static void set_fpscr_exceptions(GuestPPCContext *ctx, int fpscr,
                                 uint32_t exceptions)
{
    RTLUnit * const unit = ctx->unit;

    /* Fetch FPSCR from its alias if the caller did not supply it. */
    int current = fpscr;
    if (current == 0) {
        current = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_GET_ALIAS,
                     current, 0, 0, ctx->alias.fpscr);
    }

    /* Compute both the updated FPSCR and a test value which is nonzero
     * if any of the requested bits was previously clear. */
    const int inverted = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_NOT, inverted, current, 0, 0);
    const int updated = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_ORI, updated, current, 0, exceptions);
    const int newly_set = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_ANDI, newly_set, inverted, 0, exceptions);

    /* Store the value without FX first; it is overwritten with the FX
     * variant below only if some bit was newly set. */
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, updated, 0, ctx->alias.fpscr);
    const int done_label = rtl_alloc_label(unit);
    rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, newly_set, 0, done_label);
    const int updated_fx = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_ORI, updated_fx, updated, 0, FPSCR_FX);
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, updated_fx, 0, ctx->alias.fpscr);
    rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, done_label);
}

/*-----------------------------------------------------------------------*/

/**
 * gen_fprf:  Generate an FPRF value for the given floating-point value.
 * Helper function for set_fp_result().
 *
 * [Parameters]
 *     unit: RTLUnit to which to add code.
 *     value: RTL register containing value for which to generate FPRF.
 *     slot: Paired-slot index (0 or 1) to use if value is a vector.
 * [Return value]
 *     RTL register containing FPRF value.
 */
static int gen_fprf(RTLUnit *unit, int value, int slot)
{
    ASSERT(unit);

    if (rtl_register_is_vector(&unit->regs[value])) {
        const int slot_value = rtl_alloc_register(
            unit, rtl_vector_element_type(unit->regs[value].type));
        rtl_add_insn(unit, RTLOP_VEXTRACT, slot_value, value, 0, slot);
        value = slot_value;
    }

    const bool is64 = (unit->regs[value].type == RTLTYPE_FLOAT64);
    const RTLDataType bits_type = is64 ?
RTLTYPE_INT64 : RTLTYPE_INT32; const int bits = rtl_alloc_register(unit, bits_type); rtl_add_insn(unit, RTLOP_BITCAST, bits, value, 0, 0); const int bits_nonzero = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SGTUI, bits_nonzero, bits, 0, 0); const int sign = rtl_alloc_register(unit, RTLTYPE_INT32); const int exponent_zero = rtl_alloc_register(unit, RTLTYPE_INT32); const int exponent_max = rtl_alloc_register(unit, RTLTYPE_INT32); const int mantissa_zero = rtl_alloc_register(unit, RTLTYPE_INT32); if (is64) { rtl_add_insn(unit, RTLOP_SLTSI, sign, bits, 0, 0); const int exponent = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_BFEXT, exponent, bits, 0, 52 | 11<<8); rtl_add_insn(unit, RTLOP_SEQI, exponent_zero, exponent, 0, 0); rtl_add_insn(unit, RTLOP_SEQI, exponent_max, exponent, 0, 0x7FF); const int mantissa_test = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_SLLI, mantissa_test, bits, 0, 12); rtl_add_insn(unit, RTLOP_SEQI, mantissa_zero, mantissa_test, 0, 0); } else { rtl_add_insn(unit, RTLOP_SRLI, sign, bits, 0, 31); const int exponent = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFEXT, exponent, bits, 0, 23 | 8<<8); rtl_add_insn(unit, RTLOP_SEQI, exponent_zero, exponent, 0, 0); rtl_add_insn(unit, RTLOP_SEQI, exponent_max, exponent, 0, 0xFF); const int mantissa_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, mantissa_test, bits, 0, 9); rtl_add_insn(unit, RTLOP_SEQI, mantissa_zero, mantissa_test, 0, 0); } /* * FPRF table: * C < > E ? * --------- * 1 0 0 0 1 NaN * 0 1 0 0 1 -inf * 0 1 0 0 0 -normal * 1 1 0 0 0 -denorm * 1 0 0 1 0 -zero * 0 0 0 1 0 +zero * 1 0 1 0 0 +denorm * 0 0 1 0 0 +norm * 0 0 1 0 1 +inf * * Bit formulas: * nan = exponent_max & !mantissa_zero * nzn = !(E | nan) // "nonzero number" * C = (exponent_zero & bits_nonzero) | nan * < = sign & nzn * > = !sign & nzn * E = exponent_zero & mantissa_zero * ? 
= exponent_max */ const int un = exponent_max; const int eq = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_AND, eq, exponent_zero, mantissa_zero, 0); const int mantissa_nonzero = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_XORI, mantissa_nonzero, mantissa_zero, 0, 1); const int nan = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_AND, nan, exponent_max, mantissa_nonzero, 0); const int cls_temp = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_AND, cls_temp, exponent_zero, bits_nonzero, 0); const int cls = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, cls, cls_temp, nan, 0); const int not_nzn = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, not_nzn, eq, nan, 0); const int nzn = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_XORI, nzn, not_nzn, 0, 1); const int not_sign = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_XORI, not_sign, sign, 0, 1); const int lt = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_AND, lt, sign, nzn, 0); const int gt = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_AND, gt, not_sign, nzn, 0); const int shifted_cls = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, shifted_cls, cls, 0, 4); const int shifted_lt = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, shifted_lt, lt, 0, 3); const int shifted_gt = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, shifted_gt, gt, 0, 2); const int shifted_eq = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, shifted_eq, eq, 0, 1); const int cls_lt = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, cls_lt, shifted_cls, shifted_lt, 0); const int gt_eq = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, gt_eq, shifted_gt, shifted_eq, 0); const int cls_lt_un = 
rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, cls_lt_un, cls_lt, un, 0); const int fprf = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, fprf, cls_lt_un, gt_eq, 0); return fprf; } /*-----------------------------------------------------------------------*/ /** * round_fma_result_to_single: Round the result of a double-precision * fused multiply-add operation to single precision, taking into account * the possibility of rounding error caused by a tiny addend. * * See the documentation of BINREC_OPT_G_PPC_FAST_FMADDS for the rationale * behind this function. * * [Parameters] * ctx: Translation context. * result: Operation result (of type FLOAT64). * rtlop: RTL opcode for the FMA operation. * frA, frB, frC: Operands (of type FLOAT64). * [Return value] * Rounded value (of type FLOAT32). */ static int round_fma_result_to_single( GuestPPCContext *ctx, int result, RTLOpcode rtlop, int frA, int frB, int frC) { RTLUnit * const unit = ctx->unit; /* Don't ASSERT() over an error that already occurred. */ ASSERT(!result || unit->regs[result].type == RTLTYPE_FLOAT64); ASSERT(!frA || unit->regs[frA].type == RTLTYPE_FLOAT64); ASSERT(!frB || unit->regs[frB].type == RTLTYPE_FLOAT64); ASSERT(!frC || unit->regs[frC].type == RTLTYPE_FLOAT64); const int result_alias = rtl_alloc_alias_register(unit, RTLTYPE_FLOAT32); const int label_out = rtl_alloc_label(unit); /* Generate the rounded result now since it's what we'll use the vast * majority of the time. This also ensures that appropriate exceptions * from the rounding operation are set. */ const int result_rounded = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_FCVT, result_rounded, result, 0, 0); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, result_rounded, 0, result_alias); /* If FPSCR[RN] is not round-to-nearest, we don't have to do anything. 
*/ const int fpscr = get_fpscr(ctx); const int rn = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFEXT, rn, fpscr, 0, FPSCR_RN_SHIFT | 2<<8); rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, rn, 0, label_out); /* If the result is out of single-precision range (or is a NaN), we * don't have to do anything. */ const int result_bits = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_BITCAST, result_bits, result, 0, 0); const int exponent64 = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_BFEXT, exponent64, result_bits, 0, 52 | 11<<8); const int exponent = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ZCAST, exponent, exponent64, 0, 0); const int exponent_temp = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ADDI, exponent_temp, exponent, result_bits, -874); const int exponent_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLTUI, exponent_test, exponent_temp, 0, 1151-874); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, exponent_test, 0, label_out); /* If the result is not exactly between two single-precision values, * we don't have to do anything. This is a bit tricky because we * have to take denormals into account as well. 
*/ const int mantissa = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_BFEXT, mantissa, result_bits, 0, 0 | 52<<8); const int mantissa_shift_denorm = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ADDI, mantissa_shift_denorm, exponent, 0, -862); const int is_denormal = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLTUI, is_denormal, exponent, 0, 897); const int mantissa_shift_norm = rtl_imm32(unit, 35); const int mantissa_shift = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SELECT, mantissa_shift, mantissa_shift_denorm, mantissa_shift_norm, is_denormal); const int tie_test = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_SLL, tie_test, mantissa, mantissa_shift, 0); const int tie_value = rtl_imm64(unit, UINT64_C(1)<<63); const int is_tie = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SEQ, is_tie, tie_test, tie_value, 0); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, is_tie, 0, label_out); /* * The result is a tie between two single-precision values, so we may * need to re-round it. We do this by rerunning the operation in * round-toward-zero mode and checking the value and exactness of the * result. * * - If the result is exact, we don't need to do anything. * * - If the result has changed, then the exact result must be less than * the double-precision representation, so we subtract 1 from the bit * pattern and convert it to single precision. * * - If the result is unchanged but inexact, then the exact result must * be greater than the double-precision representation, so we add 1 * to the bit pattern and convert it to single precision. 
*/ const int fpstate = rtl_alloc_register(unit, RTLTYPE_FPSTATE); rtl_add_insn(unit, RTLOP_FGETSTATE, fpstate, 0, 0, 0); const int clearexc = rtl_alloc_register(unit, RTLTYPE_FPSTATE); rtl_add_insn(unit, RTLOP_FCLEAREXC, clearexc, fpstate, 0, 0); const int trunc_mode = rtl_alloc_register(unit, RTLTYPE_FPSTATE); rtl_add_insn(unit, RTLOP_FSETROUND, trunc_mode, clearexc, 0, RTLFROUND_TRUNC); rtl_add_insn(unit, RTLOP_FSETSTATE, 0, trunc_mode, 0, 0); const int test_result = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, rtlop, test_result, frA, frC, frB); const int test_fpstate = rtl_alloc_register(unit, RTLTYPE_FPSTATE); rtl_add_insn(unit, RTLOP_FGETSTATE, test_fpstate, 0, 0, 0); rtl_add_insn(unit, RTLOP_FSETSTATE, 0, fpstate, 0, 0); const int test_bits = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_BITCAST, test_bits, test_result, 0, 0); const int test_inexact = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_FTESTEXC, test_inexact, test_fpstate, 0, RTLFEXC_INEXACT); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, test_inexact, 0, label_out); const int test_mantissa = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_BFEXT, test_mantissa, test_bits, 0, 0 | 52<<8); const int unchanged = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SEQ, unchanged, test_mantissa, mantissa, 0); const int label_round_up = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, unchanged, 0, label_round_up); const int rounded_down_bits = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_ADDI, rounded_down_bits, result_bits, 0, -1); const int rounded_down = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_BITCAST, rounded_down, rounded_down_bits, 0, 0); const int result_down = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_FCVT, result_down, rounded_down, 0, 0); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, result_down, 0, result_alias); rtl_add_insn(unit, 
RTLOP_GOTO, 0, 0, 0, label_out); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_round_up); const int rounded_up_bits = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_ADDI, rounded_up_bits, result_bits, 0, 1); const int rounded_up = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_BITCAST, rounded_up, rounded_up_bits, 0, 0); const int result_up = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_FCVT, result_up, rounded_up, 0, 0); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, result_up, 0, result_alias); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_out); const int result32 = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_GET_ALIAS, result32, 0, 0, result_alias); return result32; } /*-----------------------------------------------------------------------*/ /** * round_for_multiply: Round a double-precision floating-point value * to be used as the frC operand to a single-precision multiply or * multiply-add operation. Depending on the value of frC, the value of * frA may also be modified so that the multiply operation returns the * correct value. * * See the documentation of BINREC_OPT_G_PPC_FAST_FMULS for the rationale * behind this function. * * [Parameters] * ctx: Translation context. * frA_ptr: Pointer to RTL register of FLOAT64 type holding the first * multiplicand (frA); may be modified on return. * frC_ptr: Pointer to RTL register of FLOAT64 type holding the second * multiplicand (frC); may be modified on return. 
*/ static void round_for_multiply(GuestPPCContext *ctx, int *frA_ptr, int *frC_ptr) { RTLUnit * const unit = ctx->unit; if (!ctx->alias_mulround_frA) { ctx->alias_mulround_frA = rtl_alloc_alias_register(unit, RTLTYPE_FLOAT64); } if (!ctx->alias_mulround_frC) { ctx->alias_mulround_frC = rtl_alloc_alias_register(unit, RTLTYPE_FLOAT64); } const int frA_alias = ctx->alias_mulround_frA; const int frC_alias = ctx->alias_mulround_frC; rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, *frA_ptr, 0, frA_alias); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, *frC_ptr, 0, frC_alias); const int label_out = rtl_alloc_label(unit); /* If the mantissa is zero or either value is infinity/NaN, we don't * need to round anything. */ const int frC_bits = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_BITCAST, frC_bits, *frC_ptr, 0, 0); const int frA_bits = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_BITCAST, frA_bits, *frA_ptr, 0, 0); const int frC_mantissa = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_BFEXT, frC_mantissa, frC_bits, 0, 0 | 52<<8); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, frC_mantissa, 0, label_out); const int frA_exponent = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_BFEXT, frA_exponent, frA_bits, 0, 52 | 11<<8); const int frC_exponent = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_BFEXT, frC_exponent, frC_bits, 0, 52 | 11<<8); const int frA_inf_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SEQI, frA_inf_test, frA_exponent, 0, 0x7FF); rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, frA_inf_test, 0, label_out); const int frC_inf_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SEQI, frC_inf_test, frC_exponent, 0, 0x7FF); rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, frC_inf_test, 0, label_out); /* If the second operand is a denormal, we normalize it before * rounding, adjusting the exponent of the other operand accordingly. 
* If the other operand becomes denormal, the product will round to * zero in any case, so we just abort and let the operation proceed * normally. */ const int label_normalized = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, frC_exponent, 0, label_normalized); /* To normalize frC, we need to shift the mantissa left until we shift * out a 1 (which then moves to the exponent). We could loop over a * single-bit shift, but it's much faster to count zero bits. */ const int norm_temp = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_CLZ, norm_temp, frC_mantissa, 0, 0); const int norm_shift = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_ADDI, norm_shift, norm_temp, 0, -11); const int frA_exp_new = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_SUB, frA_exp_new, frA_exponent, norm_shift, 0); const int frA_is_normal = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SGTSI, frA_is_normal, frA_exp_new, 0, 0); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, frA_is_normal, 0, label_out); /* Safe to normalize, so actually modify the values. 
*/ const int sign_bit = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_LOAD_IMM, sign_bit, 0, 0, UINT64_C(1)<<63); const int frC_sign = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_AND, frC_sign, frC_bits, sign_bit, 0); const int frC_mantissa_shifted = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_SLL, frC_mantissa_shifted, frC_mantissa, norm_shift, 0); const int frA_norm_adjusted = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_BFINS, frA_norm_adjusted, frA_bits, frA_exp_new, 52 | 11<<8); const int frC_normalized = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_OR, frC_normalized, frC_sign, frC_mantissa_shifted, 0); const int frA_norm_adjusted_fp = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_BITCAST, frA_norm_adjusted_fp, frA_norm_adjusted, 0, 0); const int frC_normalized_fp = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_BITCAST, frC_normalized_fp, frC_normalized, 0, 0); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, frA_norm_adjusted_fp, 0, frA_alias); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, frC_normalized_fp, 0, frC_alias); /* Round the normalized value of frC. Note that this rounding ignores * FPSCR[RN] and always rounds to nearest based on the bit in the * rounding position. 
*/ rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_normalized); const int frC_preround_fp = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_GET_ALIAS, frC_preround_fp, 0, 0, frC_alias); const int frC_preround = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_BITCAST, frC_preround, frC_preround_fp, 0, 0); const uint64_t round_bit = UINT64_C(1) << 27; const int frC_truncated = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_ANDI, frC_truncated, frC_preround, 0, -round_bit); const int frC_round_bit = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_ANDI, frC_round_bit, frC_preround, 0, round_bit); const int frC_rounded = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_ADD, frC_rounded, frC_truncated, frC_round_bit, 0); const int frC_rounded_fp = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_BITCAST, frC_rounded_fp, frC_rounded, 0, 0); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, frC_rounded_fp, 0, frC_alias); /* If the rounding changed a large value into an infinity, subtract a * power of two from frC and add it to frA. 
*/ const int frC_rounded_exp = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_BFEXT, frC_rounded_exp, frC_rounded, 0, 52 | 11<<8); const int frC_rounded_inf_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SEQI, frC_rounded_inf_test, frC_rounded_exp, 0, 0x7FF); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, frC_rounded_inf_test, 0, label_out); const int exp_low_bit = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_LOAD_IMM, exp_low_bit, 0, 0, UINT64_C(1)<<52); const int frC_halved = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_SUB, frC_halved, frC_rounded, exp_low_bit, 0); const int frC_halved_fp = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_BITCAST, frC_halved_fp, frC_halved, 0, 0); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, frC_halved_fp, 0, frC_alias); /* frA might be denormal or huge, so we have to check the exponent. */ const int frA_exponent_2 = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_BFEXT, frA_exponent_2, frA_bits, 0, 52 | 11<<8); const int label_frA_denorm = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, frA_exponent_2, 0, label_frA_denorm); /* If doubling frA would turn it into an infinity, just leave it alone; * the multiply will overflow anyway. 
*/ const int frA_2_inf_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SGTUI, frA_2_inf_test, frA_exponent_2, 0, 0x7FD); rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, frA_2_inf_test, 0, label_out); const int frA_doubled_norm = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_ADD, frA_doubled_norm, frA_bits, exp_low_bit, 0); const int frA_doubled_norm_fp = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_BITCAST, frA_doubled_norm_fp, frA_doubled_norm, 0, 0); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, frA_doubled_norm_fp, 0, frA_alias); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_frA_denorm); const int frA_doubled_denorm_temp = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_SLLI, frA_doubled_denorm_temp, frA_bits, 0, 1); const int frA_doubled_denorm = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_BFINS, frA_doubled_denorm, frA_bits, frA_doubled_denorm_temp, 0 | 63<<8); const int frA_doubled_denorm_fp = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_BITCAST, frA_doubled_denorm_fp, frA_doubled_denorm, 0, 0); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, frA_doubled_denorm_fp, 0, frA_alias); /* Return the possibly modified values. */ rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_out); const int new_frA = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_GET_ALIAS, new_frA, 0, 0, frA_alias); const int new_frC = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_GET_ALIAS, new_frC, 0, 0, frC_alias); *frA_ptr = new_frA; *frC_ptr = new_frC; } /*-----------------------------------------------------------------------*/ /** * set_fp_result: Set FPR[index] and FPSCR based on the given * floating-point value and the current host exception state. If an * invalid-operation exception has been raised and FPSCR[VE] is set, the * target FPR and FPSCR[FPRF] will not be written. 
* * This function will not work correctly for paired-single result values * if the operation has any non-SNaN invalid-operation exceptions and the * BINREC_OPT_G_PPC_IGNORE_FPSCR_VXFOO (or BINREC_OPT_G_PPC_NO_FPSCR_STATE) * optimization is not enabled. * * [Parameters] * ctx: Translation context. * index: Index of FPR to set. * result: RTL register containing result value. * fprf_slot: Paired-slot index (0 or 1) from which to set FPRF, if * result is a paired-single value. * frA, frB, frC: RTL registers containing operand values, or 0 for no * operand. Pass the second operand to fmul[s]/ps_mul in frB * instead of frC. * vxfoo_snan: FPSCR_VXFOO bitmask indicating the exception bit(s) to * set for an invalid-operation exception involving SNaNs * (FPSCR_VXSNAN is implicitly added to this bitmask). * vxfoo_no_snan: FPSCR_VXFOO bitmask indicating which invalid * exceptions other than VXSNAN can be raised. Zero indicates * that only SNaNs can trigger VX. * check_vx: True to check for invalid-operation exceptions. * check_zx: True to check for divide-by-zero exceptions. * set_xx: True to set FPSCR[XX] when an inexact exception is raised; * false to only set FPSCR[FI] (for fres). * snan_safe: True if the result is known not to contain SNaNs, false * for ps_sum[01]. */ static void set_fp_result(GuestPPCContext *ctx, int index, int result, int fprf_slot, int frA, int frB, int frC, uint32_t vxfoo_snan, uint32_t vxfoo_no_snan, bool check_vx, bool check_zx, bool set_xx, bool snan_safe) { if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE) { set_fpr(ctx, index, result); if (snan_safe) { ctx->fpr_is_safe |= 1 << index; if (ctx->unit->regs[result].type != RTLTYPE_FLOAT64) { ctx->ps1_is_safe |= 1 << index; } } return; } if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_IGNORE_FPSCR_VXFOO) { vxfoo_no_snan = 0; } RTLUnit * const unit = ctx->unit; const int fpscr = get_fpscr(ctx); /* FPSCR is changed conditionally, so we can't save it. 
*/ ctx->live.fpscr = 0; ctx->last_set.fpscr = -1; ctx->live.fr_fi_fprf = 0; ctx->last_set.fr_fi_fprf = -1; /* Similarly for the output register. */ flush_fpr(ctx, index, true); const int fpstate = rtl_alloc_register(unit, RTLTYPE_FPSTATE); rtl_add_insn(unit, RTLOP_FGETSTATE, fpstate, 0, 0, 0); const int clearexc = rtl_alloc_register(unit, RTLTYPE_FPSTATE); rtl_add_insn(unit, RTLOP_FCLEAREXC, clearexc, fpstate, 0, 0); rtl_add_insn(unit, RTLOP_FSETSTATE, 0, clearexc, 0, 0); int label_out = rtl_alloc_label(unit); int label_exception_abort = 0; if (check_vx) { const int invalid = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_FTESTEXC, invalid, fpstate, 0, RTLFEXC_INVALID); const int label_no_vx = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, invalid, 0, label_no_vx); int label_check_ve_snan = 0; if (vxfoo_no_snan) { const int label_snan = rtl_alloc_label(unit); int label_frB_snan = 0; if (frA) { check_snan(ctx, frA, label_snan); } if (frC) { check_snan(ctx, frC, label_snan); } if (vxfoo_no_snan == (FPSCR_VXIMZ | FPSCR_VXISI)) { /* Special handling for FMA instructions; see notes below. */ label_frB_snan = rtl_alloc_label(unit); check_snan(ctx, frB, label_frB_snan); } else { check_snan(ctx, frB, label_snan); } /* If there are multiple exception types, we have to check for each * one separately. */ int label_check_ve = 0; if (vxfoo_no_snan == (FPSCR_VXIDI | FPSCR_VXZDZ)) { // fdiv, fdivs const int bits_type = (unit->regs[frA].type == RTLTYPE_FLOAT64 ? 
RTLTYPE_INT64 : RTLTYPE_INT32); const int frA_bits = rtl_alloc_register(unit, bits_type); rtl_add_insn(unit, RTLOP_BITCAST, frA_bits, frA, 0, 0); const int frB_bits = rtl_alloc_register(unit, bits_type); rtl_add_insn(unit, RTLOP_BITCAST, frB_bits, frB, 0, 0); const int both_bits = rtl_alloc_register(unit, bits_type); rtl_add_insn(unit, RTLOP_OR, both_bits, frA_bits, frB_bits, 0); const int zero_test = rtl_alloc_register(unit, bits_type); rtl_add_insn(unit, RTLOP_SLLI, zero_test, both_bits, 0, 1); const int label_vxidi = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, zero_test, 0, label_vxidi); set_fpscr_exceptions(ctx, fpscr, FPSCR_VXZDZ); if (ctx->handle->common_opt & BINREC_OPT_NATIVE_IEEE_NAN) { label_check_ve_snan = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_check_ve_snan); } else { label_check_ve = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_check_ve); } rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_vxidi); set_fpscr_exceptions(ctx, fpscr, FPSCR_VXIDI); } else if (vxfoo_no_snan == (FPSCR_VXIMZ | FPSCR_VXISI)) { /* For an FMA operation to cause VX with no SNaNs, one of * the following must be true: * - frA = +/-inf, frC = 0 (frB don't care) * - frA = 0, frC = +/-inf (frB don't care) * - frA = +/-inf, frC = nonzero, frB = -/+inf * - frA = nonzero, frC = +/-inf, frB = -/+inf * In other words, at least one of frA and frC must be an * infinity, so if we see that frA is not infinite, we can * assume that frC is infinite without checking. */ const int bits_type = (unit->regs[frA].type == RTLTYPE_FLOAT64 ? 
RTLTYPE_INT64 : RTLTYPE_INT32); const int frA_bits = rtl_alloc_register(unit, bits_type); rtl_add_insn(unit, RTLOP_BITCAST, frA_bits, frA, 0, 0); const int frC_bits = rtl_alloc_register(unit, bits_type); rtl_add_insn(unit, RTLOP_BITCAST, frC_bits, frC, 0, 0); const int frA_zero_test = rtl_alloc_register(unit, bits_type); rtl_add_insn(unit, RTLOP_SLLI, frA_zero_test, frA_bits, 0, 1); const int frC_zero_test = rtl_alloc_register(unit, bits_type); rtl_add_insn(unit, RTLOP_SLLI, frC_zero_test, frC_bits, 0, 1); int frA_inf_test; if (bits_type == RTLTYPE_INT32) { frA_inf_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SEQI, frA_inf_test, frA_zero_test, 0, 0xFF000000); } else { const int inf_sll1 = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_LOAD_IMM, inf_sll1, 0, 0, UINT64_C(0xFFE0000000000000)); frA_inf_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SEQ, frA_inf_test, frA_zero_test, inf_sll1, 0); } const int label_frC_inf = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, frA_inf_test, 0, label_frC_inf); const int label_vximz = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, frC_zero_test, 0, label_vximz); const int label_vxisi = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_vxisi); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_frC_inf); rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, frA_zero_test, 0, label_vxisi); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_vximz); set_fpscr_exceptions(ctx, fpscr, FPSCR_VXIMZ); if (ctx->handle->common_opt & BINREC_OPT_NATIVE_IEEE_NAN) { label_check_ve_snan = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_check_ve_snan); } else { label_check_ve = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_check_ve); } rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_vxisi); set_fpscr_exceptions(ctx, fpscr, FPSCR_VXISI); } else { /* Make sure only one VXFOO bit is set. 
*/ ASSERT((vxfoo_no_snan & (vxfoo_no_snan - 1)) == 0); set_fpscr_exceptions(ctx, fpscr, vxfoo_no_snan); } if (ctx->handle->common_opt & BINREC_OPT_NATIVE_IEEE_NAN) { if (!label_check_ve_snan) { label_check_ve_snan = rtl_alloc_label(unit); } rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_check_ve_snan); } else { if (label_check_ve) { rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_check_ve); } const int ve_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, ve_test, fpscr, 0, FPSCR_VE); int label_no_ve_default_nan = 0; label_no_ve_default_nan = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, ve_test, 0, label_no_ve_default_nan); const int fr_fi_fprf = get_fr_fi_fprf(ctx); const int fr_fi_cleared = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, fr_fi_cleared, fr_fi_fprf, 0, 0x1F); set_fr_fi_fprf_and_flush(ctx, fr_fi_cleared); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_no_ve_default_nan); RTLDataType default_nan_type = unit->regs[result].type; const int default_nan = rtl_alloc_register(unit, default_nan_type); rtl_add_insn(unit, RTLOP_LOAD_IMM, default_nan, 0, 0, default_nan_type == RTLTYPE_FLOAT64 ? UINT64_C(0x7FF8000000000000) : 0x7FC00000); set_fpr_and_flush(ctx, index, default_nan, true); const int default_nan_fprf = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_LOAD_IMM, default_nan_fprf, 0, 0, 0x11); set_fr_fi_fprf_and_flush(ctx, default_nan_fprf); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out); } /* A fused multiply-add of the form inf*0+SNaN (or 0*inf-SNaN, * etc.) raises both VXIMZ and VXSNAN, so we need an extra * check if frB triggers VXSNAN. */ if (vxfoo_no_snan == (FPSCR_VXIMZ | FPSCR_VXISI)) { rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_frB_snan); /* In the interest of not overly bloating the output code * (possibly a lost cause at this point...) 
we just calculate * frA*frC directly and see if that raises an exception. We * check frA and frC for SNaNs before frB, so if we get here, * only inf*0 will trigger an invalid-operation exception. * Note that exceptions are already cleared here, so we don't * need an extra clear before the FMUL. */ const int mul_test = rtl_alloc_register(unit, unit->regs[frA].type); rtl_add_insn(unit, RTLOP_FMUL, mul_test, frA, frC, 0); const int mul_state = rtl_alloc_register(unit, RTLTYPE_FPSTATE); rtl_add_insn(unit, RTLOP_FGETSTATE, mul_state, 0, 0, 0); rtl_add_insn(unit, RTLOP_FSETSTATE, 0, clearexc, 0, 0); const int mul_invalid = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_FTESTEXC, mul_invalid, mul_state, 0, RTLFEXC_INVALID); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, mul_invalid, 0, label_snan); set_fpscr_exceptions(ctx, fpscr, FPSCR_VXSNAN | FPSCR_VXIMZ); if (!label_check_ve_snan) { label_check_ve_snan = rtl_alloc_label(unit); } rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_check_ve_snan); } rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_snan); } // if (vxfoo_no_snan) set_fpscr_exceptions(ctx, fpscr, FPSCR_VXSNAN | vxfoo_snan); if (label_check_ve_snan) { rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_check_ve_snan); } const int ve_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, ve_test, fpscr, 0, FPSCR_VE); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, ve_test, 0, label_no_vx); if (check_zx) { /* We technically don't need this label if the result is a * paired-single value, but paired-single arithmetic operations * handle VX by falling back to scalars, so we'll never reach * this point with check_zx true and a paired-single value. 
*/ label_exception_abort = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_exception_abort); } const int fr_fi_fprf = get_fr_fi_fprf(ctx); const int fr_fi_cleared = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, fr_fi_cleared, fr_fi_fprf, 0, 0x1F); set_fr_fi_fprf_and_flush(ctx, fr_fi_cleared); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_no_vx); } if (check_zx) { const int zerodiv = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_FTESTEXC, zerodiv, fpstate, 0, RTLFEXC_ZERO_DIVIDE); const int label_no_zx = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, zerodiv, 0, label_no_zx); set_fpscr_exceptions(ctx, fpscr, FPSCR_ZX); const int ze_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, ze_test, fpscr, 0, FPSCR_ZE); if (rtl_register_is_vector(&unit->regs[result])) { /* For ps_div, we still have to set FR/FI/FPRF before aborting, * along with any other exceptions if the other slot was not * also a zero-divide. 
*/ rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, ze_test, 0, label_no_zx); const RTLDataType scalar_type = rtl_vector_element_type(unit->regs[frA].type); const int frA_ps0 = rtl_alloc_register(unit, scalar_type); rtl_add_insn(unit, RTLOP_VEXTRACT, frA_ps0, frA, 0, 0); const int frA_ps1 = rtl_alloc_register(unit, scalar_type); rtl_add_insn(unit, RTLOP_VEXTRACT, frA_ps1, frA, 0, 1); const int frB_ps0 = rtl_alloc_register(unit, scalar_type); rtl_add_insn(unit, RTLOP_VEXTRACT, frB_ps0, frB, 0, 0); const int frB_ps1 = rtl_alloc_register(unit, scalar_type); rtl_add_insn(unit, RTLOP_VEXTRACT, frB_ps1, frB, 0, 1); int result_ps0 = rtl_alloc_register(unit, scalar_type); rtl_add_insn(unit, RTLOP_FDIV, result_ps0, frA_ps0, frB_ps0, 0); if (scalar_type != RTLTYPE_FLOAT32) { const int result64 = result_ps0; result_ps0 = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_FCVT, result_ps0, result64, 0, 0); } const int fpstate_ps0 = rtl_alloc_register(unit, RTLTYPE_FPSTATE); rtl_add_insn(unit, RTLOP_FGETSTATE, fpstate_ps0, 0, 0, 0); int result_ps1 = rtl_alloc_register(unit, scalar_type); rtl_add_insn(unit, RTLOP_FDIV, result_ps1, frA_ps1, frB_ps1, 0); /* Give the FDIV time to finish before we FCVT it. 
*/ const int zx_ps0 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_FTESTEXC, zx_ps0, fpstate_ps0, 0, RTLFEXC_ZERO_DIVIDE); const int fprf_full_ps0 = gen_fprf(unit, result_ps0, 0); const int fprf_zero_ps0 = rtl_imm32(unit, 0); const int fprf_ps0 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SELECT, fprf_ps0, fprf_zero_ps0, fprf_full_ps0, zx_ps0); if (scalar_type != RTLTYPE_FLOAT32) { const int result64 = result_ps1; result_ps1 = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_FCVT, result_ps1, result64, 0, 0); } const int fpstate_ps1 = rtl_alloc_register(unit, RTLTYPE_FPSTATE); rtl_add_insn(unit, RTLOP_FGETSTATE, fpstate_ps1, 0, 0, 0); rtl_add_insn(unit, RTLOP_FSETSTATE, 0, clearexc, 0, 0); const int zx_xx = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_FTESTEXC, zx_xx, fpstate_ps1, 0, RTLFEXC_INEXACT); const int label_zx_no_xx = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, zx_xx, 0, label_zx_no_xx); set_fpscr_exceptions(ctx, 0, FPSCR_XX); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_zx_no_xx); const int zx_ox = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_FTESTEXC, zx_ox, fpstate_ps1, 0, RTLFEXC_OVERFLOW); const int label_zx_no_ox = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, zx_ox, 0, label_zx_no_ox); set_fpscr_exceptions(ctx, 0, FPSCR_OX); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_zx_no_ox); const int zx_ux = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_FTESTEXC, zx_ux, fpstate_ps1, 0, RTLFEXC_UNDERFLOW); const int label_zx_no_ux = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, zx_ux, 0, label_zx_no_ux); set_fpscr_exceptions(ctx, 0, FPSCR_UX); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_zx_no_ux); const int fi_sll5 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, fi_sll5, zx_xx, 0, 5); const int fi_fprf = rtl_alloc_register(unit, RTLTYPE_INT32); 
rtl_add_insn(unit, RTLOP_OR, fi_fprf, fi_sll5, fprf_ps0, 0); set_fr_fi_fprf_and_flush(ctx, fi_fprf); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out); } else { // Not paired-single. /* The only cases in which we pass check_vx = false are for * paired-single results, so if we get here, we must have * already come through the VX check and thus * label_exception_abort will be set. */ rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, ze_test, 0, label_exception_abort); } // if (rtl_register_is_vector(&unit->regs[result])) rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_no_zx); } // if (check_zx) set_fpr_and_flush(ctx, index, result, snan_safe); const int fprf = gen_fprf(unit, result, fprf_slot); const int inexact = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_FTESTEXC, inexact, fpstate, 0, RTLFEXC_INEXACT); const int shifted_fi = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, shifted_fi, inexact, 0, 5); const int fi_fprf = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, fi_fprf, fprf, shifted_fi, 0); set_fr_fi_fprf_and_flush(ctx, fi_fprf); if (set_xx) { const int label_no_xx = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, inexact, 0, label_no_xx); set_fpscr_exceptions(ctx, 0, FPSCR_XX); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_no_xx); } const int overflow = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_FTESTEXC, overflow, fpstate, 0, RTLFEXC_OVERFLOW); const int label_no_ox = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, overflow, 0, label_no_ox); set_fpscr_exceptions(ctx, 0, FPSCR_OX); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_no_ox); const int underflow = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_FTESTEXC, underflow, fpstate, 0, RTLFEXC_UNDERFLOW); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, underflow, 0, label_out); set_fpscr_exceptions(ctx, 0, FPSCR_UX); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_out); } 
/*-----------------------------------------------------------------------*/ /** * set_ps_result: Set FPR[index] and FPSCR based on the given * paired-single value and the current host exception state. If an * invalid-operation exception has been raised and FPSCR[VE] is set, the * target FPR will not be written. * * Analog of set_fp_result() for paired-single instructions which can * generate invalid-operation exceptions. * * [Parameters] * ctx: Translation context. * index: Index of FPR to set. * result: RTL register containing result value. * rtlop: RTL opcode for the operation to be performed, RTLOP_FDIV for * ps_res, or RTLOP_FSQRT for ps_rsqrte. * src1: First source operand (vector of 1.0f for ps_res and ps_rsqrte). * src2: Second source operand. * one: RTL register containing 1.0f for ps_res and ps_rsqrte; 0 for * other operations. * use_float32: True if source operands are of type FLOAT32, false if * they are of type FLOAT64. * vxfoo_no_snan: FPSCR_VXFOO bitmask indicating which invalid * exceptions other than VXSNAN can be raised. Zero indicates * that only SNaNs can trigger VX. */ static void set_ps_result(GuestPPCContext *ctx, int index, int result, RTLOpcode rtlop, int src1, int src2, int one, bool use_float32, uint32_t vxfoo_no_snan) { const bool is_res = (one && rtlop == RTLOP_FDIV); const bool is_rsqrte = (one && rtlop == RTLOP_FSQRT); ASSERT(is_res || is_rsqrte || !one); const RTLOpcode real_rtlop = is_rsqrte ? RTLOP_FDIV : rtlop; const RTLDataType type = use_float32 ? RTLTYPE_V2_FLOAT32 : RTLTYPE_V2_FLOAT64; RTLUnit * const unit = ctx->unit; /* If an invalid-operation exception occurred and either (1) we're * setting VXFOO exception flags or (2) we want to match guest NaN * behavior, we have to re-run the operation separately on each * paired-single slot, since each slot could raise a different VXFOO * exception or we might need to write a PowerPC default QNaN. 
* (FIXME: We should find some way to make these callable subroutines * instead of having to inline them at every guest instruction.) */ int label_skip_set_result = 0; if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE) && (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_IGNORE_FPSCR_VXFOO) || !(ctx->handle->common_opt & BINREC_OPT_NATIVE_IEEE_NAN))) { const int fpstate = rtl_alloc_register(unit, RTLTYPE_FPSTATE); rtl_add_insn(unit, RTLOP_FGETSTATE, fpstate, 0, 0, 0); const int has_vx = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_FTESTEXC, has_vx, fpstate, 0, RTLFEXC_INVALID); const int label_do_set_result = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, has_vx, 0, label_do_set_result); /* Save the current value of the output register so we can restore * it later in case FPSCR[VE] is set. */ int saved_frD; if (ctx->live.fpr[index]) { saved_frD = ctx->live.fpr[index]; ctx->live.fpr[index] = 0; ctx->fpr_dirty &= ~(1 << index); ctx->fpr_is_safe &= ~(1 << index); ctx->ps1_is_safe &= ~(1 << index); ctx->fpr_raw[index] = 0; ctx->ps_raw[index] = 0; } else { saved_frD = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64); rtl_add_insn(unit, RTLOP_GET_ALIAS, saved_frD, 0, 0, ctx->alias.fpr[index]); } const int fpstate_cleared = rtl_alloc_register(unit, RTLTYPE_FPSTATE); rtl_add_insn(unit, RTLOP_FCLEAREXC, fpstate_cleared, fpstate, 0, 0); rtl_add_insn(unit, RTLOP_FSETSTATE, 0, fpstate_cleared, 0, 0); /* Perform the operation and call set_fp_result() to set * appropriate exception flags (and possibly modify the result) * for each slot. We use the frD alias as a temporary for * simplicity's sake. 
*/ const int scalar_type = rtl_vector_element_type(type); int result_ps[2], fi_fprf_ps[2]; for (int slot = 0; slot < 2; slot++) { int op_src1; if (one) { op_src1 = one; } else { op_src1 = rtl_alloc_register(unit, scalar_type); rtl_add_insn(unit, RTLOP_VEXTRACT, op_src1, src1, 0, slot); } const int src2_ps = rtl_alloc_register(unit, scalar_type); rtl_add_insn(unit, RTLOP_VEXTRACT, src2_ps, src2, 0, slot); int op_src2; if (is_rsqrte) { op_src2 = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_FSQRT, op_src2, src2_ps, 0, 0); } else { op_src2 = src2_ps; } result_ps[slot] = rtl_alloc_register(unit, scalar_type); rtl_add_insn(unit, real_rtlop, result_ps[slot], op_src1, op_src2, 0); if (!use_float32) { const int result64 = result_ps[slot]; result_ps[slot] = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_FCVT, result_ps[slot], result64, 0, 0); } set_fp_result(ctx, index, result_ps[slot], 0, op_src1, src2_ps, 0, 0, vxfoo_no_snan, true, real_rtlop == RTLOP_FDIV, !(is_res || is_rsqrte), true); /* This will always be a direct alias access, so we might as * well avoid extra conversions to and from float32. */ result_ps[slot] = get_fpr_as_type(ctx, index, RTLTYPE_FLOAT64); ctx->live.fpr[index] = 0; if (is_rsqrte && slot == 1) { fi_fprf_ps[slot] = 0; // Not used. } else { fi_fprf_ps[slot] = get_fr_fi_fprf(ctx); } } /* FPRF is set from ps0 only; FI is set if either slot is inexact. * This update takes place regardless of whether FPSCR[VE] is set. * (That's probably a hardware bug, but if we've come this far we * might as well try to stay bug-compatible.) */ int final_fi_fprf; if (is_rsqrte) { // ps_rsqrte doesn't set FI. 
final_fi_fprf = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, final_fi_fprf, fi_fprf_ps[0], 0, 0x1F); } else { const int fi_ps1 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, fi_ps1, fi_fprf_ps[1], 0, 0x20); final_fi_fprf = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, final_fi_fprf, fi_fprf_ps[0], fi_ps1, 0); } set_fr_fi_fprf_and_flush(ctx, final_fi_fprf); /* Merge the two outputs to frD, or restore the original value of * frD if FPSCR[VE] is set. */ const int fpscr = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_GET_ALIAS, fpscr, 0, 0, ctx->alias.fpscr); const int has_ve = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, has_ve, fpscr, 0, FPSCR_VE); const int label_no_ve = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, has_ve, 0, label_no_ve); set_fpr_and_flush(ctx, index, saved_frD, false); label_skip_set_result = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_skip_set_result); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_no_ve); const int new_result = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64); rtl_add_insn(unit, RTLOP_VBUILD2, new_result, result_ps[0], result_ps[1], 0); set_fpr_and_flush(ctx, index, new_result, true); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_skip_set_result); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_do_set_result); } /* If no invalid-operation exception occurred (or if we're ignoring * exceptions), we can just store the result directly. 
*/ set_fp_result(ctx, index, result, 0, src1, src2, 0, 0, 0, false, real_rtlop == RTLOP_FDIV, true, true); if (label_skip_set_result) { rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_skip_set_result); } } /*-----------------------------------------------------------------------*/ /** * store_float64_as_32: Convert a 64-bit floating-point value to 32 bits * and store it to memory following the PowerPC conversion rules for * single-precision stores. * * [Parameters] * unit: RTLUnit to operate on. * rtlop: Store opcode (either STORE or STORE_BR). * host_address: RTL register containing the host address for the store. * value: RTL register containing the value (of type FLOAT64). * disp: Byte displacement for store operation. * flush_denormal: True to flush a denormal value to zero (as for * psq_st), false to leave it as a denormal (as for stfs). */ static void store_float64_as_32(RTLUnit *unit, RTLOpcode rtlop, int host_address, int value, int disp, bool flush_denormal) { const int bits = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_BITCAST, bits, value, 0, 0); const int high_word64 = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_SRLI, high_word64, bits, 0, 32); const int high_word = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ZCAST, high_word, high_word64, 0, 0); const int range_test = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_SLLI, range_test, bits, 0, 1); const int label_normal = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, range_test, 0, label_normal); const int normal_limit = rtl_imm64(unit, (UINT64_C(0x3810000000000000) << 1) - 1); const int normal_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SGTU, normal_test, range_test, normal_limit, 0); const int label_denormal = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, normal_test, 0, label_denormal); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_normal); const int 
high_bits = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, high_bits, high_word, 0, 0xC0000000); const int low_bits64 = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_BFEXT, low_bits64, bits, 0, 29 | 30<<8); const int low_bits = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ZCAST, low_bits, low_bits64, 0, 0); const int normal_bits = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, normal_bits, high_bits, low_bits, 0); rtl_add_insn(unit, rtlop, 0, host_address, normal_bits, disp); const int label_out = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_denormal); if (flush_denormal) { /* Zero is zero whether byte-reversed or not, but we have to * preserve the sign, so we can't just blindly store integer zero * here. */ const int signed_zero = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, signed_zero, high_bits, 0, 0x80000000); rtl_add_insn(unit, rtlop, 0, host_address, signed_zero, disp); } else { const int exponent = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_SRLI, exponent, range_test, 0, 53); const int frac64 = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_SRLI, frac64, range_test, 0, 31); const int frac_temp1 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ZCAST, frac_temp1, frac64, 0, 0); const int cst_896 = rtl_imm64(unit, 896); const int frac_temp2 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, frac_temp2, frac_temp1, 0, 0x7FFFFF); const int shift = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_SUB, shift, cst_896, exponent, 0); const int frac = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ORI, frac, frac_temp2, 0, 1<<22); const int sign = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, sign, high_word, 0, 0x80000000); const int 
shifted_frac = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SRL, shifted_frac, frac, shift, 0); const int denormal_bits = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, denormal_bits, sign, shifted_frac, 0); rtl_add_insn(unit, rtlop, 0, host_address, denormal_bits, disp); } rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_out); } /*-----------------------------------------------------------------------*/ /** * set_nia: Set the NIA field of the processor state block to the value * of the given RTL register. * * [Parameters] * ctx: Translation context. * reg: RTL register containing value to store. */ static inline void set_nia(GuestPPCContext *ctx, int reg) { rtl_add_insn(ctx->unit, RTLOP_SET_ALIAS, 0, reg, 0, ctx->alias.nia); } /*-----------------------------------------------------------------------*/ /** * set_nia_imm: Set the NIA field of the processor state block to the * given literal value. * * [Parameters] * ctx: Translation context. * nia: Address to store in the state block's NIA field. */ static inline void set_nia_imm(GuestPPCContext *ctx, uint32_t nia) { set_nia(ctx, rtl_imm32(ctx->unit, nia)); } /*-----------------------------------------------------------------------*/ /** * update_cr0: Add RTL instructions to update the value of CR0 based on * the result of an integer operation. * * [Parameters] * ctx: Translation context. * result: RTL register containing operation result. 
*/
static void update_cr0(GuestPPCContext *ctx, int result)
{
    RTLUnit * const unit = ctx->unit;

    /* Signed comparisons of the result against zero: less-than,
     * greater-than, and equal, in the order they are passed to
     * set_crf() below. */
    const RTLOpcode compare_ops[3] = {RTLOP_SLTSI, RTLOP_SGTSI, RTLOP_SEQI};
    int flag[3];
    for (int i = 0; i < 3; i++) {
        flag[i] = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, compare_ops[i], flag[i], result, 0, 0);
    }

    /* The fourth field bit comes from get_xer_so(). */
    const int summary_overflow = get_xer_so(ctx);

    set_crf(ctx, 0, flag[0], flag[1], flag[2], summary_overflow);
}

/*-----------------------------------------------------------------------*/

/**
 * update_cr1:  Add RTL instructions to update the value of CR1 based on
 * the high 4 bits of FPSCR.
 *
 * [Parameters]
 *     ctx: Translation context.
 */
static void update_cr1(GuestPPCContext *ctx)
{
    /* Bit positions within FPSCR of the two flags extracted directly
     * here; each is extracted as a single-bit field. */
    enum { FPSCR_FX_BITPOS = 31, FPSCR_OX_BITPOS = 28 };

    RTLUnit * const unit = ctx->unit;

    const int fpscr_reg = get_fpscr(ctx);

    /* FEX and VX are obtained through the dedicated helper. */
    int fex_reg, vx_reg;
    get_fpscr_fex_vx(ctx, fpscr_reg, &fex_reg, &vx_reg);

    const int fx_reg = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_BFEXT,
                 fx_reg, fpscr_reg, 0, FPSCR_FX_BITPOS | 1<<8);

    const int ox_reg = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_BFEXT,
                 ox_reg, fpscr_reg, 0, FPSCR_OX_BITPOS | 1<<8);

    set_crf(ctx, 1, fx_reg, fex_reg, vx_reg, ox_reg);
}

/*-----------------------------------------------------------------------*/

/**
 * update_rounding_mode:  Add RTL instructions to update the current
 * floating-point rounding mode based on the RN field of FPSCR.
 *
 * [Parameters]
 *     ctx: Translation context.
*/
static void update_rounding_mode(GuestPPCContext *ctx)
{
    RTLUnit * const unit = ctx->unit;

    /* Labels are allocated up front, in this order. */
    const int label_nearest = rtl_alloc_label(unit);
    const int label_trunc = rtl_alloc_label(unit);
    const int label_ceil = rtl_alloc_label(unit);
    const int label_done = rtl_alloc_label(unit);

    const int cur_state = rtl_alloc_register(unit, RTLTYPE_FPSTATE);
    rtl_add_insn(unit, RTLOP_FGETSTATE, cur_state, 0, 0, 0);

    /* Dispatch on FPSCR[RN]: 0 -> nearest, 1 -> truncate, 2 -> ceiling,
     * anything else falls through to floor. */
    const int fpscr_reg = get_fpscr(ctx);
    const int rn_reg = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_ANDI, rn_reg, fpscr_reg, 0, FPSCR_RN);
    rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, rn_reg, 0, label_nearest);
    const int rn_below_2 = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SLTUI, rn_below_2, rn_reg, 0, 2);
    rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, rn_below_2, 0, label_trunc);
    const int rn_is_2 = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SEQI, rn_is_2, rn_reg, 0, 2);
    rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, rn_is_2, 0, label_ceil);

    /* Emit one arm per rounding mode.  The first arm is the fall-through
     * case and so has no label; the last arm runs straight into the exit
     * label and so needs no jump. */
    const struct {
        int label;
        int round_mode;
    } arms[] = {
        {0,             RTLFROUND_FLOOR},
        {label_ceil,    RTLFROUND_CEIL},
        {label_trunc,   RTLFROUND_TRUNC},
        {label_nearest, RTLFROUND_NEAREST},
    };
    const int num_arms = (int)(sizeof(arms) / sizeof(arms[0]));
    for (int i = 0; i < num_arms; i++) {
        if (i > 0) {
            rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, arms[i].label);
        }
        const int new_state = rtl_alloc_register(unit, RTLTYPE_FPSTATE);
        rtl_add_insn(unit, RTLOP_FSETROUND,
                     new_state, cur_state, 0, arms[i].round_mode);
        rtl_add_insn(unit, RTLOP_FSETSTATE, 0, new_state, 0, 0);
        if (i < num_arms - 1) {
            rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_done);
        }
    }

    rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_done);
}

/*-----------------------------------------------------------------------*/

/**
 * return_from_unit:  Add RTL instructions to return from the current
 * translation unit.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     address: Address of the current instruction, or ~0 to suppress
 *         calling the post-instruction callback (if any).
 *     nia: RTL register containing the address of the next instruction to
 *         execute.
 *     need_flush: True if live registers should be flushed before returning.
 */
static void return_from_unit(GuestPPCContext *ctx, uint32_t address,
                             int nia, bool need_flush)
{
    RTLUnit * const unit = ctx->unit;

    if (need_flush) {
        flush_live_regs(ctx, true);
    }
    set_nia(ctx, nia);
    if (address != ~0u) {
        post_insn_callback(ctx, address);
    }

    /* The chained return sequence is used only when chaining is enabled
     * and the next-instruction address register is a constant. */
    const bool chain_to_next =
        ctx->handle->use_chaining
        && unit->regs[nia].source == RTLREG_CONSTANT;
    if (!chain_to_next) {
        rtl_add_insn(unit, RTLOP_GOTO,
                     0, 0, 0, guest_ppc_get_epilogue_label(ctx));
        return;
    }

    guest_ppc_flush_cr(ctx, false);
    guest_ppc_flush_fpscr(ctx);
    const int chain_insn_index =
        rtl_add_chain_insn(unit, ctx->psb_reg, ctx->membase_reg);

    /* Load the lookup function pointer from the state block, call it with
     * the state block and target address, and resolve the chain
     * instruction with the returned value. */
    const int lookup_fn = rtl_alloc_register(unit, RTLTYPE_ADDRESS);
    rtl_add_insn(unit, RTLOP_LOAD, lookup_fn, ctx->psb_reg, 0,
                 ctx->handle->setup.state_offset_chain_lookup);
    const int lookup_value = rtl_alloc_register(unit, RTLTYPE_ADDRESS);
    rtl_add_insn(unit, RTLOP_CALL,
                 lookup_value, lookup_fn, ctx->psb_reg, nia);
    rtl_add_insn(unit, RTLOP_CHAIN_RESOLVE,
                 0, lookup_value, 0, chain_insn_index);
    rtl_add_insn(unit, RTLOP_RETURN, 0, ctx->psb_reg, 0, 0);
}

/*************************************************************************/
/*************************** Translation core ****************************/
/*************************************************************************/

/**
 * translate_illegal:  Translate an illegal instruction word.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
*/
static inline void translate_illegal(GuestPPCContext *ctx, uint32_t insn)
{
    /* The instruction word itself is not examined here. */
    rtl_add_insn(ctx->unit, RTLOP_ILLEGAL, 0, 0, 0, 0);
}

/*-----------------------------------------------------------------------*/

/**
 * translate_arith_imm:  Translate an integer register-immediate arithmetic
 * instruction.  For addi and addis, rA is assumed to be nonzero.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
 *     rtlop: RTL register-immediate instruction to perform the operation.
 *     shift_imm: True if the immediate value should be shifted left 16 bits.
 *     set_ca: True if XER[CA] should be set according to the result.
 *         Only valid when rtlop is RTLOP_ADDI (asserted).
 *     set_cr0: True if CR0 should be set according to the result.
 */
static void translate_arith_imm(
    GuestPPCContext *ctx, uint32_t insn, RTLOpcode rtlop, bool shift_imm,
    bool set_ca, bool set_cr0)
{
    RTLUnit * const unit = ctx->unit;

    const int rA = get_gpr(ctx, insn_rA(insn));

    /* Perform the 16-bit shift on an unsigned value: left-shifting a
     * negative signed value is undefined behavior in C, and SIMM is
     * negative for half of all encodings. */
    const int32_t simm = insn_SIMM(insn);
    const int32_t imm =
        shift_imm ? (int32_t)((uint32_t)simm << 16) : simm;

    const int result = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, rtlop, result, rA, 0, imm);
    set_gpr(ctx, insn_rD(insn), result);

    if (set_ca) {
        ASSERT(rtlop == RTLOP_ADDI);
        /* An unsigned add carried out iff the (wrapped) sum is less than
         * the addend. */
        const int xer = get_xer(ctx);
        const int ca = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_SLTUI, ca, result, 0, imm);
        const int new_xer = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_BFINS,
                     new_xer, xer, ca, XER_CA_SHIFT | 1<<8);
        set_xer(ctx, new_xer, -1);
    }

    if (set_cr0) {
        update_cr0(ctx, result);
    }
}

/*-----------------------------------------------------------------------*/

/**
 * translate_addsub_reg:  Translate an integer register-register add or
 * subtract instruction.
 *
 * This function implements addition and subtraction as a three-operand
 * addition operation following the PowerPC documentation, which is
 * relatively slow both to translate and to execute.  Simple operations
 * which set no flags should add appropriate RTL instructions directly
 * rather than calling this function.
* * [Parameters] * ctx: Translation context. * insn: Instruction word. * srcB_sel: Selector indicating the third operand to the addition: * 0 or -1 (constant), or 1 (use rB). * srcC_sel: Selector indicating the third operand to the addition: * 0 or 1 (constant), or -1 to use XER[CA]. * invert_rA: True to invert (one's-complement) rA, for a subf operation. * set_ca: True if XER[CA] should be set according to the result. */ static void translate_addsub_reg( GuestPPCContext *ctx, uint32_t insn, int srcB_sel, int srcC_sel, bool invert_rA, bool set_ca) { RTLUnit * const unit = ctx->unit; int result = 0; int srcA = get_gpr(ctx, insn_rA(insn)); if (invert_rA) { const int inverted = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_NOT, inverted, srcA, 0, 0); srcA = inverted; } int srcB = 0; if (srcB_sel > 0) { srcB = get_gpr(ctx, insn_rB(insn)); result = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ADD, result, srcA, srcB, 0); } else if (srcB_sel < 0) { result = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ADDI, result, srcA, 0, -1); } if (srcC_sel > 0) { const int temp = result ? result : srcA; result = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ADDI, result, temp, 0, 1); } else if (srcC_sel < 0) { const int temp = result ? result : srcA; const int xer = get_xer(ctx); const int ca = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFEXT, ca, xer, 0, XER_CA_SHIFT | 1<<8); result = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ADD, result, temp, ca, 0); } set_gpr(ctx, insn_rD(insn), result); /* Extract high bits of values needed below so we don't have to do it * twice. 
*/ const int a = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SRLI, a, srcA, 0, 31); int b = 0, a_xor_b = 0; if (srcB_sel > 0) { b = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SRLI, b, srcB, 0, 31); a_xor_b = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_XOR, a_xor_b, a, b, 0); } const int r = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SRLI, r, result, 0, 31); if (set_ca) { /* Carry calculation: XER[CA] = (a && b) || ((a != b) && !result) * where a, b, and result are the high bit of each value. * (Conceptually: carry always occurs if the high bit of both * inputs is set; when the high bit of exactly one input is set, * carry occurred if the high bit of the result is clear.) * We can also treat "a ^ b" as "a | b", since the latter gives * the same result, and we do so in the srcB_sel < 0 case. */ int ca; const int nr = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_XORI, nr, r, 0, 1); if (srcB_sel > 0) { const int a_and_b = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_AND, a_and_b, a, b, 0); const int and_nr = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_AND, and_nr, a_xor_b, nr, 0); ca = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, ca, a_and_b, and_nr, 0); } else if (srcB_sel < 0) { // b = 1 --> CA = a | !result ca = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, ca, a, nr, 0); } else { // b = 0 --> CA = a & !result ca = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_AND, ca, a, nr, 0); } const int xer = get_xer(ctx); const int new_xer = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFINS, new_xer, xer, ca, XER_CA_SHIFT | 1<<8); set_xer(ctx, new_xer, -1); } if (insn_OE(insn)) { /* Overflow calculation: XER[OV] = (a == b) && (a != result) * (where a, b, and result are the high bit of each value) * which we implement as: !(a ^ b) & (a ^ 
result) */ int ov; if (srcB_sel > 0) { const int n_a_xor_b = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_XORI, n_a_xor_b, a_xor_b, 0, 1); const int a_xor_r = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_XOR, a_xor_r, a, r, 0); ov = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_AND, ov, n_a_xor_b, a_xor_r, 0); } else if (srcB_sel < 0) { // b = 1 --> OV = a & !result const int nr = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_XORI, nr, r, 0, 1); ov = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_AND, ov, a, nr, 0); } else { // b = 0 --> OV = !a & result const int na = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_XORI, na, a, 0, 1); ov = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_AND, ov, na, r, 0); } const int xer = get_xer(ctx); const int masked_xer = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, masked_xer, xer, 0, ~XER_OV); const int SO_OV = rtl_imm32(unit, XER_SO | XER_OV); const int bits_to_set = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SELECT, bits_to_set, SO_OV, ov, ov); const int new_xer = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, new_xer, masked_xer, bits_to_set, 0); set_xer(ctx, new_xer, 0); } if (insn_Rc(insn)) { update_cr0(ctx, result); } } /*-----------------------------------------------------------------------*/ /** * translate_bitmisc: Translate the miscellaneous bit manipulation * instructions (cntlzw, extsh, extsb). * * [Parameters] * ctx: Translation context. * insn: Instruction word. * rtlop: RTL instruction to perform the operation. 
*/
static void translate_bitmisc(
    GuestPPCContext *ctx, uint32_t insn, RTLOpcode rtlop)
{
    RTLUnit * const unit = ctx->unit;

    /* These instructions read rS and write rA. */
    const int rS = get_gpr(ctx, insn_rS(insn));
    const int result = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, rtlop, result, rS, 0, 0);
    set_gpr(ctx, insn_rA(insn), result);

    if (insn_Rc(insn)) {
        update_cr0(ctx, result);
    }
}

/*-----------------------------------------------------------------------*/

/**
 * translate_branch_label:  Translate a branch to another block within the
 * current unit.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     address: Address of instruction being translated.
 *     BO: BO field of instruction word (0x14 for an unconditional branch).
 *     BI: BI field of instruction word (ignored if an unconditional branch).
 *     target: Branch target address.
 *     target_label: RTL label for the target block.
 */
static void translate_branch_label(
    GuestPPCContext *ctx, uint32_t address, int BO, int BI, uint32_t target,
    int target_label)
{
    const binrec_t * const handle = ctx->handle;
    RTLUnit * const unit = ctx->unit;

    const bool is_conditional = ((BO & 0x14) != 0x14);

    int skip_label;
    if ((BO & 0x14) == 0
     || (is_conditional && handle->use_branch_exit_test)) {
        /* Need an extra label in case a non-final test fails. */
        skip_label = rtl_alloc_label(unit);
    } else {
        skip_label = 0;
    }

    RTLOpcode branch_op = RTLOP_GOTO;
    int test_reg = 0;

    uint32_t crb_store_branch = 0;
    uint32_t crb_store_next = 0;
    uint16_t crb_reg[32];
    RTLInsn crb_insn[32];  // Copy of the instruction that sets each value.
    if (ctx->trim_cr_stores) {
        guest_ppc_trim_cr_stores(ctx, BO, BI, &crb_store_branch,
                                 &crb_store_next, crb_reg, crb_insn);
        /* If there are bits to store on the branch-taken path for a
         * conditional branch, we need a label for skipping past the branch
         * even for a single condition (much like when the branch exit test
         * is enabled). */
        if (crb_store_branch && !skip_label) {
            skip_label = rtl_alloc_label(unit);
        }
    }

    if (!(BO & 0x04)) {
        const int ctr = get_ctr(ctx);
        const int new_ctr = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_ADDI, new_ctr, ctr, 0, -1);
        set_ctr(ctx, new_ctr);
        /* Flush here so any update to CTR is stored along with other
         * pending changes. */
        flush_live_regs(ctx, false);
        if (skip_label) {
            /* Jump past the branch if the CTR condition fails. */
            rtl_add_insn(unit,
                         BO & 0x02 ? RTLOP_GOTO_IF_NZ : RTLOP_GOTO_IF_Z,
                         0, new_ctr, 0, skip_label);
        } else {
            branch_op = BO & 0x02 ? RTLOP_GOTO_IF_Z : RTLOP_GOTO_IF_NZ;
            test_reg = new_ctr;
        }
    } else {
        flush_live_regs(ctx, false);
    }
    /* All dirty registers have been flushed at this point. */

    if (!(BO & 0x10)) {
        const int test = test_crb(ctx, BI);
        if (handle->use_branch_exit_test || crb_store_branch) {
            /* Jump past the branch if the CR bit condition fails. */
            rtl_add_insn(unit,
                         BO & 0x08 ? RTLOP_GOTO_IF_Z : RTLOP_GOTO_IF_NZ,
                         0, test, 0, skip_label);
        } else {
            branch_op = BO & 0x08 ? RTLOP_GOTO_IF_NZ : RTLOP_GOTO_IF_Z;
            test_reg = test;
        }
    }

    /* Emit the CR bit stores needed only on the branch-taken path.  The
     * mask bit is cleared with an unsigned shift: bit can be 31, and
     * (1 << 31) overflows a signed int. */
    while (crb_store_branch) {
        const int bit = ctz32(crb_store_branch);
        crb_store_branch ^= UINT32_C(1) << bit;
        if (crb_insn[bit].opcode) {
            rtl_add_insn_copy(unit, &crb_insn[bit]);
        }
        rtl_add_insn(unit, RTLOP_SET_ALIAS,
                     0, crb_reg[bit], 0, ctx->alias.crb[bit]);
    }

    if (handle->post_insn_callback) {
        set_nia_imm(ctx, target);
    }
    post_insn_callback(ctx, address);

    if (handle->use_branch_exit_test) {
        /* All conditions were handled via skip_label above, so the branch
         * itself is unconditional; take it only if the exit flag loaded
         * from the state block is clear, and otherwise leave the unit
         * with NIA set to the target. */
        ASSERT(branch_op == RTLOP_GOTO);
        const int flag = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_LOAD, flag, ctx->psb_reg, 0,
                     ctx->handle->setup.state_offset_branch_exit_flag);
        rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, flag, 0, target_label);
        set_nia_imm(ctx, target);
        rtl_add_insn(unit, RTLOP_GOTO,
                     0, 0, 0, guest_ppc_get_epilogue_label(ctx));
    } else {
        rtl_add_insn(unit, branch_op, 0, test_reg, 0, target_label);
    }

    if (skip_label) {
        rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, skip_label);
    }

    /* Emit the CR bit stores needed only on the fall-through path. */
    while (crb_store_next) {
        const int bit = ctz32(crb_store_next);
        crb_store_next ^= UINT32_C(1) << bit;
        if (crb_insn[bit].opcode) {
            rtl_add_insn_copy(unit, &crb_insn[bit]);
        }
        rtl_add_insn(unit, RTLOP_SET_ALIAS,
                     0, crb_reg[bit], 0, ctx->alias.crb[bit]);
    }
}

/*-----------------------------------------------------------------------*/

/**
 * translate_branch_terminal:  Translate a branch which terminates
 * execution of the current unit.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     address: Address of instruction being translated.
 *     BO: BO field of instruction word (0x14 for an unconditional branch).
 *     BI: BI field of instruction word (ignored if an unconditional branch).
 *     LK: LK bit of instruction word.
 *     target: Target address, if an immediate branch (ignored for bclr/bcctr).
 *     target_lr: True if the instruction is bclr.
 *     target_ctr: True if the instruction is bcctr.
 */
static void translate_branch_terminal(
    GuestPPCContext *ctx, uint32_t address, int BO, int BI, int LK,
    uint32_t target, bool target_lr, bool target_ctr)
{
    RTLUnit * const unit = ctx->unit;

    int skip_label;
    if ((BO & 0x14) == 0x14) {
        skip_label = 0;  // Unconditional branch.
    } else {
        skip_label = rtl_alloc_label(unit);
    }

    /* For a terminal branch, we can only have dead stores if the branch
     * is conditional. */
    uint32_t crb_store_branch = 0;
    uint16_t crb_reg[32];
    RTLInsn crb_insn[32];
    if ((BO & 0x14) != 0x14 && ctx->trim_cr_stores) {
        uint32_t crb_store_next;
        guest_ppc_trim_cr_stores(ctx, BO, BI, &crb_store_branch,
                                 &crb_store_next, crb_reg, crb_insn);
        ASSERT(!crb_store_next);
    }

    if (!(BO & 0x04)) {
        const int ctr = get_ctr(ctx);
        const int new_ctr = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_ADDI, new_ctr, ctr, 0, -1);
        set_ctr(ctx, new_ctr);
        flush_live_regs(ctx, false);
        rtl_add_insn(unit, BO & 0x02 ? RTLOP_GOTO_IF_NZ : RTLOP_GOTO_IF_Z,
                     0, new_ctr, 0, skip_label);
    } else {
        flush_live_regs(ctx, false);
    }

    if (!(BO & 0x10)) {
        const int test = test_crb(ctx, BI);
        rtl_add_insn(unit, BO & 0x08 ?
RTLOP_GOTO_IF_Z : RTLOP_GOTO_IF_NZ, 0, test, 0, skip_label); } int nia; if (target_lr || target_ctr) { const int nia_raw = target_lr ? get_lr(ctx) : get_ctr(ctx); nia = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, nia, nia_raw, 0, -4); } else { nia = rtl_imm32(unit, target); } while (crb_store_branch) { const int bit = ctz32(crb_store_branch); crb_store_branch ^= 1 << bit; if (crb_insn[bit].opcode) { rtl_add_insn_copy(unit, &crb_insn[bit]); } rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, crb_reg[bit], 0, ctx->alias.crb[bit]); } if (LK) { /* Write LR directly rather than going through set_lr() so we don't * leak the modified LR to the not-taken code path. */ rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, rtl_imm32(unit, address+4), 0, ctx->alias.lr); } return_from_unit(ctx, address, nia, false); if (skip_label) { rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, skip_label); } } /*-----------------------------------------------------------------------*/ /** * translate_branch: Translate an immediate-displacement branch instruction. * * [Parameters] * ctx: Translation context. * address: Address of instruction being translated. * BO: BO field of instruction word (0x14 for an unconditional branch). * BI: BI field of instruction word (ignored if an unconditional branch). * disp: Displacement field of instruction word. * AA: AA bit of instruction word. * LK: LK bit of instruction word. 
*/
static void translate_branch(
    GuestPPCContext *ctx, uint32_t address, int BO, int BI, int32_t disp,
    int AA, int LK)
{
    ASSERT((address & 3) == 0);
    ASSERT((disp & 3) == 0);

    /* An absolute branch uses the displacement itself as the target; a
     * relative branch adds it to the address of this instruction. */
    const uint32_t target = AA ? (uint32_t)disp : address + disp;

    /* Only a non-linking branch is looked up as an intra-unit jump. */
    int target_label = 0;
    if (!LK) {
        ASSERT(ctx->num_blocks > 0);
        const int last = ctx->num_blocks - 1;
        const uint32_t unit_start = ctx->blocks[0].start;
        const uint32_t unit_end =
            (ctx->blocks[last].start + ctx->blocks[last].len - 1);

        if (target >= unit_start && target <= unit_end) {
            /* Binary search over the block list.  We break blocks at all
             * branch targets, so this search should always terminate at
             * a match. */
            int low = 0;
            int high = last;
            while (!target_label) {
                ASSERT(low <= high);
                const int probe = (low + high + 1) / 2;
                const GuestPPCBlockInfo *block = &ctx->blocks[probe];
                const uint32_t block_start = block->start;
                if (target == block_start) {
                    target_label = block->label;
                } else if (target < block_start) {
                    high = probe - 1;
                } else {
                    /* The target should never be in the middle of a block,
                     * since we split blocks at branch targets during
                     * scanning. */
                    ASSERT(target > block_start + block->len - 1);
                    low = probe + 1;
                }
            }
        }
    }

    if (target_label) {
        translate_branch_label(ctx, address, BO, BI, target, target_label);
    } else {
        translate_branch_terminal(ctx, address, BO, BI, LK, target,
                                  false, false);
    }
}

/*-----------------------------------------------------------------------*/

/**
 * translate_compare: Translate an integer compare instruction.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
 *     is_imm: True for a register-immediate compare (D-form instruction),
 *         false for a register-register compare (X-form instruction).
 *     is_signed: True for a signed compare, false for an unsigned compare.
*/
static void translate_compare(
    GuestPPCContext *ctx, uint32_t insn, bool is_imm, bool is_signed)
{
    RTLUnit * const unit = ctx->unit;

    const int rA = get_gpr(ctx, insn_rA(insn));

    const int lt = rtl_alloc_register(unit, RTLTYPE_INT32);
    const int gt = rtl_alloc_register(unit, RTLTYPE_INT32);
    const int eq = rtl_alloc_register(unit, RTLTYPE_INT32);

    /* Choose the opcodes and second operand for the instruction form.
     * The immediate form uses no second register, and the register form
     * uses no immediate, so the unused one is left at zero. */
    RTLOpcode lt_op, gt_op, eq_op;
    int rB = 0;
    int32_t imm = 0;
    if (is_imm) {
        imm = is_signed ? insn_SIMM(insn) : (int32_t)insn_UIMM(insn);
        lt_op = is_signed ? RTLOP_SLTSI : RTLOP_SLTUI;
        gt_op = is_signed ? RTLOP_SGTSI : RTLOP_SGTUI;
        eq_op = RTLOP_SEQI;
    } else {
        rB = get_gpr(ctx, insn_rB(insn));
        lt_op = is_signed ? RTLOP_SLTS : RTLOP_SLTU;
        gt_op = is_signed ? RTLOP_SGTS : RTLOP_SGTU;
        eq_op = RTLOP_SEQ;
    }
    rtl_add_insn(unit, lt_op, lt, rA, rB, imm);
    rtl_add_insn(unit, gt_op, gt, rA, rB, imm);
    rtl_add_insn(unit, eq_op, eq, rA, rB, imm);

    /* The fourth bit of the CR field is taken from XER[SO]. */
    const int so = get_xer_so(ctx);
    set_crf(ctx, insn_crfD(insn), lt, gt, eq, so);
}

/*-----------------------------------------------------------------------*/

/**
 * translate_compare_fp: Translate a floating-point compare instruction.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
 *     ordered: True for an ordered comparison (invalid exception on QNaN),
 *         false for an unordered comparison.
 *     ps_index: Paired-single slot index to compare (0 or 1).
 */
static void translate_compare_fp(
    GuestPPCContext *ctx, uint32_t insn, bool ordered, int ps_index)
{
    RTLUnit * const unit = ctx->unit;

    const int order_flag = ordered ? RTLFCMP_ORDERED : 0;

    /* Slot 1 is compared in single precision; anything else is compared
     * as a double-precision scalar. */
    int frA, frB;
    if (ps_index == 1) {
        frA = get_ps1(ctx, insn_frA(insn), RTLTYPE_FLOAT32);
        frB = get_ps1(ctx, insn_frB(insn), RTLTYPE_FLOAT32);
    } else {
        frA = get_fpr_as_type(ctx, insn_frA(insn), RTLTYPE_FLOAT64);
        frB = get_fpr_as_type(ctx, insn_frB(insn), RTLTYPE_FLOAT64);
    }

    const int lt = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_FCMP, lt, frA, frB, order_flag | RTLFCMP_LT);
    const int gt = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_FCMP, gt, frA, frB, order_flag | RTLFCMP_GT);
    const int eq = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_FCMP, eq, frA, frB, order_flag | RTLFCMP_EQ);
    const int un = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_FCMP, un, frA, frB, order_flag | RTLFCMP_UN);

    set_crf(ctx, insn_crfD(insn), lt, gt, eq, un);

    /* Everything below maintains FPSCR state, so it is skipped entirely
     * when that state is not being tracked. */
    if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE) {
        return;
    }

    /* Replace the low four bits of FR/FI/FPRF with lt:gt:eq:un, keeping
     * the bits selected by the 0x70 mask. */
    const int old_fprf = get_fr_fi_fprf(ctx);
    const int kept_bits = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_ANDI, kept_bits, old_fprf, 0, 0x70);
    const int lt_bit = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SLLI, lt_bit, lt, 0, 3);
    const int gt_bit = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SLLI, gt_bit, gt, 0, 2);
    const int eq_bit = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SLLI, eq_bit, eq, 0, 1);
    const int lt_gt = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_OR, lt_gt, lt_bit, gt_bit, 0);
    const int eq_un = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_OR, eq_un, eq_bit, un, 0);
    const int fpcc = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_OR, fpcc, lt_gt, eq_un, 0);
    const int new_fprf = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_OR, new_fprf, kept_bits, fpcc, 0);
    set_fr_fi_fprf(ctx, new_fprf);

    /* If the comparison raised no invalid-operation exception, there is
     * nothing more to do. */
    const int fpstate = rtl_alloc_register(unit, RTLTYPE_FPSTATE);
    rtl_add_insn(unit, RTLOP_FGETSTATE, fpstate, 0, 0, 0);
    const int invalid = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_FTESTEXC, invalid, fpstate, 0, RTLFEXC_INVALID);
    const int label_done = rtl_alloc_label(unit);
    rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, invalid, 0, label_done);

    const int cleared_state = rtl_alloc_register(unit, RTLTYPE_FPSTATE);
    rtl_add_insn(unit, RTLOP_FCLEAREXC, cleared_state, fpstate, 0, 0);
    rtl_add_insn(unit, RTLOP_FSETSTATE, 0, cleared_state, 0, 0);

    const int fpscr = get_fpscr(ctx);
    /* FPSCR is changed conditionally, so we can't save it. */
    ctx->live.fpscr = 0;
    ctx->last_set.fpscr = -1;

    if (ordered
     && !(ctx->handle->guest_opt & BINREC_OPT_G_PPC_IGNORE_FPSCR_VXFOO)) {
        /* If neither value is a SNaN, this is a VXVC exception from
         * ordered comparison of a QNaN. */
        const int label_snan = rtl_alloc_label(unit);
        check_snan(ctx, frA, label_snan);
        check_snan(ctx, frB, label_snan);
        set_fpscr_exceptions(ctx, fpscr, FPSCR_VXVC);
        rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_done);

        rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_snan);
        /* If this is a VXSNAN exception, we only set VXVC if VE is
         * clear. */
        const int label_snan_only = rtl_alloc_label(unit);
        const int ve_bit = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_ANDI, ve_bit, fpscr, 0, FPSCR_VE);
        rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, ve_bit, 0, label_snan_only);
        set_fpscr_exceptions(ctx, fpscr, FPSCR_VXSNAN | FPSCR_VXVC);
        rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_done);
        rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_snan_only);
    }

    set_fpscr_exceptions(ctx, fpscr, FPSCR_VXSNAN);

    rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_done);
}

/*-----------------------------------------------------------------------*/

/**
 * translate_dcbz: Translate a dcbz or dcbz_l instruction.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
*/ static void translate_dcbz(GuestPPCContext *ctx, uint32_t insn) { RTLUnit * const unit = ctx->unit; int addr32; if (insn_rA(insn)) { const int rA = get_gpr(ctx, insn_rA(insn)); const int rB = get_gpr(ctx, insn_rB(insn)); addr32 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ADD, addr32, rA, rB, 0); } else { addr32 = get_gpr(ctx, insn_rB(insn)); } const int addr32_aligned = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, addr32_aligned, addr32, 0, -32); const int addr_aligned = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ZCAST, addr_aligned, addr32_aligned, 0, 0); const int host_address = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ADD, host_address, ctx->membase_reg, addr_aligned, 0); const int zero_64 = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_LOAD_IMM, zero_64, 0, 0, 0); const int zero_128 = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64); rtl_add_insn(unit, RTLOP_VBROADCAST, zero_128, zero_64, 0, 0); rtl_add_insn(unit, RTLOP_STORE, 0, host_address, zero_128, 0); rtl_add_insn(unit, RTLOP_STORE, 0, host_address, zero_128, 16); } /*-----------------------------------------------------------------------*/ /** * translate_fctiw: Translate an fctiw or fctiwz instruction. * * [Parameters] * ctx: Translation context. * insn: Instruction word. * convert_op: RTL opcode for the conversion (FROUNDI or FTRUNCI). */ static void translate_fctiw(GuestPPCContext *ctx, uint32_t insn, RTLOpcode convert_op) { RTLUnit * const unit = ctx->unit; /* There's no need to convert from single to double precision if the * register is currently in single precision; the result would be the * same either way. */ const RTLDataType source_type = get_fpr_scalar_type(ctx, insn_frB(insn)); const int frB = get_fpr_as_type(ctx, insn_frB(insn), source_type); /* Do the actual conversion, and handle positive overflow if necessary. 
*/ const int conv_result = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, convert_op, conv_result, frB, 0, 0); const int imm_2_31 = rtl_alloc_register(unit, source_type); rtl_add_insn(unit, RTLOP_LOAD_IMM, imm_2_31, 0, 0, (source_type == RTLTYPE_FLOAT64 ? UINT64_C(0x41DFFFFFFFC00000) // 2147483647.0 : UINT64_C(0x4F000000))); // 2147483648.0f const int overflow = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_FCMP, overflow, frB, imm_2_31, RTLFCMP_GE); const int imm_intmax = rtl_imm32(unit, 0x7FFFFFFF); const int result32 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SELECT, result32, imm_intmax, conv_result, overflow); /* Set the high word of the result appropriately, unless disabled by * optimization flags. */ int result64 = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_ZCAST, result64, result32, 0, 0); if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_FAST_FCTIW)) { RTLDataType frB_bits_type; uint64_t negative_zero_val; if (unit->regs[frB].type == RTLTYPE_FLOAT32) { frB_bits_type = RTLTYPE_INT32; negative_zero_val = UINT64_C(1) << 31; } else { frB_bits_type = RTLTYPE_INT64; negative_zero_val = UINT64_C(1) << 63; } const int frB_bits = rtl_alloc_register(unit, frB_bits_type); rtl_add_insn(unit, RTLOP_BITCAST, frB_bits, frB, 0, 0); const int negative_zero = rtl_alloc_register(unit, frB_bits_type); rtl_add_insn(unit, RTLOP_LOAD_IMM, negative_zero, 0, 0, negative_zero_val); const int is_neg_zero = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_SEQ, is_neg_zero, frB_bits, negative_zero, 0); const int inz_shifted = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_SLLI, inz_shifted, is_neg_zero, 0, 32); const int high_word_base = rtl_imm64(unit, 0xFFF8000000000000); const int high_word = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_OR, high_word, high_word_base, inz_shifted, 0); const int new_bits = rtl_alloc_register(unit, RTLTYPE_INT64); 
rtl_add_insn(unit, RTLOP_OR, new_bits, result64, high_word, 0); result64 = new_bits; } const int result = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_BITCAST, result, result64, 0, 0); if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE) { set_fpr(ctx, insn_frD(insn), result); ctx->fpr_is_safe |= 1 << insn_frD(insn); if (insn_Rc(insn)) { update_cr1(ctx); } return; } /* Check for exceptions, like set_fp_result(). We can't call that * function directly because we don't return NaNs on exceptions. * We can also omit the overflow and underflow checks since fctiw[z] * doesn't raise those exceptions. */ const int fpscr = get_fpscr(ctx); ctx->live.fpscr = 0; ctx->last_set.fpscr = -1; ctx->live.fr_fi_fprf = 0; ctx->last_set.fr_fi_fprf = -1; flush_fpr(ctx, insn_frD(insn), true); const int fpstate = rtl_alloc_register(unit, RTLTYPE_FPSTATE); rtl_add_insn(unit, RTLOP_FGETSTATE, fpstate, 0, 0, 0); const int clearexc = rtl_alloc_register(unit, RTLTYPE_FPSTATE); rtl_add_insn(unit, RTLOP_FCLEAREXC, clearexc, fpstate, 0, 0); rtl_add_insn(unit, RTLOP_FSETSTATE, 0, clearexc, 0, 0); int label_out = rtl_alloc_label(unit); const int invalid = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_FTESTEXC, invalid, fpstate, 0, RTLFEXC_INVALID); const int label_no_vx = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, invalid, 0, label_no_vx); int label_check_ve_snan = 0; if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_IGNORE_FPSCR_VXFOO)) { const int label_snan = rtl_alloc_label(unit); check_snan(ctx, frB, label_snan); set_fpscr_exceptions(ctx, fpscr, FPSCR_VXCVI); label_check_ve_snan = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_check_ve_snan); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_snan); } set_fpscr_exceptions(ctx, fpscr, FPSCR_VXSNAN | FPSCR_VXCVI); if (label_check_ve_snan) { rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_check_ve_snan); } const int ve_test = rtl_alloc_register(unit, 
RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, ve_test, fpscr, 0, FPSCR_VE); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, ve_test, 0, label_no_vx); const int fr_fi_fprf = get_fr_fi_fprf(ctx); const int fr_fi_cleared = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, fr_fi_cleared, fr_fi_fprf, 0, 0x1F); set_fr_fi_fprf_and_flush(ctx, fr_fi_cleared); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_no_vx); set_fpr_and_flush(ctx, insn_frD(insn), result, true); const int fprf = gen_fprf(unit, result, 0); const int inexact = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_FTESTEXC, inexact, fpstate, 0, RTLFEXC_INEXACT); const int shifted_fi = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, shifted_fi, inexact, 0, 5); const int fi_fprf = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, fi_fprf, fprf, shifted_fi, 0); set_fr_fi_fprf_and_flush(ctx, fi_fprf); const int label_no_xx = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, inexact, 0, label_no_xx); set_fpscr_exceptions(ctx, 0, FPSCR_XX); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_no_xx); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_out); if (insn_Rc(insn)) { update_cr1(ctx); } } /*-----------------------------------------------------------------------*/ /** * check_fp_underflow: Check for, and raise if appropriate, an underflow * exception on a floating-point result whose magnitude is the minimum * normal value for its type. * * The PowerPC architecture defines underflow as "tiny before rounding", * while x86 (at least) defines it as "tiny after rounding". If we're not * ignoring precise underflow behavior and the result is equal to the * smallest (in magnitude) normal number, we may need to manually raise an * underflow exception in order to match PowerPC behavior. 
This function
 * checks for this case and, if it applies, runs the operation a second
 * time in round-toward-zero mode, which will raise an underflow exception
 * if appropriate regardless of whether the host detects underflow before
 * or after rounding.
 *
 * This function does nothing if underflow handling is disabled by
 * optimizations.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     result: Result of the operation.
 *     rtlop: RTL opcode to perform the operation.
 *     src1, src2, src3: RTL instruction operands.
 *     is_single: True if a single-precision (32-bit) operation, false if
 *         a double-precision (64-bit) operation.
 *     is_paired: True if a paired-single operation, false if not.
 *     use_float32: True if the input operands (src1/src2/src3) are of
 *         type FLOAT32 (V2_FLOAT32 for a paired-single operation), false
 *         if they are of type FLOAT64 (V2_FLOAT64 for paired-single).
 */
static void check_fp_underflow(
    GuestPPCContext *ctx, int result, RTLOpcode rtlop, int src1, int src2,
    int src3, bool is_single, bool is_paired, bool use_float32)
{
    const bool native_underflow =
        (ctx->handle->common_opt & BINREC_OPT_NATIVE_IEEE_UNDERFLOW) != 0;
    const bool no_fpscr_state =
        (ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE) != 0;
    if (native_underflow || no_fpscr_state) {
        return;
    }

    RTLUnit * const unit = ctx->unit;

    /* Bit pattern of the minimum normal value, shifted left by one so it
     * can be compared against a result whose sign bit was shifted out. */
    const RTLDataType bits_type = is_single ? RTLTYPE_INT32 : RTLTYPE_INT64;
    const uint64_t min_normal_x2_bits =
        is_single ? (uint64_t)(0x00800000<<1)
                  : UINT64_C(0x0010000000000000)<<1;
    const int min_normal_x2 = rtl_alloc_register(unit, bits_type);
    rtl_add_insn(unit, RTLOP_LOAD_IMM,
                 min_normal_x2, 0, 0, min_normal_x2_bits);

    /* For a paired-single result, test slot 1 first and jump straight to
     * the recheck if it matches; otherwise fall through to test slot 0. */
    int label_check_tiny = 0;
    if (is_paired) {
        ASSERT(is_single);
        const int ps1_value = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
        rtl_add_insn(unit, RTLOP_VEXTRACT, ps1_value, result, 0, 1);
        const int ps1_bits = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_BITCAST, ps1_bits, ps1_value, 0, 0);
        const int ps1_bits_x2 = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_SLLI, ps1_bits_x2, ps1_bits, 0, 1);
        const int ps1_is_min = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_SEQ,
                     ps1_is_min, ps1_bits_x2, min_normal_x2, 0);
        label_check_tiny = rtl_alloc_label(unit);
        rtl_add_insn(unit, RTLOP_GOTO_IF_NZ,
                     0, ps1_is_min, 0, label_check_tiny);
        const int ps0_value = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
        rtl_add_insn(unit, RTLOP_VEXTRACT, ps0_value, result, 0, 0);
        result = ps0_value;
    }

    const int value_bits = rtl_alloc_register(unit, bits_type);
    rtl_add_insn(unit, RTLOP_BITCAST, value_bits, result, 0, 0);
    /* Ignore the sign bit by shifting it out. */
    const int value_bits_x2 = rtl_alloc_register(unit, bits_type);
    rtl_add_insn(unit, RTLOP_SLLI, value_bits_x2, value_bits, 0, 1);
    const int value_is_min = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SEQ,
                 value_is_min, value_bits_x2, min_normal_x2, 0);
    const int label_skip_tiny = rtl_alloc_label(unit);
    rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, value_is_min, 0, label_skip_tiny);

    /* Note that we don't actually need to save this result; we just need
     * to execute the operations so that the underflow exception is raised
     * if appropriate.  The floating-point side effect check will prevent
     * these technically dead stores from being eliminated (unless
     * BINREC_OPT_DSE_FP is enabled, in which case this entire check is
     * meaningless anyway). */
    if (label_check_tiny) {
        rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_check_tiny);
    }

    /* Switch to truncating rounding for the repeated operation. */
    const int saved_state = rtl_alloc_register(unit, RTLTYPE_FPSTATE);
    rtl_add_insn(unit, RTLOP_FGETSTATE, saved_state, 0, 0, 0);
    const int trunc_state = rtl_alloc_register(unit, RTLTYPE_FPSTATE);
    rtl_add_insn(unit, RTLOP_FSETROUND,
                 trunc_state, saved_state, 0, RTLFROUND_TRUNC);
    rtl_add_insn(unit, RTLOP_FSETSTATE, 0, trunc_state, 0, 0);

    RTLDataType operand_type;
    if (is_paired) {
        operand_type = use_float32 ? RTLTYPE_V2_FLOAT32 : RTLTYPE_V2_FLOAT64;
    } else {
        operand_type = use_float32 ? RTLTYPE_FLOAT32 : RTLTYPE_FLOAT64;
    }
    const int scratch = rtl_alloc_register(unit, operand_type);
    rtl_add_insn(unit, rtlop, scratch, src1, src2, src3);
    if (is_single && !use_float32) {
        /* The operation ran in double precision, so repeat the narrowing
         * conversion as well. */
        const RTLDataType narrow_type =
            is_paired ? RTLTYPE_V2_FLOAT32 : RTLTYPE_FLOAT32;
        const int scratch32 = rtl_alloc_register(unit, narrow_type);
        rtl_add_insn(unit, is_paired ? RTLOP_VFCVT : RTLOP_FCVT,
                     scratch32, scratch, 0, 0);
    }

    /* Restore the original rounding mode while keeping whatever state the
     * repeated operation produced. */
    const int after_state = rtl_alloc_register(unit, RTLTYPE_FPSTATE);
    rtl_add_insn(unit, RTLOP_FGETSTATE, after_state, 0, 0, 0);
    const int restored_state = rtl_alloc_register(unit, RTLTYPE_FPSTATE);
    rtl_add_insn(unit, RTLOP_FCOPYROUND,
                 restored_state, after_state, saved_state, 0);
    rtl_add_insn(unit, RTLOP_FSETSTATE, 0, restored_state, 0, 0);

    rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_skip_tiny);
}

/*-----------------------------------------------------------------------*/

/**
 * translate_fp_arith: Translate a two-operand floating-point arithmetic
 * instruction.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
 *     rtlop: RTL opcode to perform the operation.
 *     is_single: True if a single-precision (32-bit) operation, false if
 *         a double-precision (64-bit) operation.
*     vxfoo_no_snan: FPSCR_VXFOO bitmask indicating which non-VXSNAN
 *         exception(s) can be raised by the instruction.
 */
static void translate_fp_arith(
    GuestPPCContext *ctx, uint32_t insn, RTLOpcode rtlop, bool is_single,
    uint32_t vxfoo_no_snan)
{
    RTLUnit * const unit = ctx->unit;

    /* Multiplication takes its second operand from frC; every other
     * operation handled here takes it from frB. */
    const int fpr_a = insn_frA(insn);
    const int fpr_b =
        (rtlop == RTLOP_FMUL) ? insn_frC(insn) : insn_frB(insn);

    /* Decide whether the operation itself can run in single precision. */
    bool use_float32 = false;
    if (is_single) {
        const bool a_is_f32 =
            (get_fpr_scalar_type(ctx, fpr_a) == RTLTYPE_FLOAT32);
        const bool b_is_f32 =
            (get_fpr_scalar_type(ctx, fpr_b) == RTLTYPE_FLOAT32);
        const bool any_input_suffices =
            (ctx->handle->guest_opt & BINREC_OPT_G_PPC_SINGLE_PREC_INPUTS)
            != 0;
        use_float32 = any_input_suffices ? (a_is_f32 || b_is_f32)
                                         : (a_is_f32 && b_is_f32);
    }
    const RTLDataType type = use_float32 ? RTLTYPE_FLOAT32 : RTLTYPE_FLOAT64;
    /* True when a single-precision instruction is computed in double
     * precision and its result must then be narrowed. */
    const bool narrow_result = is_single && !use_float32;

    int src1 = get_fpr_as_type(ctx, fpr_a, type);
    int src2 = get_fpr_as_type(ctx, fpr_b, type);
    if (narrow_result && rtlop == RTLOP_FMUL
     && !(ctx->handle->guest_opt & BINREC_OPT_G_PPC_FAST_FMULS)) {
        round_for_multiply(ctx, &src1, &src2);
    }

    int result = rtl_alloc_register(unit, type);
    rtl_add_insn(unit, rtlop, result, src1, src2, 0);
    if (narrow_result) {
        const int wide_result = result;
        result = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
        rtl_add_insn(unit, RTLOP_FCVT, result, wide_result, 0, 0);
    }

    check_fp_underflow(ctx, result, rtlop, src1, src2, 0,
                       is_single, false, use_float32);

    set_fp_result(ctx, insn_frD(insn), result, 0, src1, src2, 0, 0,
                  vxfoo_no_snan, true, rtlop == RTLOP_FDIV, true, true);
    if (insn_Rc(insn)) {
        update_cr1(ctx);
    }
}

/*-----------------------------------------------------------------------*/

/**
 * translate_fp_fma: Translate a floating-point multiply-add instruction.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
 *     rtlop: RTL opcode to perform the operation.
 *     is_single: True if a single-precision (32-bit) operation, false if a
 *         double-precision (64-bit) operation.
*     negate: True to negate a non-NaN result.
 */
static void translate_fp_fma(
    GuestPPCContext *ctx, uint32_t insn, RTLOpcode rtlop, bool is_single,
    bool negate)
{
    RTLUnit * const unit = ctx->unit;

    /* Decide whether the operation itself can run in single precision. */
    bool use_float32 = false;
    if (is_single) {
        const bool a_is_f32 =
            (get_fpr_scalar_type(ctx, insn_frA(insn)) == RTLTYPE_FLOAT32);
        const bool b_is_f32 =
            (get_fpr_scalar_type(ctx, insn_frB(insn)) == RTLTYPE_FLOAT32);
        const bool c_is_f32 =
            (get_fpr_scalar_type(ctx, insn_frC(insn)) == RTLTYPE_FLOAT32);
        if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_SINGLE_PREC_INPUTS) {
            /* It's still beneficial (or at least not harmful) to use
             * single precision if only one input is in single precision,
             * because we end up with two conversions either way. */
            use_float32 = a_is_f32 || b_is_f32 || c_is_f32;
        } else {
            use_float32 = a_is_f32 && b_is_f32 && c_is_f32;
        }
    }
    const RTLDataType type = use_float32 ? RTLTYPE_FLOAT32 : RTLTYPE_FLOAT64;
    /* True when a single-precision instruction is computed in double
     * precision and its result must then be narrowed. */
    const bool narrow_result = is_single && !use_float32;

    /* The operands are fetched in the order frA, frC, frB, which is also
     * the operand order of the RTL instruction. */
    int frA = get_fpr_as_type(ctx, insn_frA(insn), type);
    int frC = get_fpr_as_type(ctx, insn_frC(insn), type);
    int frB = get_fpr_as_type(ctx, insn_frB(insn), type);
    if (narrow_result
     && !(ctx->handle->guest_opt & BINREC_OPT_G_PPC_FAST_FMULS)) {
        round_for_multiply(ctx, &frA, &frC);
    }

    int result = rtl_alloc_register(unit, type);
    rtl_add_insn(unit, rtlop, result, frA, frC, frB);
    if (negate) {
        result = fma_negate(ctx, result);
    }

    /* If the result is a NaN copied from an input operand, we need to
     * ensure that the proper input operand is selected (unless the
     * NATIVE_IEEE_NAN optimization is enabled, in which case we don't
     * care).  The RTL FMADD-group instructions are defined to choose in
     * the order {src1, src2, src3}, which for us is {frA, frC, frB}, but
     * PowerPC gives frB precedence over frC, so we have to manually load
     * the frB NaN in that case. */
    if (!(ctx->handle->common_opt & BINREC_OPT_NATIVE_IEEE_NAN)) {
        result = fma_select_nan(ctx, result, frA, frB, frC);
    }

    if (narrow_result) {
        if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_FAST_FMADDS) {
            const int wide_result = result;
            result = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
            rtl_add_insn(unit, RTLOP_FCVT, result, wide_result, 0, 0);
        } else {
            result = round_fma_result_to_single(
                ctx, result, rtlop, frA, frB, frC);
        }
    }

    check_fp_underflow(ctx, result, rtlop, frA, frC, frB,
                       is_single, false, use_float32);

    set_fp_result(ctx, insn_frD(insn), result, 0, frA, frB, frC, 0,
                  FPSCR_VXIMZ | FPSCR_VXISI, true, false, true, true);
    if (insn_Rc(insn)) {
        update_cr1(ctx);
    }
}

/*-----------------------------------------------------------------------*/

/**
 * translate_fres_lookup: Translate a reciprocal estimate operation using
 * lookup tables.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     input: RTL register containing the source value (type FLOAT32).
 *     output: RTL alias to which to store the result.
 *     label_abort: RTL label to which to jump if the operation is aborted
 *         due to an unmasked invalid-operation exception. May be zero if
 *         the BINREC_OPT_G_PPC_NO_FPSCR_STATE optimization is enabled.
 */
static void translate_fres_lookup(GuestPPCContext *ctx, int input, int output,
                                  int label_abort)
{
    RTLUnit * const unit = ctx->unit;

    const int alias_exp = rtl_alloc_alias_register(unit, RTLTYPE_INT32);
    const int alias_mant = rtl_alloc_alias_register(unit, RTLTYPE_INT32);
    const int label_out = rtl_alloc_label(unit);
    int label_pre_abort = 0, alias_fi = 0, fpscr_in = 0;
    if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) {
        label_pre_abort = rtl_alloc_label(unit);
        alias_fi = rtl_alloc_alias_register(unit, RTLTYPE_INT32);
        fpscr_in = get_fpscr(ctx);
        ctx->live.fpscr = 0;
        ctx->last_set.fpscr = -1;
        ctx->live.fr_fi_fprf = 0;
        ctx->last_set.fr_fi_fprf = -1;
    }

    /* Extract the sign bit, exponent, and mantissa, and check for special
     * cases.
*/ const int input_bits = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BITCAST, input_bits, input, 0, 0); const int sign = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, sign, input_bits, 0, 1<<31); const int mantissa = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFEXT, mantissa, input_bits, 0, 0 | 23<<8); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, mantissa, 0, alias_mant); const int exponent = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFEXT, exponent, input_bits, 0, 23 | 8<<8); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, exponent, 0, alias_exp); const int label_exp_0 = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, exponent, 0, label_exp_0); const int exp_is_255 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SEQI, exp_is_255, exponent, 0, 255); const int label_exp_255 = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, exp_is_255, 0, label_exp_255); const int label_continue = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_continue); /* Handle an infinite or NaN input. 
*/ rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_exp_255); const int label_nan = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, mantissa, 0, label_nan); const int zero_result = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_BITCAST, zero_result, sign, 0, 0); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, zero_result, 0, output); if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) { const int pzero = rtl_imm32(unit, 0x02); const int nzero = rtl_imm32(unit, 0x12); const int fprf = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SELECT, fprf, nzero, pzero, sign); set_fr_fi_fprf_and_flush(ctx, fprf); } rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_nan); if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) { const int quiet_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, quiet_test, mantissa, 0, 0x400000); const int label_snan = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, quiet_test, 0, label_snan); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, input, 0, output); set_fr_fi_fprf_and_flush(ctx, rtl_imm32(unit,0x11)); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_snan); set_fpscr_exceptions(ctx, 0, FPSCR_VXSNAN); const int ve_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, ve_test, fpscr_in, 0, FPSCR_VE); rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, ve_test, 0, label_pre_abort); } const int quiet_bits = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ORI, quiet_bits, input_bits, 0, 0x400000); const int quiet_result = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_BITCAST, quiet_result, quiet_bits, 0, 0); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, quiet_result, 0, output); if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) { set_fr_fi_fprf_and_flush(ctx, rtl_imm32(unit,0x11)); } 
rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out); /* Handle a zero or denormalized input. */ rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_exp_0); const int label_not_zero = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, mantissa, 0, label_not_zero); if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) { set_fpscr_exceptions(ctx, 0, FPSCR_ZX); const int ze_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, ze_test, fpscr_in, 0, FPSCR_ZE); rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, ze_test, 0, label_pre_abort); } const int inf_bits = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ORI, inf_bits, sign, 0, 0x7F800000); const int inf_result = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_BITCAST, inf_result, inf_bits, 0, 0); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, inf_result, 0, output); if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) { const int pinf = rtl_imm32(unit, 0x05); const int ninf = rtl_imm32(unit, 0x09); const int fprf = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SELECT, fprf, ninf, pinf, sign); set_fr_fi_fprf_and_flush(ctx, fprf); } rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_not_zero); const int overflow_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLTUI, overflow_test, mantissa, 0, 0x200000); const int label_not_overflow = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, overflow_test, 0, label_not_overflow); const int huge_bits = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ORI, huge_bits, sign, 0, 0x7F7FFFFF); const int huge_result = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_BITCAST, huge_result, huge_bits, 0, 0); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, huge_result, 0, output); if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) { set_fpscr_exceptions(ctx, 0, 
FPSCR_OX); const int pnorm_fi = rtl_imm32(unit, 0x24); const int nnorm_fi = rtl_imm32(unit, 0x28); const int fprf = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SELECT, fprf, nnorm_fi, pnorm_fi, sign); set_fr_fi_fprf_and_flush(ctx, fprf); } rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_not_overflow); const int shifted_mant_1 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, shifted_mant_1, mantissa, 0, 1); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, shifted_mant_1, 0, alias_mant); const int normalized_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, normalized_test, shifted_mant_1, 0, 0x800000); const int label_normalized = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, normalized_test, 0, label_normalized); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, rtl_imm32(unit,-1), 0, alias_exp); const int shifted_mant_2 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, shifted_mant_2, shifted_mant_1, 0, 1); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, shifted_mant_2, 0, alias_mant); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_normalized); const int normalized_mant = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_GET_ALIAS, normalized_mant, 0, 0, alias_mant); const int cleared_800000 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, cleared_800000, normalized_mant, 0, 0x7FFFFF); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, cleared_800000, 0, alias_mant); /* Calculate the new exponent and mantissa. 
*/ rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_continue); const int lut = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_LOAD, lut, ctx->psb_reg, 0, ctx->handle->setup.state_offsets_ppc.fres_lut); const int mantissa_in = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_GET_ALIAS, mantissa_in, 0, 0, alias_mant); const int index = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SRLI, index, mantissa_in, 0, 18); const int index4 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, index4, index, 0, 2); const int cst_253 = rtl_imm32(unit, 253); const int exponent_in = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_GET_ALIAS, exponent_in, 0, 0, alias_exp); const int new_exp = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SUB, new_exp, cst_253, exponent_in, 0); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, new_exp, 0, alias_exp); const int index4_addr = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ZCAST, index4_addr, index4, 0, 0); const int entry_addr = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ADD, entry_addr, lut, index4_addr, 0); const int delta = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_LOAD_U16, delta, entry_addr, 0, 2); const int base = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_LOAD_U16, base, entry_addr, 0, 0); const int mult = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFEXT, mult, mantissa_in, 0, 8 | 10<<8); const int mult_delta = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_MUL, mult_delta, mult, delta, 0); const int shifted_base = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, shifted_base, base, 0, 10); const int lookup_result = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SUB, lookup_result, shifted_base, mult_delta, 0); if (!(ctx->handle->guest_opt & 
BINREC_OPT_G_PPC_NO_FPSCR_STATE)) { const int fi = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, fi, lookup_result, 0, 1); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, fi, 0, alias_fi); } const int lookup_mant = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SRLI, lookup_mant, lookup_result, 0, 1); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, lookup_mant, 0, alias_mant); /* Denormalize the result if the exponent is not positive. */ const int label_denormalize_1 = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, new_exp, 0, label_denormalize_1); const int exp_neg = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLTSI, exp_neg, new_exp, 0, 0); const int label_denormalize_2 = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, exp_neg, 0, label_denormalize_2); /* Build and store the final value, and update FPSCR if appropriate. */ const int label_finalize = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_finalize); const int final_mantissa = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_GET_ALIAS, final_mantissa, 0, 0, alias_mant); const int sign_mant = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, sign_mant, final_mantissa, sign, 0); const int final_exponent = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_GET_ALIAS, final_exponent, 0, 0, alias_exp); const int shifted_final_exp = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, shifted_final_exp, final_exponent, 0, 23); const int final_bits = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, final_bits, sign_mant, shifted_final_exp, 0); const int result = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_BITCAST, result, final_bits, 0, 0); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, result, 0, output); if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) { const int fprf = gen_fprf(unit, 
result, 0); const int fi = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_GET_ALIAS, fi, 0, 0, alias_fi); const int shifted_fi = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, shifted_fi, fi, 0, 5); const int label_no_ux = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, shifted_fi, 0, label_no_ux); rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, shifted_final_exp, 0, label_no_ux); set_fpscr_exceptions(ctx, 0, FPSCR_UX); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_no_ux); const int fi_fprf = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, fi_fprf, shifted_fi, fprf, 0); set_fr_fi_fprf_and_flush(ctx, fi_fprf); } rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out); /* Handle denormalizing a tiny result. */ rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_denormalize_2); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, rtl_imm32(unit,0), 0, alias_exp); const int denorm2_mant_in = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_GET_ALIAS, denorm2_mant_in, 0, 0, alias_mant); if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) { const int fi_in = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_GET_ALIAS, fi_in, 0, 0, alias_fi); const int mant_low_bits = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, mant_low_bits, denorm2_mant_in, 0, 3); const int fi_add = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SGTUI, fi_add, mant_low_bits, 0, 0); const int new_fi = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, new_fi, fi_in, fi_add, 0); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, new_fi, 0, alias_fi); } const int denorm2_temp = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ORI, denorm2_temp, denorm2_mant_in, 0, 0x800000); const int denorm2_mant_out = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SRLI, denorm2_mant_out, denorm2_temp, 0, 2); rtl_add_insn(unit, 
RTLOP_SET_ALIAS, 0, denorm2_mant_out, 0, alias_mant); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_finalize); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_denormalize_1); const int denorm1_mant_in = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_GET_ALIAS, denorm1_mant_in, 0, 0, alias_mant); if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) { const int fi_in = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_GET_ALIAS, fi_in, 0, 0, alias_fi); const int fi_add = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, fi_add, denorm1_mant_in, 0, 1); const int new_fi = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, new_fi, fi_in, fi_add, 0); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, new_fi, 0, alias_fi); } const int denorm1_temp = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ORI, denorm1_temp, denorm1_mant_in, 0, 0x800000); const int denorm1_mant_out = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SRLI, denorm1_mant_out, denorm1_temp, 0, 1); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, denorm1_mant_out, 0, alias_mant); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_finalize); if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) { rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_pre_abort); const int fr_fi_fprf = get_fr_fi_fprf(ctx); const int fr_fi_cleared = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, fr_fi_cleared, fr_fi_fprf, 0, 0x1F); set_fr_fi_fprf_and_flush(ctx, fr_fi_fprf); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_abort); } rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_out); } /*-----------------------------------------------------------------------*/ /** * translate_frsqrte_lookup: Translate a reciprocal square root estimate * operation using lookup tables. 
*
 * It's worth noting that although the PowerPC 750CL manual states, in
 * relation to the lack of a single-precision version of frsqrte, that
 * "both frB and frD are representable in single-precision format" (which
 * itself is a nonsensical statement, since frB is an input rather than an
 * output operand), the actual implementation can generate values which
 * are _not_ representable in single precision, even on single-precision
 * inputs.  For example, the single-precision input 0x4080_0100 gives the
 * double-precision result 0x3FDF_FE61_7000_0000, which has 24 bits of
 * precision in the mantissa.
 *
 * The width of the computation (32 or 64 bits) is chosen from the type
 * of the input register.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     input: RTL register containing the source value (type FLOAT32 or
 *         FLOAT64).
 *     output: RTL alias to which to store the result.
 *     label_abort: RTL label to which to jump if the operation is aborted
 *         due to an enabled invalid-operation or zero-divide exception
 *         (the generated code tests FPSCR VE and ZE).  May be zero if
 *         the BINREC_OPT_G_PPC_NO_FPSCR_STATE optimization is enabled.
 */
static void translate_frsqrte_lookup(GuestPPCContext *ctx, int input,
                                     int output, int label_abort)
{
    RTLUnit * const unit = ctx->unit;

    /* Select float and same-width integer types to match the input. */
    const bool is32 = (unit->regs[input].type == RTLTYPE_FLOAT32);
    const RTLDataType type = is32 ? RTLTYPE_FLOAT32 : RTLTYPE_FLOAT64;
    const RTLDataType bits_type = is32 ? RTLTYPE_INT32 : RTLTYPE_INT64;

    /* The exponent and upper mantissa bits are carried in aliases because
     * the denormal path below rewrites them before the table lookup. */
    const int alias_exp = rtl_alloc_alias_register(unit, RTLTYPE_INT32);
    const int alias_mant_hi = rtl_alloc_alias_register(unit, RTLTYPE_INT32);
    const int label_out = rtl_alloc_label(unit);

    int label_pre_abort = 0, fpscr_in = 0;
    if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) {
        label_pre_abort = rtl_alloc_label(unit);
        fpscr_in = get_fpscr(ctx);
        /* Reset the cached FPSCR / FR-FI-FPRF tracking state before
         * emitting the branching code below. */
        ctx->live.fpscr = 0;
        ctx->last_set.fpscr = -1;
        ctx->live.fr_fi_fprf = 0;
        ctx->last_set.fr_fi_fprf = -1;
    }

    /* Extract the sign bit, exponent, and mantissa, and check for special
     * cases. */
    const int input_bits = rtl_alloc_register(unit, bits_type);
    rtl_add_insn(unit, RTLOP_BITCAST, input_bits, input, 0, 0);
    /* input_hibits is a 32-bit view of the word holding the sign, the
     * exponent, and the top mantissa bits (the upper half for FLOAT64). */
    int input_hibits;
    if (is32) {
        input_hibits = input_bits;
    } else {
        const int input_hibits_64 = rtl_alloc_register(unit, bits_type);
        rtl_add_insn(unit, RTLOP_SRLI, input_hibits_64, input_bits, 0, 32);
        input_hibits = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_ZCAST, input_hibits, input_hibits_64, 0, 0);
    }
    const int sign_mask = rtl_alloc_register(unit, bits_type);
    rtl_add_insn(unit, RTLOP_LOAD_IMM, sign_mask, 0, 0,
                 is32 ? 1u<<31 : UINT64_C(1)<<63);
    const int sign = rtl_alloc_register(unit, bits_type);
    rtl_add_insn(unit, RTLOP_AND, sign, input_bits, sign_mask, 0);
    const int mantissa_hi = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_BFEXT, mantissa_hi, input_hibits, 0,
                 0 | (is32 ? 23 : 20) << 8);
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, mantissa_hi, 0, alias_mant_hi);
    const int exponent = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_BFEXT, exponent, input_hibits, 0,
                 is32 ? 23 | 8<<8 : 20 | 11<<8);
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, exponent, 0, alias_exp);
    const int label_exp_0 = rtl_alloc_label(unit);
    rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, exponent, 0, label_exp_0);
    const int exp_is_max = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SEQI, exp_is_max, exponent, 0,
                 is32 ? 255 : 2047);
    const int label_exp_max = rtl_alloc_label(unit);
    rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, exp_is_max, 0, label_exp_max);
    /* A negative input with an ordinary exponent is invalid. */
    const int label_vxsqrt = rtl_alloc_label(unit);
    rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, sign, 0, label_vxsqrt);
    const int label_continue = rtl_alloc_label(unit);
    rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_continue);

    /* Handle an infinite or NaN input. */
    rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_exp_max);
    /* The full mantissa (not just the upper bits) decides NaN versus
     * infinity. */
    const int expmax_mantissa = rtl_alloc_register(unit, bits_type);
    rtl_add_insn(unit, RTLOP_BFEXT, expmax_mantissa, input_bits, 0,
                 0 | (is32 ? 23 : 52) << 8);
    const int label_nan = rtl_alloc_label(unit);
    rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, expmax_mantissa, 0, label_nan);
    /* Negative infinity turns into a NaN. */
    rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, sign, 0, label_vxsqrt);
    /* Positive infinity gives a result with all bits zero. */
    const int zero_result = rtl_alloc_register(unit, type);
    rtl_add_insn(unit, RTLOP_LOAD_IMM, zero_result, 0, 0, 0);
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, zero_result, 0, output);
    if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) {
        set_fr_fi_fprf_and_flush(ctx, rtl_imm32(unit,0x02));
    }
    rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out);

    rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_nan);
    if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) {
        /* A NaN with the quiet bit (the top mantissa bit) set is passed
         * through unchanged; one with it clear is treated as signaling. */
        const int quiet_test = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_ANDI, quiet_test, mantissa_hi, 0,
                     is32 ? 0x400000 : 0x80000);
        const int label_snan = rtl_alloc_label(unit);
        rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, quiet_test, 0, label_snan);
        rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, input, 0, output);
        set_fr_fi_fprf_and_flush(ctx, rtl_imm32(unit,0x11));
        rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out);
        rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_snan);
        set_fpscr_exceptions(ctx, 0, FPSCR_VXSNAN);
        /* Abort (leaving the output unwritten) if VE is set. */
        const int ve_test = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_ANDI, ve_test, fpscr_in, 0, FPSCR_VE);
        rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, ve_test, 0, label_pre_abort);
    }
    /* Return the input NaN with its quiet bit forced on. */
    const int quiet_mask = rtl_alloc_register(unit, bits_type);
    rtl_add_insn(unit, RTLOP_LOAD_IMM, quiet_mask, 0, 0,
                 is32 ? 1<<22 : UINT64_C(1)<<51);
    const int quiet_bits = rtl_alloc_register(unit, bits_type);
    rtl_add_insn(unit, RTLOP_OR, quiet_bits, input_bits, quiet_mask, 0);
    const int quiet_result = rtl_alloc_register(unit, type);
    rtl_add_insn(unit, RTLOP_BITCAST, quiet_result, quiet_bits, 0, 0);
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, quiet_result, 0, output);
    if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) {
        set_fr_fi_fprf_and_flush(ctx, rtl_imm32(unit,0x11));
    }
    rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out);

    /* Handle a zero or denormalized input. */
    rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_exp_0);
    const int exp0_mantissa = rtl_alloc_register(unit, bits_type);
    rtl_add_insn(unit, RTLOP_BFEXT, exp0_mantissa, input_bits, 0,
                 0 | (is32 ? 23 : 52) << 8);
    const int label_not_zero = rtl_alloc_label(unit);
    rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, exp0_mantissa, 0, label_not_zero);
    if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) {
        set_fpscr_exceptions(ctx, 0, FPSCR_ZX);
        /* Abort (leaving the output unwritten) if ZE is set. */
        const int ze_test = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_ANDI, ze_test, fpscr_in, 0, FPSCR_ZE);
        rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, ze_test, 0, label_pre_abort);
    }
    /* Zero: the result is an infinity carrying the sign of the input. */
    const int pinf_bits = rtl_alloc_register(unit, bits_type);
    rtl_add_insn(unit, RTLOP_LOAD_IMM, pinf_bits, 0, 0,
                 is32 ? 0x7F800000 : UINT64_C(0x7FF0000000000000));
    const int inf_bits = rtl_alloc_register(unit, bits_type);
    rtl_add_insn(unit, RTLOP_OR, inf_bits, sign, pinf_bits, 0);
    const int inf_result = rtl_alloc_register(unit, type);
    rtl_add_insn(unit, RTLOP_BITCAST, inf_result, inf_bits, 0, 0);
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, inf_result, 0, output);
    if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) {
        const int pinf = rtl_imm32(unit, 0x05);
        const int ninf = rtl_imm32(unit, 0x09);
        const int fprf = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_SELECT, fprf, ninf, pinf, sign);
        set_fr_fi_fprf_and_flush(ctx, fprf);
    }
    rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out);

    rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_not_zero);
    /* A negative denormal is invalid, like any other negative input. */
    rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, sign, 0, label_vxsqrt);
    /* Normalize the denormal: the leading-zero count of the mantissa
     * gives both the left-shift amount and the (non-positive) exponent
     * stored for the lookup below. */
    const int norm_mant_clz = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_CLZ, norm_mant_clz, exp0_mantissa, 0, 0);
    const int norm_mant_shift = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_ADDI, norm_mant_shift, norm_mant_clz, 0,
                 is32 ? -8 : -11);
    const int norm_neg_exp = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_ADDI, norm_neg_exp, norm_mant_clz, 0,
                 is32 ? -9 : -12);
    const int norm_shifted_mant64 = rtl_alloc_register(unit, bits_type);
    rtl_add_insn(unit, RTLOP_SLL,
                 norm_shifted_mant64, exp0_mantissa, norm_mant_shift, 0);
    const int norm_exp = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_NEG, norm_exp, norm_neg_exp, 0, 0);
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, norm_exp, 0, alias_exp);
    const int norm_mant64_hi = rtl_alloc_register(unit, bits_type);
    rtl_add_insn(unit, RTLOP_BFEXT, norm_mant64_hi, norm_shifted_mant64, 0,
                 is32 ? 0 | 23<<8 : 32 | 20<<8);
    const int norm_mant_hi = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_ZCAST, norm_mant_hi, norm_mant64_hi, 0, 0);
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, norm_mant_hi, 0, alias_mant_hi);

    /* Calculate the new exponent and mantissa. */
    rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_continue);
    const int lut = rtl_alloc_register(unit, RTLTYPE_ADDRESS);
    rtl_add_insn(unit, RTLOP_LOAD, lut, ctx->psb_reg, 0,
                 ctx->handle->setup.state_offsets_ppc.frsqrte_lut);
    const int mantissa_in = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_GET_ALIAS, mantissa_in, 0, 0, alias_mant_hi);
    /* The byte offset into the table is built from the top 4 mantissa
     * bits (times 4) and the inverted low exponent bit (times 64). */
    const int index_mant = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SRLI, index_mant, mantissa_in, 0,
                 is32 ? 19 : 16);
    const int index4_mant = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SLLI, index4_mant, index_mant, 0, 2);
    const int exponent_in = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_GET_ALIAS, exponent_in, 0, 0, alias_exp);
    const int exp_lowbit = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_ANDI, exp_lowbit, exponent_in, 0, 1);
    const int exp_inv_lowbit = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_XORI, exp_inv_lowbit, exp_lowbit, 0, 1);
    const int exp_lowbit_sll6 = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SLLI, exp_lowbit_sll6, exp_inv_lowbit, 0, 6);
    const int index4 = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_OR, index4, index4_mant, exp_lowbit_sll6, 0);
    /* The result exponent is (380 - exponent) / 2 for FLOAT32 and
     * (3068 - exponent) / 2 for FLOAT64; the constant keeps its
     * double-precision name in both cases. */
    const int cst_3068 = rtl_imm32(unit, is32 ? 380 : 3068);
    const int subbed_exp = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SUB, subbed_exp, cst_3068, exponent_in, 0);
    const int final_exponent = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SRLI, final_exponent, subbed_exp, 0, 1);
    const int index4_addr = rtl_alloc_register(unit, RTLTYPE_ADDRESS);
    rtl_add_insn(unit, RTLOP_ZCAST, index4_addr, index4, 0, 0);
    const int entry_addr = rtl_alloc_register(unit, RTLTYPE_ADDRESS);
    rtl_add_insn(unit, RTLOP_ADD, entry_addr, lut, index4_addr, 0);
    /* Each entry holds a 16-bit base at offset 0 and a 16-bit delta at
     * offset 2; the estimate is (base << 11) minus delta times the next
     * 11 mantissa bits below the index bits. */
    const int delta = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_LOAD_U16, delta, entry_addr, 0, 2);
    const int base = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_LOAD_U16, base, entry_addr, 0, 0);
    const int mult = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_BFEXT, mult, mantissa_in, 0,
                 (is32 ? 8 : 5) | 11<<8);
    const int mult_delta = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_MUL, mult_delta, mult, delta, 0);
    const int shifted_base = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SLLI, shifted_base, base, 0, 11);
    const int lookup_result = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SUB, lookup_result, shifted_base, mult_delta, 0);

    /* Build and store the final value, and update FPSCR if appropriate. */
    int final_bits;
    if (is32) {
        /* FLOAT32: drop the low 3 bits of the estimate to form the
         * mantissa field. */
        const int final_mant = rtl_alloc_register(unit, bits_type);
        rtl_add_insn(unit, RTLOP_SRLI, final_mant, lookup_result, 0, 3);
        const int shifted_exp = rtl_alloc_register(unit, bits_type);
        rtl_add_insn(unit, RTLOP_SLLI, shifted_exp, final_exponent, 0, 23);
        const int sign_mant = rtl_alloc_register(unit, bits_type);
        rtl_add_insn(unit, RTLOP_OR, sign_mant, final_mant, sign, 0);
        final_bits = rtl_alloc_register(unit, bits_type);
        rtl_add_insn(unit, RTLOP_OR, final_bits, sign_mant, shifted_exp, 0);
    } else {
        /* FLOAT64: widen to 64 bits and shift the estimate up by 26 to
         * form the mantissa field. */
        const int lookup_mant64 = rtl_alloc_register(unit, bits_type);
        rtl_add_insn(unit, RTLOP_ZCAST, lookup_mant64, lookup_result, 0, 0);
        const int final_exp64 = rtl_alloc_register(unit, bits_type);
        rtl_add_insn(unit, RTLOP_ZCAST, final_exp64, final_exponent, 0, 0);
        const int final_mant64 = rtl_alloc_register(unit, bits_type);
        rtl_add_insn(unit, RTLOP_SLLI, final_mant64, lookup_mant64, 0, 26);
        const int shifted_exp64 = rtl_alloc_register(unit, bits_type);
        rtl_add_insn(unit, RTLOP_SLLI, shifted_exp64, final_exp64, 0, 52);
        const int sign_mant64 = rtl_alloc_register(unit, bits_type);
        rtl_add_insn(unit, RTLOP_OR, sign_mant64, final_mant64, sign, 0);
        final_bits = rtl_alloc_register(unit, bits_type);
        rtl_add_insn(unit, RTLOP_OR,
                     final_bits, sign_mant64, shifted_exp64, 0);
    }
    const int result = rtl_alloc_register(unit, type);
    rtl_add_insn(unit, RTLOP_BITCAST, result, final_bits, 0, 0);
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, result, 0, output);
    if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) {
        const int fprf = gen_fprf(unit, result, 0);
        set_fr_fi_fprf_and_flush(ctx, fprf);
    }
    rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out);

    /* Handle an invalid (negative) input. */
    rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_vxsqrt);
    if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) {
        set_fpscr_exceptions(ctx, 0, FPSCR_VXSQRT);
        /* Abort (leaving the output unwritten) if VE is set. */
        const int ve_test = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_ANDI, ve_test, fpscr_in, 0, FPSCR_VE);
        rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, ve_test, 0, label_pre_abort);
        set_fr_fi_fprf_and_flush(ctx, rtl_imm32(unit,0x11));
    }
    /* The result is a positive NaN with only the quiet bit set in the
     * mantissa. */
    const int qnan_result = rtl_alloc_register(unit, type);
    rtl_add_insn(unit, RTLOP_LOAD_IMM, qnan_result, 0, 0,
                 is32 ? 0x7FC00000 : UINT64_C(0x7FF8000000000000));
    rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, qnan_result, 0, output);
    rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out);

    if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) {
        /* Aborted operation: keep only the low 5 bits of the current
         * FR/FI/FPRF value, then jump to the caller's abort label. */
        rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_pre_abort);
        const int fr_fi_fprf = get_fr_fi_fprf(ctx);
        const int fr_fi_cleared = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_ANDI, fr_fi_cleared, fr_fi_fprf, 0, 0x1F);
        set_fr_fi_fprf_and_flush(ctx, fr_fi_cleared);
        rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_abort);
    }

    rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_out);
}

/*-----------------------------------------------------------------------*/

/**
 * translate_fp_recip:  Translate an fres or frsqrte instruction.
 *
 * With BINREC_OPT_G_PPC_NATIVE_RECIPROCAL the estimate is computed as a
 * floating-point division (preceded by a square root for frsqrte);
 * otherwise the table-based translate_fres_lookup() or
 * translate_frsqrte_lookup() is used.  fres operates on a FLOAT32 value
 * and frsqrte on a FLOAT64 value.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
 *     is_rsqrte: True if the instruction is frsqrte, false if fres.
 */
static void translate_fp_recip(GuestPPCContext *ctx, uint32_t insn,
                               bool is_rsqrte)
{
    RTLUnit * const unit = ctx->unit;

    const RTLDataType type = is_rsqrte ? RTLTYPE_FLOAT64 : RTLTYPE_FLOAT32;
    const int frB = get_fpr_as_type(ctx, insn_frB(insn), type);

    if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_NATIVE_RECIPROCAL) {
        /* Compute 1.0 / frB, or 1.0 / sqrt(frB) for frsqrte. */
        int div_src;
        if (is_rsqrte) {
            div_src = rtl_alloc_register(unit, type);
            rtl_add_insn(unit, RTLOP_FSQRT, div_src, frB, 0, 0);
        } else {
            div_src = frB;
        }
        /* Bit pattern of 1.0 in the operand's format. */
        const int one = rtl_alloc_register(unit, type);
        rtl_add_insn(unit, RTLOP_LOAD_IMM, one, 0, 0,
                     type==RTLTYPE_FLOAT64 ? UINT64_C(0x3FF0000000000000)
                                           : 0x3F800000);
        const int result = rtl_alloc_register(unit, type);
        rtl_add_insn(unit, RTLOP_FDIV, result, one, div_src, 0);
        set_fp_result(ctx, insn_frD(insn), result, 0, 0, frB, 0, 0,
                      is_rsqrte ? FPSCR_VXSQRT : 0, true, true, false, true);
    } else {
        /* The lookup routines write the result to an alias because it is
         * produced on several different control flow paths. */
        const int alias = rtl_alloc_alias_register(unit, type);
        int label_skip_set = 0;
        if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) {
            /* The lookup code may jump to label_skip_set, bypassing the
             * write to frD below. */
            flush_fpr(ctx, insn_frD(insn), true);
            label_skip_set = rtl_alloc_label(unit);
        }
        if (is_rsqrte) {
            translate_frsqrte_lookup(ctx, frB, alias, label_skip_set);
        } else {
            translate_fres_lookup(ctx, frB, alias, label_skip_set);
        }
        const int result = rtl_alloc_register(unit, type);
        rtl_add_insn(unit, RTLOP_GET_ALIAS, result, 0, 0, alias);
        if (label_skip_set) {
            /* The abort label is placed after the register write so that
             * an aborted operation skips it. */
            set_fpr_and_flush(ctx, insn_frD(insn), result, true);
            rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_skip_set);
        } else {
            set_fpr(ctx, insn_frD(insn), result);
        }
    }

    if (insn_Rc(insn)) {
        update_cr1(ctx);
    }
}

/*-----------------------------------------------------------------------*/

/**
 * translate_load_store_fpr:  Translate a floating-point load or store
 * instruction.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
 *     is_single: True if a single-precision (32-bit) operation, false if a
 *         double-precision (64-bit) operation.
 *     is_store: True if the instruction is a store instruction.
 *     is_indexed: True if the access is an indexed access (like lwx or stwx).
 *     update: True if register rA should be updated with the final EA.
*/
static void translate_load_store_fpr(
    GuestPPCContext *ctx, uint32_t insn, bool is_single, bool is_store,
    bool is_indexed, bool update)
{
    RTLUnit * const unit = ctx->unit;

    /* "type" is the floating-point type of the value being transferred;
     * "raw_type" is the integer type of the same width, used for values
     * which are loaded or stored without byte-swapping. */
    const RTLDataType type = is_single ? RTLTYPE_FLOAT32 : RTLTYPE_FLOAT64;
    const RTLDataType raw_type = is_single ? RTLTYPE_INT32 : RTLTYPE_INT64;
    /* On a little-endian host, use the byte-reversing variant of the
     * load/store opcode. */
    const RTLOpcode rtlop =
        (ctx->handle->host_little_endian
         ? (is_store ? RTLOP_STORE_BR : RTLOP_LOAD_BR)
         : (is_store ? RTLOP_STORE : RTLOP_LOAD));

    int disp, ea;
    const int host_address =
        gen_load_store_address(ctx, insn, is_indexed, update, &disp, &ea);

    const int frD = insn_frD(insn);
    if (is_store) {
        if (is_single
         && !(ctx->handle->guest_opt & BINREC_OPT_G_PPC_FAST_STFS)
         && get_fpr_scalar_type(ctx, frD) != RTLTYPE_FLOAT32) {
            /* stfs performs a bitwise conversion from double precision
             * rather than an arithmetic conversion. */
            const int value = get_fpr_as_type(ctx, frD, RTLTYPE_FLOAT64);
            store_float64_as_32(unit, rtlop, host_address, value, disp, false);
        } else {
            /* If a raw copy of the register's value (as saved by the
             * forwarding load path below, without byte-swapping) is
             * available and has the right width, store it back with a
             * plain STORE instead of converting and swapping again. */
            const int value_raw = ctx->fpr_raw[frD];
            if (value_raw && unit->regs[value_raw].type == raw_type) {
                rtl_add_insn(unit, RTLOP_STORE,
                             0, host_address, value_raw, disp);
            } else {
                const int value = get_fpr_as_type(ctx, frD, type);
                rtl_add_insn(unit, rtlop, 0, host_address, value, disp);
            }
        }
    } else {
        if (ctx->forward_loads) {
            /* Load the raw bits with a plain LOAD and byte-swap them
             * separately (if needed) so the raw register can be reused
             * by a later store of the same FPR. */
            const int value_raw = rtl_alloc_register(unit, raw_type);
            rtl_add_insn(unit, RTLOP_LOAD, value_raw, host_address, 0, disp);
            int value_swapped;
            if (ctx->handle->host_little_endian) {
                value_swapped = rtl_alloc_register(unit, raw_type);
                rtl_add_insn(unit, RTLOP_BSWAP,
                             value_swapped, value_raw, 0, 0);
            } else {
                value_swapped = value_raw;
            }
            const int value = rtl_alloc_register(unit, type);
            rtl_add_insn(unit, RTLOP_BITCAST, value, value_swapped, 0, 0);
            set_fpr(ctx, frD, value);
            /* NOTE(review): presumably set_fpr() resets fpr_raw[] for the
             * register, hence the ordering requirement -- confirm against
             * set_fpr(). */
            ctx->fpr_raw[frD] = value_raw;  // Must come after set_fpr()!
        } else {
            const int value = rtl_alloc_register(unit, type);
            rtl_add_insn(unit, rtlop, value, host_address, 0, disp);
            set_fpr(ctx, frD, value);
        }
    }

    /* For update forms, write the effective address back to rA.  The
     * write is skipped for a non-indexed access with zero displacement. */
    if (update) {
        if (is_indexed || insn_d(insn) != 0) {
            set_gpr(ctx, insn_rA(insn), ea);
        }
    }
}

/*-----------------------------------------------------------------------*/

/**
 * translate_load_store_gpr:  Translate an integer load or store instruction.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
 *     rtlop: RTL instruction to perform the operation (assuming a
 *         same-endian host).
 *     is_store: True if the instruction is a store instruction.
 *     is_indexed: True if the access is an indexed access (like lwzx or stwx).
 *     update: True if register rA should be updated with the final EA.
 */
static void translate_load_store_gpr(
    GuestPPCContext *ctx, uint32_t insn, RTLOpcode rtlop, bool is_store,
    bool is_indexed, bool update)
{
    RTLUnit * const unit = ctx->unit;

    /* Remember the caller's (same-endian) opcode, then on a little-endian
     * host replace rtlop with its opposite-endian counterpart: plain
     * opcodes become byte-reversing and byte-reversing opcodes become
     * plain.  Opcodes not listed in the switch are left unchanged. */
    const RTLOpcode rtlop_raw = rtlop;
    if (ctx->handle->host_little_endian) {
        ASSERT(rtlop != RTLOP_LOAD_S16_BR);  // No such PowerPC instruction.
        switch (rtlop) {
            case RTLOP_LOAD:         rtlop = RTLOP_LOAD_BR;      break;
            case RTLOP_LOAD_U16:     rtlop = RTLOP_LOAD_U16_BR;  break;
            case RTLOP_LOAD_S16:     rtlop = RTLOP_LOAD_S16_BR;  break;
            case RTLOP_STORE:        rtlop = RTLOP_STORE_BR;     break;
            case RTLOP_STORE_I16:    rtlop = RTLOP_STORE_I16_BR; break;
            case RTLOP_LOAD_BR:      rtlop = RTLOP_LOAD;         break;
            case RTLOP_LOAD_U16_BR:  rtlop = RTLOP_LOAD_U16;     break;
            case RTLOP_STORE_BR:     rtlop = RTLOP_STORE;        break;
            case RTLOP_STORE_I16_BR: rtlop = RTLOP_STORE_I16;    break;
            default: break;
        }
    }

    int disp, ea;
    const int host_address =
        gen_load_store_address(ctx, insn, is_indexed, update, &disp, &ea);

    if (is_store) {
        const int rS = insn_rS(insn);
        /* For a plain 32-bit store, reuse the raw (unswapped) register
         * recorded by a forwarded load of rS, if there is one. */
        if (rtlop_raw == RTLOP_STORE && ctx->gpr_raw[rS]) {
            rtl_add_insn(unit, rtlop_raw,
                         0, host_address, ctx->gpr_raw[rS], disp);
        } else {
            const int value = get_gpr(ctx, rS);
            rtl_add_insn(unit, rtlop, 0, host_address, value, disp);
        }
    } else {
        const int rD = insn_rD(insn);
        /* Only forward a GPR load if the host endianness is different
         * (otherwise there's no benefit, since the raw bits and final
         * value are identical). */
        if (rtlop_raw == RTLOP_LOAD && rtlop == RTLOP_LOAD_BR
         && ctx->forward_loads) {
            const int value_raw = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, rtlop_raw, value_raw, host_address, 0, disp);
            const int value = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_BSWAP, value, value_raw, 0, 0);
            set_gpr(ctx, rD, value);
            ctx->gpr_raw[rD] = value_raw;  // Must come after set_gpr()!
        } else {
            const int value = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, rtlop, value, host_address, 0, disp);
            set_gpr(ctx, rD, value);
        }
    }

    /* For update forms, write the effective address back to rA.  The
     * write is skipped for a non-indexed access with zero displacement. */
    if (update) {
        if (is_indexed || insn_d(insn) != 0) {
            set_gpr(ctx, insn_rA(insn), ea);
        }
    }
}

/*-----------------------------------------------------------------------*/

/**
 * translate_load_store_multiple:  Translate a load or store multiple
 * instruction (lmw/stmw).
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
*     is_store: True if the instruction is a store instruction (stmw).
 */
static void translate_load_store_multiple(
    GuestPPCContext *ctx, uint32_t insn, bool is_store)
{
    RTLUnit * const unit = ctx->unit;

    /* Select the 32-bit access opcode, byte-reversed on little-endian
     * hosts. */
    RTLOpcode rtlop;
    if (ctx->handle->host_little_endian) {
        rtlop = (is_store ? RTLOP_STORE_BR : RTLOP_LOAD_BR);
    } else {
        rtlop = (is_store ? RTLOP_STORE : RTLOP_LOAD);
    }

    int host_address = get_ea_base(ctx, insn);
    int disp = insn_d(insn);
    if (disp >= (int)(32768 - 4*(31-insn_rD(insn)))) {
        /* Stepping the displacement through all of the registers would
         * carry it past 32767, so fold it into the base address and
         * start counting from zero instead. */
        const int old_base = host_address;
        host_address = rtl_alloc_register(unit, RTLTYPE_ADDRESS);
        rtl_add_insn(unit, RTLOP_ADDI, host_address, old_base, 0, disp);
        disp = 0;
    }

    /* Transfer the registers in groups which each end on a multiple-of-4
     * register boundary: the first group is short if rD is not itself a
     * multiple of 4, and every later group has exactly 4 registers.
     * Within a group, all register reads (or memory loads) are issued
     * before any of the corresponding stores (or register writes). */
    int first = insn_rD(insn);
    int batch[4];
    while (first < 32) {
        const int count = 4 - (first & 3);
        if (is_store) {
            for (int k = 0; k < count; k++) {
                batch[k] = get_gpr(ctx, first + k);
            }
            for (int k = 0; k < count; k++) {
                rtl_add_insn(unit, rtlop, 0, host_address, batch[k], disp);
                disp += 4;
            }
        } else {
            for (int k = 0; k < count; k++) {
                batch[k] = rtl_alloc_register(unit, RTLTYPE_INT32);
                rtl_add_insn(unit, rtlop, batch[k], host_address, 0, disp);
                disp += 4;
            }
            for (int k = 0; k < count; k++) {
                set_gpr(ctx, first + k, batch[k]);
            }
        }
        first += count;
    }
}

/*-----------------------------------------------------------------------*/

/**
 * translate_load_store_ps:  Translate a paired-single load or store
 * instruction.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
 *     is_store: True if the instruction is a store instruction.
* is_indexed: True if the access is an indexed access (like lwx or stwx). * update: True if register rA should be updated with the final EA. */ static void translate_load_store_ps( GuestPPCContext *ctx, uint32_t insn, bool is_store, bool is_indexed, bool update) { RTLUnit * const unit = ctx->unit; const int frD_index = insn_frD(insn); const int gqr_index = is_indexed ? insn_I_22(insn) : insn_I_17(insn); const bool use_both = is_indexed ? !insn_W_21(insn) : !insn_W_16(insn); const RTLOpcode rtlop_32 = (ctx->handle->host_little_endian ? (is_store ? RTLOP_STORE_BR : RTLOP_LOAD_BR) : (is_store ? RTLOP_STORE : RTLOP_LOAD)); const RTLOpcode rtlop_u16 = (ctx->handle->host_little_endian ? (is_store ? RTLOP_STORE_I16_BR : RTLOP_LOAD_U16_BR) : (is_store ? RTLOP_STORE_I16 : RTLOP_LOAD_U16)); const RTLOpcode rtlop_s16 = (ctx->handle->host_little_endian ? (is_store ? RTLOP_STORE_I16_BR : RTLOP_LOAD_S16_BR) : (is_store ? RTLOP_STORE_I16 : RTLOP_LOAD_S16)); const bool have_constant_gqr = (ctx->handle->guest_opt & BINREC_OPT_G_PPC_CONSTANT_GQRS) && ctx->handle->opt_state != NULL; uint32_t cgqr_value_raw; if (have_constant_gqr) { uint32_t *state_gqr_ptr = (uint32_t *)((uintptr_t)ctx->handle->opt_state + ctx->handle->setup.state_offsets_ppc.gqr); cgqr_value_raw = state_gqr_ptr[gqr_index]; } else { cgqr_value_raw = 0; } const unsigned int cgqr_value = is_store ? cgqr_value_raw & 0xFFFF : cgqr_value_raw >> 16; const int cgqr_type = cgqr_value & 7; const int cgqr_scale = (int16_t)(cgqr_value << 2) >> 10; /* For store operations, if not using constant GQR mode, make sure the * alias is loaded here so it's not initialized on a conditional path. */ if (is_store && !have_constant_gqr && !ctx->live.fpr[frD_index]) { (void) get_fpr(ctx, frD_index); } int disp, ea; const int host_address = gen_load_store_address(ctx, insn, is_indexed, update, &disp, &ea); /* Load and test the GQR value. 
*/ int gqr = 0, gqr_type = 0, label_int = 0, label_out = 0; if (!have_constant_gqr) { label_out = rtl_alloc_label(unit); gqr = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_LOAD, gqr, ctx->psb_reg, 0, ctx->handle->setup.state_offsets_ppc.gqr + gqr_index*4); gqr_type = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFEXT, gqr_type, gqr, 0, (is_store ? 0 : 16) | 3<<8); const int int_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, int_test, gqr_type, 0, 4); label_int = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, int_test, 0, label_int); } /* Floating-point loads and stores. */ if (!have_constant_gqr || !(cgqr_type & 4)) { if (have_constant_gqr && is_store && use_both && ctx->ps_raw[frD_index]) { rtl_add_insn(unit, RTLOP_STORE, 0, host_address, ctx->ps_raw[frD_index], disp); } else if (is_store) { /* Choose a data type to operate on. If FAST_STFS is _not_ * enabled, we pass 64-bit values through the stfs logic, so * we don't convert to 32-bit here. */ const RTLDataType stype = (ctx->handle->guest_opt & BINREC_OPT_G_PPC_FAST_STFS) ? RTLTYPE_FLOAT32 : get_fpr_scalar_type(ctx, frD_index); const int is64 = (stype == RTLTYPE_FLOAT64); int ps = 0, ps0; if (use_both) { ps = get_fpr_as_type(ctx, frD_index, (is64 ? 
RTLTYPE_V2_FLOAT64 : RTLTYPE_V2_FLOAT32)); ps0 = rtl_alloc_register(unit, stype); rtl_add_insn(unit, RTLOP_VEXTRACT, ps0, ps, 0, 0); } else { ps0 = get_fpr_as_type(ctx, frD_index, stype); } if (is64) { store_float64_as_32(unit, rtlop_32, host_address, ps0, disp, true); } else { if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_PS_STORE_DENORMALS)) { ps0 = flush_denormal(ctx, ps0); } rtl_add_insn(unit, rtlop_32, 0, host_address, ps0, disp); } if (use_both) { int ps1 = rtl_alloc_register(unit, stype); rtl_add_insn(unit, RTLOP_VEXTRACT, ps1, ps, 0, 1); if (is64) { store_float64_as_32(unit, rtlop_32, host_address, ps1, disp+4, true); } else { if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_PS_STORE_DENORMALS)) { ps1 = flush_denormal(ctx, ps1); } rtl_add_insn(unit, rtlop_32, 0, host_address, ps1, disp+4); } } } else { // !is_store int raw, ps0, ps1; if (ctx->forward_loads && have_constant_gqr && use_both) { raw = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_LOAD, raw, host_address, 0, disp); int swapped; if (ctx->handle->host_little_endian) { swapped = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_BSWAP, swapped, raw, 0, 0); } else { swapped = raw; } const int ps0_bits64 = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_SRLI, ps0_bits64, swapped, 0, 32); const int ps0_bits = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ZCAST, ps0_bits, ps0_bits64, 0, 0); const int ps1_bits = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ZCAST, ps1_bits, swapped, 0, 0); ps0 = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_BITCAST, ps0, ps0_bits, 0, 0); ps1 = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_BITCAST, ps1, ps1_bits, 0, 0); ctx->ps1_is_safe &= ~(1 << frD_index); } else { raw = 0; ps0 = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, rtlop_32, ps0, host_address, 0, disp); ps1 = rtl_alloc_register(unit, RTLTYPE_FLOAT32); if 
(use_both) { rtl_add_insn(unit, rtlop_32, ps1, host_address, 0, disp+4); ctx->ps1_is_safe &= ~(1 << frD_index); } else { rtl_add_insn(unit, RTLOP_LOAD_IMM, ps1, 0, 0, 0x3F800000); ctx->ps1_is_safe |= 1 << frD_index; } } ctx->fpr_is_safe &= ~(1 << frD_index); const int frD = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32); rtl_add_insn(unit, RTLOP_VBUILD2, frD, ps0, ps1, 0); set_fpr(ctx, frD_index, frD); if (have_constant_gqr) { ctx->ps_raw[frD_index] = raw; } else { ASSERT(!raw); flush_fpr(ctx, frD_index, true); } } if (!have_constant_gqr) { rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out); } } /* Integer loads and stores. Set up the quantization scale factor, * if necessary. */ int gqr_scale; if (!have_constant_gqr) { rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_int); const int scale_temp = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, scale_temp, gqr, 0, is_store ? 18 : 2); const int scale_temp2 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SRAI, scale_temp2, scale_temp, 0, 26); const int scale_exp = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, scale_exp, scale_temp2, 0, 23); int gqr_scale_bits; if (is_store) { gqr_scale_bits = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ADDI, gqr_scale_bits, scale_exp, 0, 0x3F800000); } else { const int one_bits = rtl_imm32(unit, 0x3F800000); gqr_scale_bits = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SUB, gqr_scale_bits, one_bits, scale_exp, 0); } gqr_scale = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_BITCAST, gqr_scale, gqr_scale_bits, 0, 0); } else if ((cgqr_type & 4) && cgqr_scale != 0) { gqr_scale = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_LOAD_IMM, gqr_scale, 0, 0, is_store ? 0x3F800000 + (cgqr_scale<<23) : 0x3F800000 - (cgqr_scale<<23)); } else { gqr_scale = 0; // No scaling needed. } /* If storing, extract and quantize the values first. 
*/ int ps0_int = 0, ps1_int = 0; if ((!have_constant_gqr || (cgqr_type & 4)) && is_store) { /* Determine the saturation bounds. */ int min_val, max_val; if (have_constant_gqr) { min_val = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_LOAD_IMM, min_val, 0, 0, cgqr_type & 1 ? (cgqr_type & 2 ? -0x8000 : 0) : (cgqr_type & 2 ? -0x80 : 0)); max_val = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_LOAD_IMM, max_val, 0, 0, cgqr_type & 1 ? (cgqr_type & 2 ? 0x7FFF : 0xFFFF) : (cgqr_type & 2 ? 0x7F : 0xFF)); } else { const int signed_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, signed_test, gqr_type, 0, 2); const int sign_bit = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, sign_bit, signed_test, 0, 14); const int int16_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, int16_test, gqr_type, 0, 1); const int shift_temp = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_XORI, shift_temp, int16_test, 0, 1); const int minmax_shift = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, minmax_shift, shift_temp, 0, 3); const int min_base = rtl_imm32(unit, 0); const int max_base = rtl_imm32(unit, 0xFFFF); const int min_signed = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SUB, min_signed, min_base, sign_bit, 0); const int max_signed = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SUB, max_signed, max_base, sign_bit, 0); min_val = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SRA, min_val, min_signed, minmax_shift, 0); max_val = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SRA, max_val, max_signed, minmax_shift, 0); } /* Quantize the values. For this case, we don't need to check * for SNaNs since both SNaNs and QNaNs convert to the same * output value. 
Note that we could in theory pass FLOAT64s * straight to FROUNDI in ps_quantize(), but typically psq_st * instructions should be located at the end of a sequence of * calculations which will leave the register in V2_FLOAT32 mode, * so it's probably not worth worrying about the FLOAT64 case. * We preserve FP state since quantization should not raise any * exceptions (unless NO_FPSCR_STATE is enabled, in which case we * don't care about exceptions at all). */ int fpstate = 0; if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) { fpstate = rtl_alloc_register(unit, RTLTYPE_FPSTATE); rtl_add_insn(unit, RTLOP_FGETSTATE, fpstate, 0, 0, 0); } int ps = 0, ps0 = 0; if (ctx->live.fpr[frD_index] && unit->regs[ctx->live.fpr[frD_index]].type == RTLTYPE_V2_FLOAT32) { ps = ctx->live.fpr[frD_index]; ps0 = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_VEXTRACT, ps0, ctx->live.fpr[frD_index], 0, 0); } else { const int ps_64 = get_fpr_as_type(ctx, frD_index, RTLTYPE_V2_FLOAT64); if (use_both) { ps = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32); rtl_add_insn(unit, RTLOP_VFCVT, ps, ps_64, 0, 0); ps0 = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_VEXTRACT, ps0, ps, 0, 0); } else { const int ps0_64 = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_VEXTRACT, ps0_64, ps_64, 0, 0); ps0 = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_FCVT, ps0, ps0_64, 0, 0); } } ps0_int = ps_quantize(ctx, ps0, gqr_scale, min_val, max_val); if (use_both) { const int ps1 = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_VEXTRACT, ps1, ps, 0, 1); ps1_int = ps_quantize(ctx, ps1, gqr_scale, min_val, max_val); } /* Clear any exceptions raised by the quantization. */ if (fpstate) { rtl_add_insn(unit, RTLOP_FSETSTATE, 0, fpstate, 0, 0); } } /* Check the access type. 
*/ int label_int16 = 0, label_sint8 = 0; if (!have_constant_gqr) { const int int16_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, int16_test, gqr_type, 0, 1); label_int16 = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, int16_test, 0, label_int16); if (!is_store) { const int sint8_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, sint8_test, gqr_type, 0, 2); label_sint8 = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, sint8_test, 0, label_sint8); } } /* 8-bit store or unsigned 8-bit load. */ if (!have_constant_gqr || cgqr_type == 4 || (is_store && cgqr_type == 6)) { if (is_store) { rtl_add_insn(unit, RTLOP_STORE_I8, 0, host_address, ps0_int, disp); if (use_both) { rtl_add_insn(unit, RTLOP_STORE_I8, 0, host_address, ps1_int, disp+1); } } else { const int int0 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_LOAD_U8, int0, host_address, 0, disp); int int1 = 0; if (use_both) { int1 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_LOAD_U8, int1, host_address, 0, disp+1); } const int ps0 = ps_dequantize(ctx, int0, gqr_scale); int ps1; if (use_both) { ps1 = ps_dequantize(ctx, int1, gqr_scale); } else { ps1 = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_LOAD_IMM, ps1, 0, 0, 0x3F800000); } const int frD = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32); rtl_add_insn(unit, RTLOP_VBUILD2, frD, ps0, ps1, 0); if (have_constant_gqr) { set_fpr(ctx, frD_index, frD); ctx->fpr_is_safe |= 1 << frD_index; ctx->ps1_is_safe |= 1 << frD_index; } else { set_fpr_and_flush(ctx, frD_index, frD, true); } } if (!have_constant_gqr) { rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out); } } /* Signed 8-bit load. 
*/ if (!is_store) { if (!have_constant_gqr) { rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_sint8); } if (!have_constant_gqr || cgqr_type == 6) { const int int0 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_LOAD_S8, int0, host_address, 0, disp); int int1 = 0; if (use_both) { int1 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_LOAD_S8, int1, host_address, 0, disp+1); } const int ps0 = ps_dequantize(ctx, int0, gqr_scale); int ps1; if (use_both) { ps1 = ps_dequantize(ctx, int1, gqr_scale); } else { ps1 = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_LOAD_IMM, ps1, 0, 0, 0x3F800000); } const int frD = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32); rtl_add_insn(unit, RTLOP_VBUILD2, frD, ps0, ps1, 0); if (have_constant_gqr) { set_fpr(ctx, frD_index, frD); ctx->fpr_is_safe |= 1 << frD_index; ctx->ps1_is_safe |= 1 << frD_index; } else { set_fpr_and_flush(ctx, frD_index, frD, true); } } if (!have_constant_gqr) { rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out); } } /* 16-bit signedness check (if loading data). */ int label_sint16 = 0; if (!have_constant_gqr) { rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_int16); if (!is_store) { const int sint16_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, sint16_test, gqr_type, 0, 2); label_sint16 = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, sint16_test, 0, label_sint16); } } /* 16-bit store or unsigned 16-bit load. 
*/ if (!have_constant_gqr || cgqr_type == 5 || (is_store && cgqr_type == 7)) { if (is_store) { rtl_add_insn(unit, rtlop_u16, 0, host_address, ps0_int, disp); if (use_both) { rtl_add_insn(unit, rtlop_u16, 0, host_address, ps1_int, disp+2); } } else { const int int0 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, rtlop_u16, int0, host_address, 0, disp); int int1 = 0; if (use_both) { int1 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, rtlop_u16, int1, host_address, 0, disp+2); } const int ps0 = ps_dequantize(ctx, int0, gqr_scale); int ps1; if (use_both) { ps1 = ps_dequantize(ctx, int1, gqr_scale); } else { ps1 = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_LOAD_IMM, ps1, 0, 0, 0x3F800000); } const int frD = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32); rtl_add_insn(unit, RTLOP_VBUILD2, frD, ps0, ps1, 0); if (have_constant_gqr) { set_fpr(ctx, frD_index, frD); ctx->fpr_is_safe |= 1 << frD_index; ctx->ps1_is_safe |= 1 << frD_index; } else { set_fpr_and_flush(ctx, frD_index, frD, true); } } if (!have_constant_gqr && !is_store) { rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_out); } } /* Signed 16-bit load. 
*/ if (!is_store) { if (!have_constant_gqr) { rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_sint16); } if (!have_constant_gqr || cgqr_type == 7) { const int int0 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, rtlop_s16, int0, host_address, 0, disp); int int1 = 0; if (use_both) { int1 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, rtlop_s16, int1, host_address, 0, disp+2); } const int ps0 = ps_dequantize(ctx, int0, gqr_scale); int ps1; if (use_both) { ps1 = ps_dequantize(ctx, int1, gqr_scale); } else { ps1 = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_LOAD_IMM, ps1, 0, 0, 0x3F800000); } const int frD = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32); rtl_add_insn(unit, RTLOP_VBUILD2, frD, ps0, ps1, 0); if (have_constant_gqr) { set_fpr(ctx, frD_index, frD); ctx->fpr_is_safe |= 1 << frD_index; ctx->ps1_is_safe |= 1 << frD_index; } else { set_fpr_and_flush(ctx, frD_index, frD, true); } } } if (!have_constant_gqr) { rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_out); } if (update) { if (is_indexed || insn_d12(insn) != 0) { set_gpr(ctx, insn_rA(insn), ea); } } } /*-----------------------------------------------------------------------*/ /** * translate_load_store_string: Translate a string load or store * instruction (lswi/lswx/stswi/stswx). * * [Parameters] * ctx: Translation context. * insn: Instruction word. * is_store: True if the instruction is a store instruction (stswi/stswx). * is_imm: True if the instruction has an immediate count (lswi/stswi). */ static void translate_load_store_string( GuestPPCContext *ctx, uint32_t insn, bool is_store, bool is_imm) { RTLUnit * const unit = ctx->unit; /* We implement the string move instructions by loading or storing * directly to/from the PSB, so make sure it's up to date, and clear * the live register set so no future code tries to use stale values. 
*/ flush_live_regs(ctx, true); int base_address, host_address; if (insn_rA(insn)) { const int rA = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_GET_ALIAS, rA, 0, 0, ctx->alias.gpr[insn_rA(insn)]); const int rA_zcast = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ZCAST, rA_zcast, rA, 0, 0); base_address = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ADD, base_address, ctx->membase_reg, rA_zcast, 0); } else { base_address = ctx->membase_reg; } if (is_imm) { host_address = base_address; } else { const int rB = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_GET_ALIAS, rB, 0, 0, ctx->alias.gpr[insn_rB(insn)]); const int rB_zcast = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ZCAST, rB_zcast, rB, 0, 0); host_address = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ADD, host_address, base_address, rB_zcast, 0); } const int psb_reg = ctx->psb_reg; const int gpr_base = ctx->handle->setup.state_offsets_ppc.gpr; const int endian_flip = ctx->handle->host_little_endian ? 3 : 0; if (is_imm) { const int n = insn_NB(insn) ? insn_NB(insn) : 32; /* Unroll into units of 4 bytes, both to try and hide load latency * and since each GPR is 4 bytes wide anyway. 
*/ int rD = insn_rD(insn); for (int i = 0; i+4 <= n; i += 4, rD = (rD + 1) & 31) { int byte_reg[4]; for (int byte = 0; byte < 4; byte++) { byte_reg[byte] = rtl_alloc_register(unit, RTLTYPE_INT32); } const int gpr_offset = gpr_base + 4*rD; if (is_store) { for (int byte = 0; byte < 4; byte++) { rtl_add_insn(unit, RTLOP_LOAD_U8, byte_reg[byte], psb_reg, 0, gpr_offset + (byte ^ endian_flip)); } for (int byte = 0; byte < 4; byte++) { rtl_add_insn(unit, RTLOP_STORE_I8, 0, host_address, byte_reg[byte], i + byte); } } else { for (int byte = 0; byte < 4; byte++) { rtl_add_insn(unit, RTLOP_LOAD_U8, byte_reg[byte], host_address, 0, i + byte); } for (int byte = 0; byte < 4; byte++) { rtl_add_insn(unit, RTLOP_STORE_I8, 0, psb_reg, byte_reg[byte], gpr_offset + (byte ^ endian_flip)); } } } if ((n & 3) != 0) { const int i = n & ~3; int byte_reg[4]; for (int byte = 0; byte < (n & 3); byte++) { byte_reg[byte] = rtl_alloc_register(unit, RTLTYPE_INT32); } const int gpr_offset = gpr_base + 4*rD; if (is_store) { for (int byte = 0; byte < (n & 3); byte++) { rtl_add_insn(unit, RTLOP_LOAD_U8, byte_reg[byte], psb_reg, 0, gpr_offset + (byte ^ endian_flip)); } for (int byte = 0; byte < (n & 3); byte++) { rtl_add_insn(unit, RTLOP_STORE_I8, 0, host_address, byte_reg[byte], i + byte); } } else { for (int byte = 0; byte < (n & 3); byte++) { rtl_add_insn(unit, RTLOP_LOAD_U8, byte_reg[byte], host_address, 0, i + byte); } const int zero = rtl_imm32(unit, 0); for (int byte = (n & 3); byte < 4; byte++) { byte_reg[byte] = zero; } for (int byte = 0; byte < 4; byte++) { rtl_add_insn(unit, RTLOP_STORE_I8, 0, psb_reg, byte_reg[byte], gpr_offset + (byte ^ endian_flip)); } } } } else { // !is_imm /* We don't even attempt to optimize this because it's way too * complicated already. Hopefully nobody actually uses lswx/stswx * anymore. 
*/ const int xer = get_xer(ctx); const int xer_count = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, xer_count, xer, 0, 127); /* We need to zero out the unused bytes of the last register if * not loading a multiple of four bytes. Do this by clearing * the entire register ahead of time for simplicity. */ if (!is_store) { const int count_mod_4 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, count_mod_4, xer_count, 0, 3); const int start_label = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, count_mod_4, 0, start_label); const int last_gpr_temp = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ADDI, last_gpr_temp, xer_count, 0, 4 * insn_rD(insn)); const int last_gpr_offset = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, last_gpr_offset, last_gpr_temp, 0, 31<<2); const int last_gpr_offset_zcast = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ZCAST, last_gpr_offset_zcast, last_gpr_offset, 0, 0); const int last_gpr_address = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ADD, last_gpr_address, psb_reg, last_gpr_offset_zcast, 0); rtl_add_insn(unit, RTLOP_STORE, 0, last_gpr_address, rtl_imm32(unit, 0), gpr_base); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, start_label); } /* We use ADDRESS type for the count and GPR offset aliases so we * can add them directly to the base addresses without an * intermediate ZCAST. 
*/ const int count = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ZCAST, count, xer_count, 0, 0); const int init_i = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_LOAD_IMM, init_i, 0, 0, 0); const int alias_i = rtl_alloc_alias_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, init_i, 0, alias_i); const int init_gpr_offset = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_LOAD_IMM, init_gpr_offset, 0, 0, 4 * insn_rD(insn)); const int alias_gpr_offset = rtl_alloc_alias_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, init_gpr_offset, 0, alias_gpr_offset); const int loop_label = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, loop_label); const int i = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_GET_ALIAS, i, 0, 0, alias_i); const int i_test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLTU, i_test, i, count, 0); const int end_label = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, i_test, 0, end_label); const int gpr_offset = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_GET_ALIAS, gpr_offset, 0, 0, alias_gpr_offset); const int mem_address = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ADD, mem_address, host_address, i, 0); int real_offset; if (endian_flip) { real_offset = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_XORI, real_offset, gpr_offset, 0, endian_flip); } else { real_offset = gpr_offset; } int gpr_address = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ADD, gpr_address, psb_reg, real_offset, 0); const int value = rtl_alloc_register(unit, RTLTYPE_INT32); if (is_store) { rtl_add_insn(unit, RTLOP_LOAD_U8, value, gpr_address, 0, gpr_base); rtl_add_insn(unit, RTLOP_STORE_I8, 0, mem_address, value, 0); } else { rtl_add_insn(unit, RTLOP_LOAD_U8, value, mem_address, 0, 0); rtl_add_insn(unit, 
RTLOP_STORE_I8, 0, gpr_address, value, gpr_base); } int new_i = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ADDI, new_i, i, 0, 1); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, new_i, 0, alias_i); const int gpr_offset_temp = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ADDI, gpr_offset_temp, gpr_offset, 0, 1); int new_gpr_offset = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_ANDI, new_gpr_offset, gpr_offset_temp, 0, 127); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, new_gpr_offset, 0, alias_gpr_offset); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, loop_label); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, end_label); } // if (is_imm) } /*-----------------------------------------------------------------------*/ /** * translate_logic_crb: Translate a logic instruction operating on CR bits. * * [Parameters] * ctx: Translation context. * insn: Instruction word. * rtlop: RTL register-register instruction to perform the operation. * invert_crbB: True if the value of crbB should be inverted. * invert_result: True if the result should be inverted. */ static void translate_logic_crb( GuestPPCContext *ctx, uint32_t insn, RTLOpcode rtlop, bool invert_crbB, bool invert_result) { RTLUnit * const unit = ctx->unit; const int crbA = get_crb(ctx, insn_crbA(insn)); int crbB = get_crb(ctx, insn_crbB(insn)); if (invert_crbB) { const int inverted = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_XORI, inverted, crbB, 0, 1); crbB = inverted; } int result = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, rtlop, result, crbA, crbB, 0); if (invert_result) { const int inverted = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_XORI, inverted, result, 0, 1); result = inverted; } set_crb(ctx, insn_crbD(insn), result); } /*-----------------------------------------------------------------------*/ /** * translate_logic_imm: Translate an integer register-immediate logic * instruction. 
*
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
 *     rtlop: RTL register-immediate instruction to perform the operation.
 *     shift_imm: True if the immediate value should be shifted left 16 bits.
 *     set_cr0: True if CR0 should be set according to the result.
 */
static void translate_logic_imm(
    GuestPPCContext *ctx, uint32_t insn, RTLOpcode rtlop, bool shift_imm,
    bool set_cr0)
{
    RTLUnit * const unit = ctx->unit;

    /* Fetch the source operand first so that any RTL needed to load rS
     * is emitted ahead of the logic instruction itself. */
    const int source = get_gpr(ctx, insn_rS(insn));

    /* The unsigned 16-bit immediate occupies either the low or the high
     * halfword of the 32-bit operand. */
    uint32_t operand;
    if (shift_imm) {
        operand = insn_UIMM(insn)<<16;
    } else {
        operand = insn_UIMM(insn);
    }

    const int dest = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, rtlop, dest, source, 0, operand);
    set_gpr(ctx, insn_rA(insn), dest);

    if (!set_cr0) {
        return;
    }
    update_cr0(ctx, dest);
}

/*-----------------------------------------------------------------------*/

/**
 * translate_logic_reg:  Translate an integer register-register logic
 * instruction.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
 *     rtlop: RTL register-register instruction to perform the operation.
 *     invert_rB: True if the value of rB should be inverted.
 *     invert_result: True if the result should be inverted.
*/
static void translate_logic_reg(
    GuestPPCContext *ctx, uint32_t insn, RTLOpcode rtlop, bool invert_rB,
    bool invert_result)
{
    RTLUnit * const unit = ctx->unit;

    const int rS = get_gpr(ctx, insn_rS(insn));
    int rB = get_gpr(ctx, insn_rB(insn));
    /* When requested, substitute NOT(rB) for rB as the second operand. */
    if (invert_rB) {
        const int inverted = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_NOT, inverted, rB, 0, 0);
        rB = inverted;
    }

    int result = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, rtlop, result, rS, rB, 0);
    /* When requested, apply NOT to the operation's output before it is
     * stored to rA (and before CR0 is derived from it). */
    if (invert_result) {
        const int inverted = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_NOT, inverted, result, 0, 0);
        result = inverted;
    }
    set_gpr(ctx, insn_rA(insn), result);

    /* Unlike translate_logic_imm(), the CR0 update is keyed off the
     * instruction word itself rather than a parameter. */
    if (insn_Rc(insn)) {
        update_cr0(ctx, result);
    }
}

/*-----------------------------------------------------------------------*/

/**
 * translate_lwarx:  Translate a lwarx instruction.
 *
 * The loaded word is always written to rD.  The reservation is either
 * recorded in the processor state block or, for an lwarx recognized as
 * paired with a following stwcx., kept in the translation context.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     address: Address of instruction being translated.
 *     insn: Instruction word.
 */
static void translate_lwarx(
    GuestPPCContext *ctx, uint32_t address, uint32_t insn)
{
    const binrec_t *handle = ctx->handle;
    RTLUnit * const unit = ctx->unit;
    const int psb_reg = ctx->psb_reg;

    const int host_address = get_ea_indexed(ctx, insn, NULL);

    /*
     * If enabled, we optimize the common case of a loop containing an
     * lwarx followed by a stwcx. (with no intervening branches or branch
     * targets) by saving the loaded value in an RTL register instead of
     * storing it back to the PSB, then using that value in the
     * accompanying stwcx. translation instead of reloading it from the
     * PSB.  Knowledge of the pairing also lets us omit the reserve_flag
     * check from stwcx., since the translator design ensures that code
     * flow cannot be interrupted between the two instructions.
     *
     * If the optimization is not enabled, paired_lwarx will always be
     * set to ~0, so it can never test equal to the instruction address.
     */
    /* value_be is the word exactly as loaded; value is the same word
     * after a byte swap on little-endian hosts. */
    const int value_be = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_LOAD, value_be, host_address, 0, 0);
    int value;
    if (handle->host_little_endian) {
        value = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_BSWAP, value, value_be, 0, 0);
    } else {
        value = value_be;
    }
    if (address == ctx->blocks[ctx->current_block].paired_lwarx) {
        /* Paired case: remember both forms of the loaded value in the
         * context; nothing is written to the PSB. */
        ctx->paired_lwarx_data = value;
        ctx->paired_lwarx_data_be = value_be;
    } else {
        /* Unpaired case: store 1 to reserve_flag and the unswapped word
         * to reserve_state in the PSB. */
        rtl_add_insn(unit, RTLOP_STORE_I8, 0, psb_reg, rtl_imm32(unit, 1),
                     handle->setup.state_offsets_ppc.reserve_flag);
        rtl_add_insn(unit, RTLOP_STORE, 0, psb_reg, value_be,
                     handle->setup.state_offsets_ppc.reserve_state);
    }
    set_gpr(ctx, insn_rD(insn), value);
}

/*-----------------------------------------------------------------------*/

/**
 * translate_move_fpr:  Translate an fmr/fneg/fabs/fnabs or
 * ps_mr/ps_neg/ps_abs/ps_nabs instruction.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
 *     opcode: RTL opcode for the operation.  RTLOP_MOVE is handled by
 *         passing the source register straight through to frD.
 *     is_paired: True if a paired-single operation, false if not.
 */
static void translate_move_fpr(
    GuestPPCContext *ctx, uint32_t insn, RTLOpcode opcode, bool is_paired)
{
    /* Paired operations work on a 2 x FLOAT32 vector; scalar operations
     * work on a FLOAT64. */
    const RTLDataType type = is_paired ? RTLTYPE_V2_FLOAT32 : RTLTYPE_FLOAT64;
    const int frB = get_fpr_as_type(ctx, insn_frB(insn), type);
    if (opcode == RTLOP_MOVE) {
        /* A plain move needs no RTL instruction of its own here. */
        set_fpr(ctx, insn_frD(insn), frB);
    } else {
        RTLUnit * const unit = ctx->unit;
        const int result = rtl_alloc_register(unit, type);
        rtl_add_insn(unit, opcode, result, frB, 0, 0);
        set_fpr(ctx, insn_frD(insn), result);
    }

    /* frD inherits frB's "safe" flag.  ps1_is_safe is only touched for
     * paired operations. */
    /* NOTE(review): "1 << n" is evaluated as int; if a register index of
     * 31 can reach here this shifts into the sign bit -- confirm this is
     * acceptable for the supported compilers. */
    if (ctx->fpr_is_safe & (1 << insn_frB(insn))) {
        ctx->fpr_is_safe |= 1 << insn_frD(insn);
    } else {
        ctx->fpr_is_safe &= ~(1 << insn_frD(insn));
    }
    if (is_paired) {
        if (ctx->ps1_is_safe & (1 << insn_frB(insn))) {
            ctx->ps1_is_safe |= 1 << insn_frD(insn);
        } else {
            ctx->ps1_is_safe &= ~(1 << insn_frD(insn));
        }
    }

    if (insn_Rc(insn)) {
        update_cr1(ctx);
    }
}

/*-----------------------------------------------------------------------*/

/**
 * translate_move_spr:  Translate an mfspr or mtspr instruction.
 *
 * Accesses which are not handled (writes to read-only registers,
 * supervisor SPRs, unknown SPR numbers) log a warning and emit
 * RTLOP_ILLEGAL.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     address: Address of instruction being translated.
 *     insn: Instruction word.
 *     to_spr: True for mtspr, false for mfspr.
 */
static void translate_move_spr(
    GuestPPCContext *ctx, uint32_t address, uint32_t insn, bool to_spr)
{
    RTLUnit * const unit = ctx->unit;

    const int spr = insn_spr(insn);

    switch (spr) {
      /* XER, LR and CTR are moved through the context's get/set helpers. */
      case SPR_XER:
        if (to_spr) {
            set_xer(ctx, get_gpr(ctx, insn_rS(insn)), 0);
        } else {
            set_gpr(ctx, insn_rD(insn), get_xer(ctx));
        }
        break;

      case SPR_LR:
        if (to_spr) {
            set_lr(ctx, get_gpr(ctx, insn_rS(insn)));
        } else {
            set_gpr(ctx, insn_rD(insn), get_lr(ctx));
        }
        break;

      case SPR_CTR:
        if (to_spr) {
            set_ctr(ctx, get_gpr(ctx, insn_rS(insn)));
        } else {
            set_gpr(ctx, insn_rD(insn), get_ctr(ctx));
        }
        break;

      case SPR_TBL:
      case SPR_TBU:
        if (to_spr) {
            log_warning(ctx->handle, "0x%X: Invalid attempt to write TB%c",
                        address, spr==SPR_TBU ? 'U' : 'L');
            rtl_add_insn(unit, RTLOP_ILLEGAL, 0, 0, 0, 0);
        } else {
            /* We have to flush rD since we set it differently depending on
             * whether a timebase handler function is present. */
            const int rD = insn_rD(insn);
            flush_gpr(ctx, rD);
            const int label_no_handler = rtl_alloc_label(unit);
            const int label_end = rtl_alloc_label(unit);

            /* Load the handler pointer from the PSB; a zero pointer takes
             * the no-handler path below. */
            const int func = rtl_alloc_register(unit, RTLTYPE_ADDRESS);
            rtl_add_insn(unit, RTLOP_LOAD, func, ctx->psb_reg, 0,
                         ctx->handle->setup.state_offsets_ppc.timebase_handler);
            rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, func, 0, label_no_handler);
            /* Call the handler with the PSB register as its argument; it
             * produces a 64-bit value. */
            const int result64 = rtl_alloc_register(unit, RTLTYPE_INT64);
            rtl_add_insn(unit, RTLOP_CALL, result64, func, ctx->psb_reg, 0);
            const int result = rtl_alloc_register(unit, RTLTYPE_INT32);
            if (spr == SPR_TBL) {
                /* TBL: narrow the 64-bit value directly. */
                rtl_add_insn(unit, RTLOP_ZCAST, result, result64, 0, 0);
            } else {
                /* TBU: shift right by 32 first, then narrow. */
                const int shifted = rtl_alloc_register(unit, RTLTYPE_INT64);
                rtl_add_insn(unit, RTLOP_SRLI, shifted, result64, 0, 32);
                rtl_add_insn(unit, RTLOP_ZCAST, result, shifted, 0, 0);
            }
            rtl_add_insn(unit, RTLOP_SET_ALIAS,
                         0, result, 0, ctx->alias.gpr[rD]);
            rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_end);

            /* No handler installed: rD is set to 0. */
            rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_no_handler);
            rtl_add_insn(unit, RTLOP_SET_ALIAS,
                         0, rtl_imm32(unit,0), 0, ctx->alias.gpr[rD]);

            rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_end);
        }
        break;

      case SPR_UPVR:
        if (to_spr) {
            log_warning(ctx->handle, "0x%X: Invalid attempt to write UPVR",
                        address);
            rtl_add_insn(unit, RTLOP_ILLEGAL, 0, 0, 0, 0);
        } else {
            /* Reads come from the pvr field of the PSB. */
            const int value = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_LOAD, value, ctx->psb_reg, 0,
                         ctx->handle->setup.state_offsets_ppc.pvr);
            set_gpr(ctx, insn_rD(insn), value);
        }
        break;

      case SPR_UGQR(0):
      case SPR_UGQR(1):
      case SPR_UGQR(2):
      case SPR_UGQR(3):
      case SPR_UGQR(4):
      case SPR_UGQR(5):
      case SPR_UGQR(6):
      case SPR_UGQR(7):
        /* The GQRs are accessed as consecutive 4-byte entries starting at
         * the PSB's gqr offset, indexed by the low 3 bits of the SPR
         * number. */
        if (to_spr) {
            const int rS = get_gpr(ctx, insn_rS(insn));
            rtl_add_insn(unit, RTLOP_STORE, 0, ctx->psb_reg, rS,
                         ctx->handle->setup.state_offsets_ppc.gqr + 4 * (spr & 7));
            /* With the CONSTANT_GQRS option set, a GQR write also leaves
             * the unit, passing address+4 as the next address --
             * presumably so code translated under that assumption is not
             * run with a changed GQR (confirm against
             * return_from_unit()). */
            if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_CONSTANT_GQRS) {
                return_from_unit(ctx, address, rtl_imm32(unit, address+4),
                                 true);
            }
        } else {
            const int value = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_LOAD, value, ctx->psb_reg, 0,
                         ctx->handle->setup.state_offsets_ppc.gqr + 4 * (spr & 7));
            set_gpr(ctx, insn_rD(insn), value);
        }
        break;

      case SPR_UPIR:
        if (to_spr) {
            log_warning(ctx->handle, "0x%X: Invalid attempt to write UPIR",
                        address);
            rtl_add_insn(unit, RTLOP_ILLEGAL, 0, 0, 0, 0);
        } else {
            /* Reads come from the pir field of the PSB. */
            const int value = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_LOAD, value, ctx->psb_reg, 0,
                         ctx->handle->setup.state_offsets_ppc.pir);
            set_gpr(ctx, insn_rD(insn), value);
        }
        break;

      case SPR_PVR:
        /* A write to PVR gets its own message; a read is reported with
         * the other supervisor SPRs below. */
        if (to_spr) {
            log_warning(ctx->handle, "0x%X: Invalid attempt to write PVR",
                        address);
            rtl_add_insn(unit, RTLOP_ILLEGAL, 0, 0, 0, 0);
            break;
        }
        /* else fall through */
      case SPR_GQR(0):
      case SPR_GQR(1):
      case SPR_GQR(2):
      case SPR_GQR(3):
      case SPR_GQR(4):
      case SPR_GQR(5):
      case SPR_GQR(6):
      case SPR_GQR(7):
      case SPR_PIR:
        log_warning(ctx->handle, "0x%X: %s on supervisor SPR %d",
                    address, to_spr ? "mtspr" : "mfspr/mftb", spr);
        rtl_add_insn(unit, RTLOP_ILLEGAL, 0, 0, 0, 0);
        break;

      default:
        log_warning(ctx->handle, "0x%X: %s on invalid SPR %d",
                    address, to_spr ? "mtspr" : "mfspr/mftb", spr);
        rtl_add_insn(unit, RTLOP_ILLEGAL, 0, 0, 0, 0);
        break;
    }
}

/*-----------------------------------------------------------------------*/

/**
 * translate_muldiv_reg:  Translate an integer register-register multiply
 * or divide instruction.
 *
 * For RTLOP_DIVU and RTLOP_DIVS, the generated code branches around the
 * division when rB is zero and, for RTLOP_DIVS only, when rA is
 * 0x80000000 and rB is -1.  On that skipped path rD is not written, and
 * XER[SO] and XER[OV] are set if do_overflow is true.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
 *     rtlop: RTL register-register instruction to perform the operation.
 *     do_overflow: True to update XER[SO:OV] based on overflow state.
 */
static void translate_muldiv_reg(
    GuestPPCContext *ctx, uint32_t insn, RTLOpcode rtlop, bool do_overflow)
{
    RTLUnit * const unit = ctx->unit;

    const int rA = get_gpr(ctx, insn_rA(insn));
    const int rB = get_gpr(ctx, insn_rB(insn));

    /* For division, we might skip over the actual division operation, so
     * store the target register now.  We handle XER (when OE is set)
     * separately, since we have to set SO|OV anyway on the overflow path. */
    const bool is_divide = (rtlop == RTLOP_DIVU || rtlop == RTLOP_DIVS);
    if (is_divide) {
        flush_gpr(ctx, insn_rD(insn));
    }

    int div_skip_label = 0;
    int xer = 0;
    if (is_divide) {
        /* XER is fetched before the first branch so that the same
         * register can be used on both the normal and the skip path. */
        if (do_overflow) {
            xer = get_xer(ctx);
        }
        div_skip_label = rtl_alloc_label(unit);
        /* Skip the division when the divisor is zero. */
        rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, rB, 0, div_skip_label);
        if (rtlop == RTLOP_DIVS) {
            /* Signed division also skips for 0x80000000 / -1.  The rB
             * test is only reached when rA matched. */
            int noskip_label = rtl_alloc_label(unit);
            const int rA_is_80000000 = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_SEQI,
                         rA_is_80000000, rA, 0, UINT64_C(-0x80000000));
            rtl_add_insn(unit, RTLOP_GOTO_IF_Z,
                         0, rA_is_80000000, 0, noskip_label);
            const int rB_is_FFFFFFFF = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_SEQI, rB_is_FFFFFFFF, rB, 0, -1);
            rtl_add_insn(unit, RTLOP_GOTO_IF_NZ,
                         0, rB_is_FFFFFFFF, 0, div_skip_label);
            rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, noskip_label);
        }
    }

    const int result = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, rtlop, result, rA, rB, 0);
    if (is_divide) {
        /* rD was flushed above, so write the alias directly instead of
         * going through set_gpr(). */
        rtl_add_insn(unit, RTLOP_SET_ALIAS,
                     0, result, 0, ctx->alias.gpr[insn_rD(insn)]);
    } else {
        set_gpr(ctx, insn_rD(insn), result);
    }

    if (do_overflow) {
        if (!xer) {
            xer = get_xer(ctx);
        }
        /* Start from XER with OV cleared; SO is left as it was. */
        const int masked_xer = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_ANDI, masked_xer, xer, 0, ~XER_OV);
        if (rtlop == RTLOP_MUL) {
            /* mullwo's overflow check is for signed integers, so we can't
             * just check for the high word being nonzero. */
            /* Compare the high word of the signed product (MULHS) with
             * the low word arithmetically shifted right by 31; "overflow"
             * is nonzero when the two differ. */
            const int result_hi = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_MULHS, result_hi, rA, rB, 0);
            const int lo_sign = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_SRAI, lo_sign, result, 0, 31);
            const int overflow = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_XOR, overflow, result_hi, lo_sign, 0);
            /* "overflow" doubles as the SELECT condition and as the
             * alternative value, which is only chosen when it is zero. */
            const int SO_OV = rtl_imm32(unit, XER_SO | XER_OV);
            const int bits_to_set = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_SELECT,
                         bits_to_set, SO_OV, overflow, overflow);
            const int new_xer = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_OR, new_xer, masked_xer, bits_to_set, 0);
            set_xer(ctx, new_xer, 0);
        } else {
            /* Division that was not skipped: only OV is cleared. */
            ASSERT(rtlop == RTLOP_DIVU || rtlop == RTLOP_DIVS);
            set_xer(ctx, masked_xer, 0);
        }
    }

    if (div_skip_label) {
        int div_continue_label = 0;
        if (do_overflow) {
            /* Reset the context's tracked XER state before the paths
             * merge -- presumably because the two paths leave different
             * values in the XER alias (confirm against set_xer()). */
            ctx->last_set.xer = -1;
            ctx->last_set.xer_so = -1;
            ctx->live.xer = 0;
            ctx->live.xer_so = 0;
            /* The normal path jumps over the skip-path XER update. */
            div_continue_label = rtl_alloc_label(unit);
            rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, div_continue_label);
        }
        rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, div_skip_label);
        if (do_overflow) {
            /* Skipped division: set SO and OV in XER, and set the separate
             * SO alias to 1 if one is in use. */
            const int new_xer = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_ORI, new_xer, xer, 0, XER_SO | XER_OV);
            rtl_add_insn(unit, RTLOP_SET_ALIAS,
                         0, new_xer, 0, ctx->alias.xer);
            if (ctx->alias.xer_so) {
                rtl_add_insn(unit, RTLOP_SET_ALIAS,
                             0, rtl_imm32(unit,1), 0, ctx->alias.xer_so);
            }
            rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, div_continue_label);
        }
    }

    if (insn_Rc(insn)) {
        update_cr0(ctx, result);
    }
}

/*-----------------------------------------------------------------------*/

/**
 * translate_ps_arith:  Translate a two-operand paired-single arithmetic
 * instruction.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
 *     rtlop: RTL opcode to perform the operation.
 *     frC_slot: frC slot to use as the multiplier for ps_muls[01], else -1.
* vxfoo_no_snan: FPSCR_VXFOO bitmask indicating which non-VXSNAN * exception(s) can be raised by the instruction. */ static void translate_ps_arith( GuestPPCContext *ctx, uint32_t insn, RTLOpcode rtlop, int frC_slot, uint32_t vxfoo_no_snan) { RTLUnit * const unit = ctx->unit; const int src1_fpr = insn_frA(insn); const int src2_fpr = (rtlop==RTLOP_FMUL ? insn_frC(insn) : insn_frB(insn)); bool use_float32 = false; const bool src1_32 = (get_fpr_scalar_type(ctx, src1_fpr) == RTLTYPE_FLOAT32); const bool src2_32 = (get_fpr_scalar_type(ctx, src2_fpr) == RTLTYPE_FLOAT32); if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_SINGLE_PREC_INPUTS) { use_float32 = src1_32 || src2_32; } else { use_float32 = src1_32 && src2_32; } const RTLDataType type = use_float32 ? RTLTYPE_V2_FLOAT32 : RTLTYPE_V2_FLOAT64; int src1 = get_fpr_as_type(ctx, src1_fpr, type); int src2 = get_fpr_as_type(ctx, src2_fpr, type); if (frC_slot >= 0) { const int multiplier = rtl_alloc_register( unit, use_float32 ? RTLTYPE_FLOAT32 : RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_VEXTRACT, multiplier, src2, 0, frC_slot); src2 = rtl_alloc_register(unit, type); rtl_add_insn(unit, RTLOP_VBROADCAST, src2, multiplier, 0, 0); } if (rtlop == RTLOP_FMUL && type == RTLTYPE_V2_FLOAT64 && !(ctx->handle->guest_opt & BINREC_OPT_G_PPC_FAST_FMULS)) { int src1_ps0 = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_VEXTRACT, src1_ps0, src1, 0, 0); int src2_ps0 = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_VEXTRACT, src2_ps0, src2, 0, 0); round_for_multiply(ctx, &src1_ps0, &src2_ps0); const int new_src1 = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64); rtl_add_insn(unit, RTLOP_VINSERT, new_src1, src1, src1_ps0, 0); const int new_src2 = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64); rtl_add_insn(unit, RTLOP_VINSERT, new_src2, src2, src2_ps0, 0); src1 = new_src1; src2 = new_src2; } int result = rtl_alloc_register(unit, type); rtl_add_insn(unit, rtlop, result, src1, src2, 0); if (!use_float32) { 
const int result64 = result; result = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32); rtl_add_insn(unit, RTLOP_VFCVT, result, result64, 0, 0); } check_fp_underflow(ctx, result, rtlop, src1, src2, 0, true, true, use_float32); set_ps_result(ctx, insn_frD(insn), result, rtlop, src1, src2, 0, use_float32, vxfoo_no_snan); if (insn_Rc(insn)) { update_cr1(ctx); } } /*-----------------------------------------------------------------------*/ /** * translate_ps_fma: Translate a paired-single multiply-add instruction. * * [Parameters] * ctx: Translation context. * insn: Instruction word. * rtlop: RTL opcode to perform the operation. * frC_slot: frC slot to use as the multiplier for ps_madds[01], else -1. * negate: True to negate non-NaN result values. */ static void translate_ps_fma( GuestPPCContext *ctx, uint32_t insn, RTLOpcode rtlop, int frC_slot, bool negate) { RTLUnit * const unit = ctx->unit; bool use_float32 = false; const bool frA_32 = (get_fpr_scalar_type(ctx, insn_frA(insn)) == RTLTYPE_FLOAT32); const bool frB_32 = (get_fpr_scalar_type(ctx, insn_frB(insn)) == RTLTYPE_FLOAT32); const bool frC_32 = (get_fpr_scalar_type(ctx, insn_frC(insn)) == RTLTYPE_FLOAT32); if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_SINGLE_PREC_INPUTS) { use_float32 = frA_32 || frB_32 || frC_32; } else { use_float32 = frA_32 && frB_32 && frC_32; } const RTLDataType type = use_float32 ? RTLTYPE_V2_FLOAT32 : RTLTYPE_V2_FLOAT64; const RTLDataType scalar_type = rtl_vector_element_type(type); int frA = get_fpr_as_type(ctx, insn_frA(insn), type); int frC = get_fpr_as_type(ctx, insn_frC(insn), type); int frB = get_fpr_as_type(ctx, insn_frB(insn), type); /* We can only use SIMD instructions if there are no special cases to * worry about; otherwise, the complexity of dealing with edge cases * in each of the paired-single slots becomes prohibitive. 
*/ if (!negate && (ctx->handle->common_opt & BINREC_OPT_NATIVE_IEEE_NAN) && (ctx->handle->guest_opt & BINREC_OPT_G_PPC_FAST_FMADDS) && (ctx->handle->guest_opt & (BINREC_OPT_G_PPC_IGNORE_FPSCR_VXFOO | BINREC_OPT_G_PPC_NO_FPSCR_STATE))) { if (frC_slot >= 0) { const int multiplier = rtl_alloc_register(unit, scalar_type); rtl_add_insn(unit, RTLOP_VEXTRACT, multiplier, frC, 0, frC_slot); frC = multiplier; } if (type == RTLTYPE_V2_FLOAT64 && frC_slot != 1 // Slot 1 is already in single precision. && !(ctx->handle->guest_opt & BINREC_OPT_G_PPC_FAST_FMULS)) { int frA_ps0 = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_VEXTRACT, frA_ps0, frA, 0, 0); int frC_ps0; if (frC_slot == 0) { frC_ps0 = frC; } else { frC_ps0 = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_VEXTRACT, frC_ps0, frC, 0, 0); } round_for_multiply(ctx, &frA_ps0, &frC_ps0); if (frC_slot == 0) { int frA_ps1 = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_VEXTRACT, frA_ps1, frA, 0, 1); int frC_ps1 = frC; round_for_multiply(ctx, &frA_ps1, &frC_ps1); frA = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64); rtl_add_insn(unit, RTLOP_VBUILD2, frA, frA_ps0, frA_ps1, 0); frC = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64); rtl_add_insn(unit, RTLOP_VBUILD2, frC, frC_ps0, frC_ps1, 0); } else { const int new_frA = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64); rtl_add_insn(unit, RTLOP_VINSERT, new_frA, frA, frA_ps0, 0); const int new_frC = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64); rtl_add_insn(unit, RTLOP_VINSERT, new_frC, frC, frC_ps0, 0); frA = new_frA; frC = new_frC; } } else if (frC_slot >= 0) { const int multiplier = frC; frC = rtl_alloc_register(unit, type); rtl_add_insn(unit, RTLOP_VBROADCAST, frC, multiplier, 0, 0); } int result = rtl_alloc_register(unit, type); rtl_add_insn(unit, rtlop, result, frA, frC, frB); if (!use_float32) { const int result64 = result; result = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32); rtl_add_insn(unit, RTLOP_VFCVT, result, 
result64, 0, 0); } check_fp_underflow(ctx, result, rtlop, frA, frC, frB, true, true, use_float32); set_fp_result(ctx, insn_frD(insn), result, 0, frA, frB, frC, 0, 0, true, false, true, true); } else { // SIMD instructions not usable int frA_ps[2], frB_ps[2], frC_ps[2]; frA_ps[0] = rtl_alloc_register(unit, scalar_type); rtl_add_insn(unit, RTLOP_VEXTRACT, frA_ps[0], frA, 0, 0); frA_ps[1] = rtl_alloc_register(unit, scalar_type); rtl_add_insn(unit, RTLOP_VEXTRACT, frA_ps[1], frA, 0, 1); frC_ps[0] = rtl_alloc_register(unit, scalar_type); if (frC_slot >= 0) { rtl_add_insn(unit, RTLOP_VEXTRACT, frC_ps[0], frC, 0, frC_slot); frC_ps[1] = frC_ps[0]; } else { rtl_add_insn(unit, RTLOP_VEXTRACT, frC_ps[0], frC, 0, 0); frC_ps[1] = rtl_alloc_register(unit, scalar_type); rtl_add_insn(unit, RTLOP_VEXTRACT, frC_ps[1], frC, 0, 1); } frB_ps[0] = rtl_alloc_register(unit, scalar_type); rtl_add_insn(unit, RTLOP_VEXTRACT, frB_ps[0], frB, 0, 0); frB_ps[1] = rtl_alloc_register(unit, scalar_type); rtl_add_insn(unit, RTLOP_VEXTRACT, frB_ps[1], frB, 0, 1); if (type == RTLTYPE_V2_FLOAT64 && frC_slot != 1 && !(ctx->handle->guest_opt & BINREC_OPT_G_PPC_FAST_FMULS)) { round_for_multiply(ctx, &frA_ps[0], &frC_ps[0]); if (frC_slot == 0) { round_for_multiply(ctx, &frA_ps[1], &frC_ps[1]); } } int frD_ps[2], fi_fprf[2], invalid[2]; int saved_frD = 0; if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) { flush_fpr(ctx, insn_frD(insn), false); saved_frD = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64); rtl_add_insn(unit, RTLOP_GET_ALIAS, saved_frD, 0, 0, ctx->alias.fpr[insn_frD(insn)]); } /* We have to process each slot sequentially so set_fp_result() sees * the set of exceptions for that slot (and not the other one). 
*/ for (int slot = 0; slot < 2; slot++) { int result = rtl_alloc_register(unit, scalar_type); rtl_add_insn(unit, rtlop, result, frA_ps[slot], frC_ps[slot], frB_ps[slot]); invalid[slot] = 0; if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) { const int fpstate = rtl_alloc_register(unit, RTLTYPE_FPSTATE); rtl_add_insn(unit, RTLOP_FGETSTATE, fpstate, 0, 0, 0); invalid[slot] = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_FTESTEXC, invalid[slot], fpstate, 0, RTLFEXC_INVALID); } if (negate) { result = fma_negate(ctx, result); } if (!(ctx->handle->common_opt & BINREC_OPT_NATIVE_IEEE_NAN)) { result = fma_select_nan( ctx, result, frA_ps[slot], frB_ps[slot], frC_ps[slot]); } if (!use_float32) { if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_FAST_FMADDS) { const int result64 = result; result = rtl_alloc_register(unit, RTLTYPE_FLOAT32); rtl_add_insn(unit, RTLOP_FCVT, result, result64, 0, 0); } else { result = round_fma_result_to_single( ctx, result, rtlop, frA_ps[slot], frB_ps[slot], frC_ps[slot]); } } check_fp_underflow(ctx, result, rtlop, frA_ps[slot], frC_ps[slot], frB_ps[slot], true, false, use_float32); if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE) { frD_ps[slot] = result; fi_fprf[slot] = 0; } else { set_fp_result(ctx, insn_frD(insn), result, 0, frA_ps[slot], frB_ps[slot], frC_ps[slot], 0, FPSCR_VXIMZ | FPSCR_VXISI, true, false, true, true); frD_ps[slot] = get_fpr_as_type(ctx, insn_frD(insn), RTLTYPE_FLOAT64); fi_fprf[slot] = get_fr_fi_fprf(ctx); } } if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE) { const int frD = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32); rtl_add_insn(unit, RTLOP_VBUILD2, frD, frD_ps[0], frD_ps[1], 0); set_fpr(ctx, insn_frD(insn), frD); ctx->fpr_is_safe |= 1 << insn_frD(insn); ctx->ps1_is_safe |= 1 << insn_frD(insn); } else { const int fi_ps1 = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, fi_ps1, fi_fprf[1], 0, 0x20); const int final_fi_fprf = 
rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, final_fi_fprf, fi_fprf[0], fi_ps1, 0); set_fr_fi_fprf_and_flush(ctx, final_fi_fprf); const int invalid_any = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, invalid_any, invalid[0], invalid[1], 0); const int label_do_result = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, invalid_any, 0, label_do_result); const int fpscr = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_GET_ALIAS, fpscr, 0, 0, ctx->alias.fpscr); const int has_ve = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, has_ve, fpscr, 0, FPSCR_VE); rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, has_ve, 0, label_do_result); set_fpr_and_flush(ctx, insn_frD(insn), saved_frD, false); const int label_skip_result = rtl_alloc_label(unit); rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_skip_result); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_do_result); const int frD = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64); rtl_add_insn(unit, RTLOP_VBUILD2, frD, frD_ps[0], frD_ps[1], 0); set_fpr_and_flush(ctx, insn_frD(insn), frD, true); rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_skip_result); } } if (insn_Rc(insn)) { update_cr1(ctx); } } /*-----------------------------------------------------------------------*/ /** * translate_ps_merge: Translate a ps_merge* instruction. * * [Parameters] * ctx: Translation context. * insn: Instruction word. * frA_index: Paired-single slot index for frA (copied to frD[ps0]). * frB_index: Paired-single slot index for frB (copied to frD[ps1]). 
*/
static void translate_ps_merge(GuestPPCContext *ctx, uint32_t insn,
                               int frA_index, int frB_index)
{
    RTLUnit * const unit = ctx->unit;

    /* Slot 0 of frD comes from the selected slot of frA. */
    int frA;
    if (frA_index == 0) {
        frA = get_fpr_as_type(ctx, insn_frA(insn), RTLTYPE_FLOAT32);
    } else {
        frA = get_ps1(ctx, insn_frA(insn), RTLTYPE_FLOAT32);
    }

    /* Slot 1 of frD comes from the selected slot of frB. */
    int frB;
    if (frB_index == 0) {
        if (get_fpr_scalar_type(ctx, insn_frB(insn)) == RTLTYPE_FLOAT32
         || (ctx->handle->guest_opt & BINREC_OPT_G_PPC_SINGLE_PREC_INPUTS)) {
            frB = get_fpr_as_type(ctx, insn_frB(insn), RTLTYPE_FLOAT32);
        } else {
            /* When moving a double-precision value into the ps1 slot,
             * the value is always truncated rather than being rounded
             * based on FPSCR[RN]. */
            const int frB_64 =
                get_fpr_as_type(ctx, insn_frB(insn), RTLTYPE_FLOAT64);
            /* Switch to truncating rounding around the conversion, then
             * restore the floating-point state captured beforehand. */
            const int fpstate = rtl_alloc_register(unit, RTLTYPE_FPSTATE);
            rtl_add_insn(unit, RTLOP_FGETSTATE, fpstate, 0, 0, 0);
            const int fpstate_trunc =
                rtl_alloc_register(unit, RTLTYPE_FPSTATE);
            rtl_add_insn(unit, RTLOP_FSETROUND,
                         fpstate_trunc, fpstate, 0, RTLFROUND_TRUNC);
            rtl_add_insn(unit, RTLOP_FSETSTATE, 0, fpstate_trunc, 0, 0);
            const bool snan_safe =
                (ctx->fpr_is_safe & (1 << insn_frB(insn)))
                || (ctx->handle->guest_opt & BINREC_OPT_G_PPC_ASSUME_NO_SNAN);
            frB = fcast_64to32(unit, frB_64, !snan_safe);
            rtl_add_insn(unit, RTLOP_FSETSTATE, 0, fpstate, 0, 0);
        }
    } else {
        frB = get_ps1(ctx, insn_frB(insn), RTLTYPE_FLOAT32);
    }

    const int frD = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32);
    rtl_add_insn(unit, RTLOP_VBUILD2, frD, frA, frB, 0);
    set_fpr(ctx, insn_frD(insn), frD);

    /* Sample both source "safe" flags before updating either destination
     * flag.  frD may be the same register as frB, in which case updating
     * fpr_is_safe[frD] first would make the frB test read the new value
     * instead of frB's original flag. */
    const bool ps0_safe =
        ((frA_index ? ctx->ps1_is_safe : ctx->fpr_is_safe)
         & (1 << insn_frA(insn))) != 0;
    const bool ps1_safe =
        ((frB_index ? ctx->ps1_is_safe : ctx->fpr_is_safe)
         & (1 << insn_frB(insn))) != 0;
    if (ps0_safe) {
        ctx->fpr_is_safe |= 1 << insn_frD(insn);
    } else {
        ctx->fpr_is_safe &= ~(1 << insn_frD(insn));
    }
    if (ps1_safe) {
        ctx->ps1_is_safe |= 1 << insn_frD(insn);
    } else {
        ctx->ps1_is_safe &= ~(1 << insn_frD(insn));
    }

    if (insn_Rc(insn)) {
        update_cr1(ctx);
    }
}

/*-----------------------------------------------------------------------*/

/**
 * translate_ps_recip:  Translate a ps_res or ps_rsqrte instruction.
 *
 * With the NATIVE_RECIPROCAL option the result is computed with vector
 * divide (and square root) operations; otherwise each slot goes through
 * the scalar lookup helpers.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
 *     is_rsqrte: True if the instruction is ps_rsqrte, false if ps_res.
 */
static void translate_ps_recip(GuestPPCContext *ctx, uint32_t insn,
                               bool is_rsqrte)
{
    RTLUnit * const unit = ctx->unit;

    const int frB = get_fpr_as_type(ctx, insn_frB(insn), RTLTYPE_V2_FLOAT32);

    if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_NATIVE_RECIPROCAL) {

        const uint32_t vxfoo_no_snan = is_rsqrte ? FPSCR_VXSQRT : 0;
        /* The divisor is sqrt(frB) for ps_rsqrte and frB for ps_res. */
        int div_src;
        if (is_rsqrte) {
            const int sqrt_frB = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32);
            rtl_add_insn(unit, RTLOP_FSQRT, sqrt_frB, frB, 0, 0);
            div_src = sqrt_frB;
        } else {
            div_src = frB;
        }
        /* 0x3F800000 is the single-precision bit pattern of 1.0. */
        const int one = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
        rtl_add_insn(unit, RTLOP_LOAD_IMM, one, 0, 0, 0x3F800000);
        const int ones = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32);
        rtl_add_insn(unit, RTLOP_VBROADCAST, ones, one, 0, 0);
        const int result = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32);
        rtl_add_insn(unit, RTLOP_FDIV, result, ones, div_src, 0);
        set_ps_result(ctx, insn_frD(insn), result,
                      is_rsqrte ? RTLOP_FSQRT : RTLOP_FDIV, ones, frB, 0,
                      true, vxfoo_no_snan);

    } else {  /* !NATIVE_RECIPROCAL */

        /* When FPSCR state is tracked, alias_skip_set records (as 0/1)
         * whether either slot's lookup asked for the result write to be
         * skipped. */
        int alias_skip_set = 0;
        if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) {
            flush_fpr(ctx, insn_frD(insn), true);
            alias_skip_set = rtl_alloc_alias_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_SET_ALIAS,
                         0, rtl_imm32(unit,0), 0, alias_skip_set);
        }

        int frD_ps[2], fi_fprf = 0;
        for (int slot = 0; slot < 2; slot++) {
            /* The lookup helper delivers its result through this alias
             * and may branch to label_skip_set (when nonzero). */
            const int alias = rtl_alloc_alias_register(unit, RTLTYPE_FLOAT32);
            int label_skip_set = 0;
            if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) {
                label_skip_set = rtl_alloc_label(unit);
            }

            const int frB_ps = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
            rtl_add_insn(unit, RTLOP_VEXTRACT, frB_ps, frB, 0, slot);
            if (is_rsqrte) {
                translate_frsqrte_lookup(ctx, frB_ps, alias, label_skip_set);
            } else {
                translate_fres_lookup(ctx, frB_ps, alias, label_skip_set);
            }

            /* Normal flow jumps over the skip handler; the skip handler
             * just sets the flag and rejoins. */
            if (label_skip_set) {
                const int label_do_set = rtl_alloc_label(unit);
                rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_do_set);
                rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_skip_set);
                rtl_add_insn(unit, RTLOP_SET_ALIAS,
                             0, rtl_imm32(unit,1), 0, alias_skip_set);
                rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_do_set);
            }

            /* Slot 0's status value is captured after its lookup.  After
             * slot 1, ps_rsqrte writes slot 0's value back unchanged,
             * while ps_res merges in bit 0x20 of slot 1's value first. */
            if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) {
                if (slot == 0) {
                    fi_fprf = get_fr_fi_fprf(ctx);
                } else if (is_rsqrte) {
                    set_fr_fi_fprf_and_flush(ctx, fi_fprf);
                } else {
                    const int fi_fprf_ps1 = get_fr_fi_fprf(ctx);
                    const int fi_ps1 = rtl_alloc_register(unit, RTLTYPE_INT32);
                    rtl_add_insn(unit, RTLOP_ANDI,
                                 fi_ps1, fi_fprf_ps1, 0, 0x20);
                    const int final_fi_fprf =
                        rtl_alloc_register(unit, RTLTYPE_INT32);
                    rtl_add_insn(unit, RTLOP_OR,
                                 final_fi_fprf, fi_fprf, fi_ps1, 0);
                    set_fr_fi_fprf_and_flush(ctx, final_fi_fprf);
                }
            }

            frD_ps[slot] = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
            rtl_add_insn(unit, RTLOP_GET_ALIAS, frD_ps[slot], 0, 0, alias);
        }

        /* Skip the write to frD entirely if either slot set the flag. */
        int label_skip_set = 0;
        if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) {
            const int test_skip_set = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_GET_ALIAS,
                         test_skip_set, 0, 0, alias_skip_set);
            label_skip_set = rtl_alloc_label(unit);
            rtl_add_insn(unit, RTLOP_GOTO_IF_NZ,
                         0, test_skip_set, 0, label_skip_set);
        }
        const int frD = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32);
        rtl_add_insn(unit, RTLOP_VBUILD2, frD, frD_ps[0], frD_ps[1], 0);
        if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE) {
            set_fpr(ctx, insn_frD(insn), frD);
            ctx->fpr_is_safe |= 1 << insn_frD(insn);
            ctx->ps1_is_safe |= 1 << insn_frD(insn);
        } else {
            set_fpr_and_flush(ctx, insn_frD(insn), frD, true);
            rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_skip_set);
        }

    }

    if (insn_Rc(insn)) {
        update_cr1(ctx);
    }
}

/*-----------------------------------------------------------------------*/

/**
 * translate_ps_sel:  Translate a ps_sel instruction.
 *
 * For each slot, frD receives frC's value if frA's value compares
 * greater than or equal to zero, and frB's value otherwise.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
 */
static void translate_ps_sel(GuestPPCContext *ctx, uint32_t insn)
{
    RTLUnit * const unit = ctx->unit;

    /* When FPSCR state is tracked, capture the floating-point state so it
     * can be restored after the comparisons below. */
    int fpstate = 0;
    if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) {
        fpstate = rtl_alloc_register(unit, RTLTYPE_FPSTATE);
        rtl_add_insn(unit, RTLOP_FGETSTATE, fpstate, 0, 0, 0);
    }

    /* frA keeps its own precision since it is only compared.  frB and frC
     * use single precision only if both are currently single. */
    const RTLDataType frA_scalar_type =
        get_fpr_scalar_type(ctx, insn_frA(insn));
    const RTLDataType frA_type =
        (frA_scalar_type == RTLTYPE_FLOAT32 ? RTLTYPE_V2_FLOAT32
                                            : RTLTYPE_V2_FLOAT64);
    const bool frBC_float32 =
        (get_fpr_scalar_type(ctx, insn_frB(insn)) == RTLTYPE_FLOAT32
         && get_fpr_scalar_type(ctx, insn_frC(insn)) == RTLTYPE_FLOAT32);
    const RTLDataType frBC_scalar_type =
        (frBC_float32 ? RTLTYPE_FLOAT32 : RTLTYPE_FLOAT64);
    const RTLDataType frBC_type =
        (frBC_float32 ? RTLTYPE_V2_FLOAT32 : RTLTYPE_V2_FLOAT64);
    const int frA = get_fpr_as_type(ctx, insn_frA(insn), frA_type);
    const int frC = get_fpr_as_type(ctx, insn_frC(insn), frBC_type);
    const int frB = get_fpr_as_type(ctx, insn_frB(insn), frBC_type);

    const int zero = rtl_alloc_register(unit, frA_scalar_type);
    rtl_add_insn(unit, RTLOP_LOAD_IMM, zero, 0, 0, 0);
    int frD_ps[2];
    for (int slot = 0; slot < 2; slot++) {
        const int frA_ps = rtl_alloc_register(unit, frA_scalar_type);
        rtl_add_insn(unit, RTLOP_VEXTRACT, frA_ps, frA, 0, slot);
        const int frC_ps = rtl_alloc_register(unit, frBC_scalar_type);
        rtl_add_insn(unit, RTLOP_VEXTRACT, frC_ps, frC, 0, slot);
        const int frB_ps = rtl_alloc_register(unit, frBC_scalar_type);
        rtl_add_insn(unit, RTLOP_VEXTRACT, frB_ps, frB, 0, slot);
        const int test = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_FCMP, test, frA_ps, zero, RTLFCMP_GE);
        frD_ps[slot] = rtl_alloc_register(unit, frBC_scalar_type);
        rtl_add_insn(unit, RTLOP_SELECT, frD_ps[slot], frC_ps, frB_ps, test);
    }
    const int frD = rtl_alloc_register(unit, frBC_type);
    rtl_add_insn(unit, RTLOP_VBUILD2, frD, frD_ps[0], frD_ps[1], 0);
    set_fpr(ctx, insn_frD(insn), frD);

    if (fpstate) {
        rtl_add_insn(unit, RTLOP_FSETSTATE, 0, fpstate, 0, 0);
    }

    if (insn_Rc(insn)) {
        update_cr1(ctx);
    }
}

/*-----------------------------------------------------------------------*/

/**
 * translate_ps_sum:  Translate a ps_sum* instruction.
 *
 * The slot not selected by index receives the corresponding slot of frC.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
 *     index: Slot (0 or 1) which receives frA[ps0] + frB[ps1].
 */
static void translate_ps_sum(GuestPPCContext *ctx, uint32_t insn, int index)
{
    RTLUnit * const unit = ctx->unit;

    /* The addition uses frA's current precision; frC is always taken as
     * single precision since it is only copied. */
    const bool use_float32 =
        (get_fpr_scalar_type(ctx, insn_frA(insn)) == RTLTYPE_FLOAT32);
    const RTLDataType type = use_float32 ? RTLTYPE_FLOAT32 : RTLTYPE_FLOAT64;
    const int frA = get_fpr_as_type(ctx, insn_frA(insn), type);
    const int frB = get_ps1(ctx, insn_frB(insn), type);
    int frC;
    if (index == 0) {
        frC = get_ps1(ctx, insn_frC(insn), RTLTYPE_FLOAT32);
    } else {
        frC = get_fpr_as_type(ctx, insn_frC(insn), RTLTYPE_FLOAT32);
    }

    int sum = rtl_alloc_register(unit, type);
    rtl_add_insn(unit, RTLOP_FADD, sum, frA, frB, 0);
    if (!use_float32) {
        const int sum64 = sum;
        sum = rtl_alloc_register(unit, RTLTYPE_FLOAT32);
        rtl_add_insn(unit, RTLOP_FCVT, sum, sum64, 0, 0);
    }
    check_fp_underflow(ctx, sum, RTLOP_FADD, frA, frB, 0,
                       true, false, use_float32);

    /* If an invalid-operation exception occurred and native NaNs are
     * not enabled, we have to process the result as a scalar in order
     * to get the generated NaN (if any) in the proper place. */
    if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)
     && !(ctx->handle->common_opt & BINREC_OPT_NATIVE_IEEE_NAN)) {
        const int fpstate = rtl_alloc_register(unit, RTLTYPE_FPSTATE);
        rtl_add_insn(unit, RTLOP_FGETSTATE, fpstate, 0, 0, 0);
        const int invalid = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_FTESTEXC,
                     invalid, fpstate, 0, RTLFEXC_INVALID);
        const int label_do_result = rtl_alloc_label(unit);
        rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, invalid, 0, label_do_result);

        /* Invalid path: store the sum as a scalar result.  If FPSCR[VE]
         * is set, nothing further is written. */
        set_fp_result(ctx, insn_frD(insn), sum, index, frA, frB, 0, 0,
                      FPSCR_VXISI, true, false, true, true);
        const int fpscr = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_GET_ALIAS, fpscr, 0, 0, ctx->alias.fpscr);
        const int has_ve = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_ANDI, has_ve, fpscr, 0, FPSCR_VE);
        const int label_skip_result = rtl_alloc_label(unit);
        rtl_add_insn(unit, RTLOP_GOTO_IF_NZ, 0, has_ve, 0, label_skip_result);
        /* Suppress exceptions from the cast operation. */
        const int fcast_fpstate = rtl_alloc_register(unit, RTLTYPE_FPSTATE);
        rtl_add_insn(unit, RTLOP_FGETSTATE, fcast_fpstate, 0, 0, 0);
        /* Read back slot 0 of the value just stored to frD, pair it with
         * frC widened to double precision, and write the pair to frD with
         * the sum in the slot selected by index. */
        const int frD_ps = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64);
        rtl_add_insn(unit, RTLOP_GET_ALIAS,
                     frD_ps, 0, 0, ctx->alias.fpr[insn_frD(insn)]);
        const int sum_64 = rtl_alloc_register(unit, RTLTYPE_FLOAT64);
        rtl_add_insn(unit, RTLOP_VEXTRACT, sum_64, frD_ps, 0, 0);
        const bool frC_snan_safe =
            (ctx->handle->guest_opt & BINREC_OPT_G_PPC_ASSUME_NO_SNAN) != 0;
        const int frC_64 = fcast_32to64(unit, frC, !frC_snan_safe);
        rtl_add_insn(unit, RTLOP_FSETSTATE, 0, fcast_fpstate, 0, 0);
        const int nan_result = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT64);
        rtl_add_insn(unit, RTLOP_VBUILD2, nan_result,
                     index==0 ? sum_64 : frC_64,
                     index==0 ? frC_64 : sum_64, 0);
        rtl_add_insn(unit, RTLOP_SET_ALIAS,
                     0, nan_result, 0, ctx->alias.fpr[insn_frD(insn)]);
        rtl_add_insn(unit, RTLOP_GOTO, 0, 0, 0, label_skip_result);

        rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_do_result);
        const int result = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32);
        rtl_add_insn(unit, RTLOP_VBUILD2, result,
                     index==0 ? sum : frC, index==0 ? frC : sum, 0);
        /* Don't pass VXISI here because it'll confuse the default NaN
         * generator.  This is only reached if VX was not raised, so we
         * don't need to specify any exceptions anyway. */
        set_fp_result(ctx, insn_frD(insn), result, index, frA, frB, 0, 0, 0,
                      false, false, true, false);

        rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label_skip_result);
    } else {
        /* If we don't have to worry about default NaNs, we can just set
         * the result directly. */
        const int result = rtl_alloc_register(unit, RTLTYPE_V2_FLOAT32);
        rtl_add_insn(unit, RTLOP_VBUILD2, result,
                     index==0 ? sum : frC, index==0 ? frC : sum, 0);
        set_fp_result(ctx, insn_frD(insn), result, index, frA, frB, 0, 0,
                      FPSCR_VXISI, true, false, true, false);
    }

    if (insn_Rc(insn)) {
        update_cr1(ctx);
    }
}

/*-----------------------------------------------------------------------*/

/**
 * translate_rotate_mask:  Translate a rlwinm, rlwnm, or rlwimi instruction.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     insn: Instruction word.
 *     is_imm: True if the shift count is an immediate value, false if rB.
 *     insert: True for a rlwimi instruction.
 */
static void translate_rotate_mask(
    GuestPPCContext *ctx, uint32_t insn, bool is_imm, bool insert)
{
    RTLUnit * const unit = ctx->unit;

    const int SH = insn_SH(insn);
    const int MB = insn_MB(insn);
    const int ME = insn_ME(insn);
    const int rS = get_gpr(ctx, insn_rS(insn));
    int result;

    if (MB == ((ME + 1) & 31)) {  /* rotlw/rotlwi */
        if (is_imm) {
            if (SH == 0) {
                result = rS;
            } else {
                result = rtl_alloc_register(unit, RTLTYPE_INT32);
                rtl_add_insn(unit, RTLOP_RORI, result, rS, 0, 32-SH);
            }
        } else {
            const int rB = get_gpr(ctx, insn_rB(insn));
            result = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_ROL, result, rS, rB, 0);
        }

    } else if (is_imm && !insert && MB == 0 && ME == 31-SH) {  /* slwi */
        result = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_SLLI, result, rS, 0, SH);

    } else if (is_imm && !insert && MB == 32-SH && ME == 31) {  /* srwi */
        result = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_SRLI, result, rS, 0, 32-SH);

    } else if (insert) {
        ASSERT(is_imm);
        const int rA = get_gpr(ctx, insn_rA(insn));
        if (MB <= ME) {
            const int start = 31 - ME;
            const int count = ME - MB + 1;
            const int base = rA;
            int value;
            if (SH == start) {
                value = rS;
            } else {
                value = rtl_alloc_register(unit, RTLTYPE_INT32);
                rtl_add_insn(unit, RTLOP_RORI,
                             value, rS, 0, ((32-SH) + start) & 31);
            }
            result = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_BFINS,
                         result, base, value, start | count<<8);
        } else {
            const int start = 32 - MB;
            const int
count = MB - ME - 1; ASSERT(count > 0); // Or else it would be rotlwi. int rS_rotated; if (SH == 0) { rS_rotated = rS; } else { rS_rotated = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_RORI, rS_rotated, rS, 0, 32-SH); } const uint32_t mask = ((1 << count) - 1) << start; const int rS_masked = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, rS_masked, rS_rotated, 0, ~mask); const int rA_masked = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, rA_masked, rA, 0, mask); result = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, result, rS_masked, rA_masked, 0); } } else { /* MASK function for little-endian bit numbering. Assumes mb >= me. */ #define MASK_LE(mb, me) \ ((uint32_t)((UINT64_C(1) << ((mb)-(me)+1)) - 1) << (me)) const int mb = 31 - MB; const int me = 31 - ME; const uint32_t mask = (mb < me ? ~MASK_LE((me-1) & 31, (mb+1) & 31) : MASK_LE(mb, me)); #undef MASK_LE int rotated; if (is_imm) { if (SH == 0) { rotated = rS; } else { rotated = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_RORI, rotated, rS, 0, 32-SH); } } else { const int rB = get_gpr(ctx, insn_rB(insn)); rotated = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ROL, rotated, rS, rB, 0); } result = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, result, rotated, 0, mask); } set_gpr(ctx, insn_rA(insn), result); if (insn_Rc(insn)) { update_cr0(ctx, result); } } /*-----------------------------------------------------------------------*/ /** * translate_shift: Translate a bit-shift instruction. * * [Parameters] * ctx: Translation context. * insn: Instruction word. * rtlop: RTL instruction to perform the operation. * is_imm: True if the shift count is an immediate value, false if rB. * is_sra: True if the shift is an arithmetic right shift (sets XER[CA]). 
*/
static void translate_shift(
    GuestPPCContext *ctx, uint32_t insn, RTLOpcode rtlop, bool is_imm,
    bool is_sra)
{
    RTLUnit * const unit = ctx->unit;

    int rS = get_gpr(ctx, insn_rS(insn));
    int count, result;
    if (is_imm) {
        count = 0;  // Not used, but avoid a "may be uninitialized" warning.
        result = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, rtlop, result, rS, 0, insn_SH(insn));
    } else {
        /* Register shift counts are masked to 6 bits and the shift is
         * performed on a 64-bit copy of rS (sign-extended for arithmetic
         * shifts, zero-extended otherwise), then truncated back to 32
         * bits, so that counts of 32..63 behave as a full shift-out. */
        const int rB = get_gpr(ctx, insn_rB(insn));
        count = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_ANDI, count, rB, 0, 63);
        const int rS_64 = rtl_alloc_register(unit, RTLTYPE_INT64);
        if (is_sra) {
            rtl_add_insn(unit, RTLOP_SCAST, rS_64, rS, 0, 0);
        } else {
            rtl_add_insn(unit, RTLOP_ZCAST, rS_64, rS, 0, 0);
        }
        /* From here on rS refers to the 64-bit copy (also used by the
         * carry computation below). */
        rS = rS_64;
        const int result_64 = rtl_alloc_register(unit, RTLTYPE_INT64);
        rtl_add_insn(unit, rtlop, result_64, rS, count, 0);
        result = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_ZCAST, result, result_64, 0, 0);
    }
    set_gpr(ctx, insn_rA(insn), result);

    if (is_sra) {
        /* XER[CA] is set iff the source is negative and at least one
         * 1 bit was shifted out.  "test" holds the shifted-out bits. */
        int test;
        if (is_imm) {
            /* The shift must be done on an unsigned value: SH may be 31
             * (e.g. "srawi rA,rS,31"), and left-shifting a signed int 1
             * by 31 is undefined behavior in C. */
            const uint32_t mask = ((uint32_t)1 << insn_SH(insn)) - 1;
            test = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_ANDI, test, rS, 0, mask);
        } else {
            /* Compute (1 << count) - 1 at runtime in 64 bits. */
            const int one = rtl_imm64(unit, 1);
            const int shifted_one = rtl_alloc_register(unit, RTLTYPE_INT64);
            rtl_add_insn(unit, RTLOP_SLL, shifted_one, one, count, 0);
            const int mask = rtl_alloc_register(unit, RTLTYPE_INT64);
            rtl_add_insn(unit, RTLOP_ADDI, mask, shifted_one, 0, -1);
            test = rtl_alloc_register(unit, RTLTYPE_INT64);
            rtl_add_insn(unit, RTLOP_AND, test, rS, mask, 0);
        }
        const int has_bits = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_SGTUI, has_bits, test, 0, 0);
        const int is_neg = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_SLTSI, is_neg, rS, 0, 0);
        const int ca = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_AND, ca, has_bits, is_neg, 0);
        /* Insert the 1-bit carry value into XER at XER_CA_SHIFT. */
        const int xer = get_xer(ctx);
        const int new_xer = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_BFINS,
                     new_xer, xer, ca, XER_CA_SHIFT | 1<<8);
        set_xer(ctx, new_xer, -1);
    }

    if (insn_Rc(insn)) {
        update_cr0(ctx, result);
    }
}

/*-----------------------------------------------------------------------*/

/**
 * translate_stwcx:  Translate a stwcx. instruction.
 *
 * The generated code clears the reservation flag in the processor state
 * block, sets CR0 to {0, 0, 0, XER[SO]}, and (if the reservation was
 * held) performs a compare-and-exchange against the reserved value,
 * setting CR0.eq on success.  When the instruction is the second half of
 * an lwarx/stwcx. pair detected within the current block, the flag test
 * is omitted, and if the value being stored is the very register loaded
 * by the paired lwarx, the store is omitted altogether.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     address: Address of instruction being translated.
 *     insn: Instruction word.
 */
static void translate_stwcx(
    GuestPPCContext *ctx, uint32_t address, uint32_t insn)
{
    const binrec_t *handle = ctx->handle;
    RTLUnit * const unit = ctx->unit;
    const int psb_reg = ctx->psb_reg;

    const bool is_paired =
        (address == ctx->blocks[ctx->current_block].paired_stwcx);

    if (is_paired
     && ctx->live.gpr[insn_rS(insn)] == ctx->paired_lwarx_data) {
        /* We're just storing back the same value that was loaded.  This
         * pattern is probably the result of compiler quirks, but for our
         * purposes, it's semantically equivalent to a no-op, so we omit
         * the potentially costly compare-and-exchange operation and act
         * as if the store succeeded. */
        const int zero = rtl_imm32(unit, 0);
        const int so = get_xer_so(ctx);
        rtl_add_insn(unit, RTLOP_STORE_I8, 0, psb_reg, zero,
                     handle->setup.state_offsets_ppc.reserve_flag);
        if (ctx->use_split_fields) {
            const int one = rtl_imm32(unit, 1);
            set_crf(ctx, 0, zero, zero, one, so);
        } else {
            /* CR0 = 0b0010 | SO (eq set), inserted as the top 4 bits of
             * the CR word. */
            const int old_cr = get_cr(ctx);
            const int temp = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_ORI, temp, so, 0, 2);
            const int new_cr = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_BFINS,
                         new_cr, old_cr, temp, 28 | 4<<8);
            set_cr(ctx, new_cr);
        }
        return;
    }

    /* Target for both "no reservation" and "compare-exchange failed". */
    const int skip_label = rtl_alloc_label(unit);

    int flag;
    if (is_paired) {
        /* If this instruction is part of an optimized pair, we don't need
         * to test reserve_flag.  We still clear it, though, just in case
         * it was set on entry to the block. */
        flag = 0;
    } else {
        flag = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_LOAD_U8, flag, psb_reg, 0,
                     handle->setup.state_offsets_ppc.reserve_flag);
    }

    /* Preset CR0 to the "store failed" state; CR0.eq is set below only
     * on the success path. */
    const int zero = rtl_imm32(unit, 0);
    const int so = get_xer_so(ctx);
    if (ctx->use_split_fields) {
        set_crf(ctx, 0, zero, zero, zero, so);
        /* Flush CR0.eq because of the conditional branches. */
        ctx->live.crb[2] = 0;
        ctx->last_set.crb[2] = -1;
        ctx->crb_dirty |= 1 << 2;
    } else {
        /* Optimize non-split-field set_crf() since we know the high
         * three bits are zero. */
        const int old_cr = get_cr(ctx);
        const int new_cr = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_BFINS, new_cr, old_cr, so, 28 | 4<<8);
        set_cr(ctx, new_cr);
        /* Flush CR, as above. */
        ctx->live.cr = 0;
        ctx->last_set.cr = -1;
    }

    if (!is_paired) {
        rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, flag, 0, skip_label);
    }
    rtl_add_insn(unit, RTLOP_STORE_I8, 0, psb_reg, zero,
                 handle->setup.state_offsets_ppc.reserve_flag);

    /* The expected memory value comes from the paired lwarx if there is
     * one, otherwise from the reserve_state field of the state block. */
    int old_value;
    if (is_paired) {
        old_value = ctx->paired_lwarx_data_be;
    } else {
        old_value = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_LOAD, old_value, psb_reg, 0,
                     handle->setup.state_offsets_ppc.reserve_state);
    }
    /* On little-endian hosts the value to store is byte-swapped before
     * the compare-and-exchange. */
    int new_value = get_gpr(ctx, insn_rS(insn));
    if (handle->host_little_endian) {
        const int temp = new_value;
        new_value = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_BSWAP, new_value, temp, 0, 0);
    }
    const int host_address = get_ea_indexed(ctx, insn, NULL);
    const int result = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_CMPXCHG,
                 result, host_address, old_value, new_value);
    /* The store is treated as successful iff CMPXCHG returned the
     * expected old value. */
    const int success = rtl_alloc_register(unit, RTLTYPE_INT32);
    rtl_add_insn(unit, RTLOP_SEQ, success, result, old_value, 0);
    rtl_add_insn(unit, RTLOP_GOTO_IF_Z, 0, success, 0, skip_label);

    /* Success: set CR0.eq. */
    if (ctx->use_split_fields) {
        rtl_add_insn(unit, RTLOP_SET_ALIAS,
                     0, rtl_imm32(unit, 1), 0, ctx->alias.crb[2]);
    } else {
        const int old_cr = get_cr(ctx);
        const int new_cr = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_ORI, new_cr, old_cr, 0, 1<<29);
        set_cr(ctx, new_cr);
        ctx->live.cr = 0;
        ctx->last_set.cr = -1;
    }

    rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, skip_label);
}

/*-----------------------------------------------------------------------*/

/**
 * translate_trap:  Translate a tw or twi instruction.
 *
 * Each combination of TO condition bits is reduced to the cheapest
 * equivalent comparison (or pair of comparisons) of rA against rB, and
 * a conditional branch around the trap sequence is emitted.  If the TO
 * field is zero, nothing is emitted; if the conditions cover every
 * possible outcome, the trap sequence is emitted unconditionally.  The
 * trap sequence flushes guest state, calls the trap handler loaded from
 * the processor state block, and returns from the unit.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     address: Address of instruction being translated.
 *     insn: Instruction word.
 *     rB: RTL register containing second comparison value (rB or immediate).
 */
static void translate_trap(
    GuestPPCContext *ctx, uint32_t address, uint32_t insn, int rB)
{
    RTLUnit * const unit = ctx->unit;

    const int TO = insn_TO(insn);
    if (!TO) {
        return;  // Effectively a NOP.
    }

    const int label = rtl_alloc_label(unit);
    const int rA = get_gpr(ctx, insn_rA(insn));
    /* "result" receives the comparison register and "skip_op" the branch
     * opcode which jumps over the trap when the trap condition is NOT
     * met (GOTO_IF_Z if result is the trap condition itself, GOTO_IF_NZ
     * if result is its inverse). */
    int result;
    RTLOpcode skip_op;

    switch (TO) {

      case TO_GTU:  // 0x01
        result = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_SGTU, result, rA, rB, 0);
        skip_op = RTLOP_GOTO_IF_Z;
        break;

      case TO_LTU:  // 0x02
        result = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_SLTU, result, rA, rB, 0);
        skip_op = RTLOP_GOTO_IF_Z;
        break;

      /* All of these are equivalent to "trap if rA != rB". */
      case TO_LTU | TO_GTU:  // 0x03
      case TO_GTS | TO_LTU | TO_GTU:  // 0x0B
      case TO_LTS | TO_LTU | TO_GTU:  // 0x13
      case TO_LTS | TO_GTS:  // 0x18
      case TO_LTS | TO_GTS | TO_GTU:  // 0x19
      case TO_LTS | TO_GTS | TO_LTU:  // 0x1A
      case TO_LTS | TO_GTS | TO_LTU | TO_GTU:  // 0x1B
        result = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_SEQ, result, rA, rB, 0);
        skip_op = RTLOP_GOTO_IF_NZ;
        break;

      case TO_EQ:  // 0x04
        result = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_SEQ, result, rA, rB, 0);
        skip_op = RTLOP_GOTO_IF_Z;
        break;

      /* Unsigned >=: skip the trap if rA < rB. */
      case TO_GTU | TO_EQ:  // 0x05
        result = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_SLTU, result, rA, rB, 0);
        skip_op = RTLOP_GOTO_IF_NZ;
        break;

      /* Unsigned <=: skip the trap if rA > rB. */
      case TO_LTU | TO_EQ:  // 0x06
        result = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_SGTU, result, rA, rB, 0);
        skip_op = RTLOP_GOTO_IF_NZ;
        break;

      /* Signed >, optionally ORed with one unsigned comparison. */
      case TO_GTS:  // 0x08
      case TO_GTS | TO_GTU:  // 0x09
      case TO_GTS | TO_LTU:  // 0x0A
        result = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_SGTS, result, rA, rB, 0);
        if (TO & TO_GTU) {
            const int result1 = result;
            const int result2 = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_SGTU, result2, rA, rB, 0);
            result = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_OR, result, result1, result2, 0);
        } else if (TO & TO_LTU) {
            const int result1 = result;
            const int result2 = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_SLTU, result2, rA, rB, 0);
            result = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_OR, result, result1, result2, 0);
        }
        skip_op = RTLOP_GOTO_IF_Z;
        break;

      /* Signed >=, computed as the inverse of signed <.  When combined
       * with an unsigned comparison, the signed result is inverted
       * explicitly (XORI 1) so the two can be ORed together. */
      case TO_GTS | TO_EQ:  // 0x0C
      case TO_GTS | TO_EQ | TO_GTU:  // 0x0D
      case TO_GTS | TO_EQ | TO_LTU:  // 0x0E
        result = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_SLTS, result, rA, rB, 0);
        if (TO & TO_GTU) {
            const int result1 = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_XORI, result1, result, 0, 1);
            const int result2 = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_SGTU, result2, rA, rB, 0);
            result = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_OR, result, result1, result2, 0);
            skip_op = RTLOP_GOTO_IF_Z;
        } else if (TO & TO_LTU) {
            const int result1 = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_XORI, result1, result, 0, 1);
            const int result2 = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_SLTU, result2, rA, rB, 0);
            result = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_OR, result, result1, result2, 0);
            skip_op = RTLOP_GOTO_IF_Z;
        } else {
            skip_op = RTLOP_GOTO_IF_NZ;
        }
        break;

      /* Signed <, optionally ORed with one unsigned comparison. */
      case TO_LTS:  // 0x10
      case TO_LTS | TO_GTU:  // 0x11
      case TO_LTS | TO_LTU:  // 0x12
        result = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_SLTS, result, rA, rB, 0);
        if (TO & TO_GTU) {
            const int result1 = result;
            const int result2 = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_SGTU, result2, rA, rB, 0);
            result = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_OR, result, result1, result2, 0);
        } else if (TO & TO_LTU) {
            const int result1 = result;
            const int result2 = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_SLTU, result2, rA, rB, 0);
            result = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_OR, result, result1, result2, 0);
        }
        skip_op = RTLOP_GOTO_IF_Z;
        break;

      /* Signed <=, computed as the inverse of signed >; see the signed
       * >= case above for the handling of the unsigned bits. */
      case TO_LTS | TO_EQ:  // 0x14
      case TO_LTS | TO_EQ | TO_GTU:  // 0x15
      case TO_LTS | TO_EQ | TO_LTU:  // 0x16
        result = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_SGTS, result, rA, rB, 0);
        if (TO & TO_GTU) {
            const int result1 = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_XORI, result1, result, 0, 1);
            const int result2 = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_SGTU, result2, rA, rB, 0);
            result = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_OR, result, result1, result2, 0);
            skip_op = RTLOP_GOTO_IF_Z;
        } else if (TO & TO_LTU) {
            const int result1 = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_XORI, result1, result, 0, 1);
            const int result2 = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_SLTU, result2, rA, rB, 0);
            result = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_OR, result, result1, result2, 0);
            skip_op = RTLOP_GOTO_IF_Z;
        } else {
            skip_op = RTLOP_GOTO_IF_NZ;
        }
        break;

      /* Every remaining TO value includes a complete set of signed or
       * unsigned conditions (<, >, ==), so the trap is unconditional and
       * no comparison or skip branch is emitted. */
      default:
        ASSERT((TO & (TO_LTS|TO_GTS|TO_EQ)) == (TO_LTS|TO_GTS|TO_EQ)
            || (TO & (TO_LTU|TO_GTU|TO_EQ)) == (TO_LTU|TO_GTU|TO_EQ));
        result = 0;
        skip_op = RTLOP_NOP;
        break;

    }

    if (result) {
        rtl_add_insn(unit, skip_op, 0, result, 0, label);
    }
    /* Trap sequence: write back guest state, set NIA to the address of
     * this instruction, then call the trap handler and return its result
     * from the unit. */
    flush_live_regs(ctx, false);
    guest_ppc_flush_cr(ctx, false);
    guest_ppc_flush_fpscr(ctx);
    set_nia_imm(ctx, address);
    post_insn_callback(ctx, address);
    const int trap_handler = rtl_alloc_register(unit, RTLTYPE_ADDRESS);
    rtl_add_insn(unit, RTLOP_LOAD, trap_handler, ctx->psb_reg, 0,
                 ctx->handle->setup.state_offsets_ppc.trap_handler);
    const int new_psb = rtl_alloc_register(unit, RTLTYPE_ADDRESS);
    rtl_add_insn(unit, RTLOP_CALL, new_psb, trap_handler, ctx->psb_reg, 0);
    rtl_add_insn(unit, RTLOP_RETURN, 0, new_psb, 0, 0);
    if (result) {
        rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, label);
    }
}

/*-----------------------------------------------------------------------*/

/**
 * translate_unimplemented_insn:  Handle translation for an instruction
 * not supported by the translator.  Logs a warning and emits an
 * RTLOP_ILLEGAL instruction in place of the guest instruction.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     address: Address of instruction being translated.
 *     insn: Instruction word.
 */
static void translate_unimplemented_insn(
    GuestPPCContext *ctx, uint32_t address, uint32_t insn)
{
    log_warning(ctx->handle, "Unsupported instruction %08X at address 0x%X,"
                " treating as invalid", insn, address);
    rtl_add_insn(ctx->unit, RTLOP_ILLEGAL, 0, 0, 0, 0);
}

/*-----------------------------------------------------------------------*/

/**
 * translate_x1F: Translate the given opcode-0x1F instruction.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     block: Basic block being translated.
 *     address: Address of instruction being translated.
 *     insn: Instruction word.
*/
static inline void translate_x1F(
    GuestPPCContext * const ctx, GuestPPCBlockInfo * const block,
    const uint32_t address, const uint32_t insn)
{
    RTLUnit * const unit = ctx->unit;

    /* Dispatch on the 10-bit extended opcode.  Every recognized case
     * returns; anything that falls out of the switch is translated as an
     * illegal instruction. */
    switch ((PPCExtendedOpcode1F)insn_XO_10(insn)) {

      /* XO_5 = 0x00 */
      case XO_CMP:
        translate_compare(ctx, insn, false, true);
        return;
      case XO_CMPL:
        translate_compare(ctx, insn, false, false);
        return;
      case XO_MCRXR: {
        /* Copy the top four bits of XER (bits 31 down to 28) into the
         * target CR field, then clear those bits in XER. */
        const int xer = get_xer(ctx);
        int crb[4];
        for (int bit = 0; bit < 4; bit++) {
            crb[bit] = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_BFEXT,
                         crb[bit], xer, 0, (31-bit) | (1<<8));
        }
        set_crf(ctx, insn_crfD(insn), crb[0], crb[1], crb[2], crb[3]);
        const int new_xer = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_ANDI, new_xer, xer, 0, 0x0FFFFFFF);
        set_xer(ctx, new_xer, rtl_imm32(unit,0));
        return;
      }  // case XO_MCRXR

      /* XO_5 = 0x04 */
      case XO_TW:
        translate_trap(ctx, address, insn, get_gpr(ctx, insn_rB(insn)));
        return;

      /* XO_5 = 0x08 */
      case XO_SUBFC:
      case XO_SUBFCO:
        translate_addsub_reg(ctx, insn, 1, 1, true, true);
        return;
      case XO_SUBF: {
        /* subf computes rB - rA (note the operand order). */
        const int rA = get_gpr(ctx, insn_rA(insn));
        const int rB = get_gpr(ctx, insn_rB(insn));
        const int result = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_SUB, result, rB, rA, 0);
        set_gpr(ctx, insn_rD(insn), result);
        if (insn_Rc(insn)) {
            update_cr0(ctx, result);
        }
        return;
      }  // case XO_SUBF
      case XO_SUBFO:
        translate_addsub_reg(ctx, insn, 1, 1, true, false);
        return;
      case XO_NEG: {
        const int rA = get_gpr(ctx, insn_rA(insn));
        const int result = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_NEG, result, rA, 0, 0);
        set_gpr(ctx, insn_rD(insn), result);
        if (insn_Rc(insn)) {
            update_cr0(ctx, result);
        }
        return;
      }  // case XO_NEG
      case XO_NEGO:
        translate_addsub_reg(ctx, insn, 0, 1, true, false);
        return;
      case XO_SUBFE:
      case XO_SUBFEO:
        translate_addsub_reg(ctx, insn, 1, -1, true, true);
        return;
      case XO_SUBFZE:
      case XO_SUBFZEO:
        translate_addsub_reg(ctx, insn, 0, -1, true, true);
        return;
      case XO_SUBFME:
      case XO_SUBFMEO:
        translate_addsub_reg(ctx, insn, -1, -1, true, true);
        return;

      /* XO_5 = 0x0A */
      case XO_ADDC:
      case XO_ADDCO:
        translate_addsub_reg(ctx, insn, 1, 0, false, true);
        return;
      case XO_ADDE:
      case XO_ADDEO:
        translate_addsub_reg(ctx, insn, 1, -1, false, true);
        return;
      case XO_ADDZE:
      case XO_ADDZEO:
        translate_addsub_reg(ctx, insn, 0, -1, false, true);
        return;
      case XO_ADDME:
      case XO_ADDMEO:
        translate_addsub_reg(ctx, insn, -1, -1, false, true);
        return;
      case XO_ADD: {
        const int rA = get_gpr(ctx, insn_rA(insn));
        const int rB = get_gpr(ctx, insn_rB(insn));
        const int result = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_ADD, result, rA, rB, 0);
        set_gpr(ctx, insn_rD(insn), result);
        if (insn_Rc(insn)) {
            update_cr0(ctx, result);
        }
        return;
      }  // case XO_ADD
      case XO_ADDO:
        translate_addsub_reg(ctx, insn, 1, 0, false, false);
        return;

      /* XO_5 = 0x0B */
      case XO_MULHWU:
      case XO_UNDOCUMENTED_MULHWUO:  // OE is ignored.
        translate_muldiv_reg(ctx, insn, RTLOP_MULHU, false);
        return;
      case XO_MULHW:
      case XO_UNDOCUMENTED_MULHWO:  // OE is ignored.
        translate_muldiv_reg(ctx, insn, RTLOP_MULHS, false);
        return;
      case XO_MULLW:
        translate_muldiv_reg(ctx, insn, RTLOP_MUL, false);
        return;
      case XO_MULLWO:
        translate_muldiv_reg(ctx, insn, RTLOP_MUL, true);
        return;
      case XO_DIVWU:
        translate_muldiv_reg(ctx, insn, RTLOP_DIVU, false);
        return;
      case XO_DIVWUO:
        translate_muldiv_reg(ctx, insn, RTLOP_DIVU, true);
        return;
      case XO_DIVW:
        translate_muldiv_reg(ctx, insn, RTLOP_DIVS, false);
        return;
      case XO_DIVWO:
        translate_muldiv_reg(ctx, insn, RTLOP_DIVS, true);
        return;

      /* XO_5 = 0x10 */
      case XO_MTCRF: {
        const int rS = get_gpr(ctx, insn_rS(insn));
        /* If every field is selected, rS becomes the new CR word as is. */
        if (insn_CRM(insn) == 0xFF) {
            set_cr(ctx, rS);
        }
        if (ctx->use_split_fields) {
            /* For each selected CR field, extract all needed bits from
             * rS first and only then store them, skipping bits which
             * have no alias allocated. */
            for (int i = 0; i < 8; i++) {
                if (insn_CRM(insn) & (0x80 >> i)) {
                    int crb[4];
                    for (int j = 0; j < 4; j++) {
                        const int bit = i*4+j;
                        if (ctx->alias.crb[bit]) {
                            crb[j] = rtl_alloc_register(unit, RTLTYPE_INT32);
                            rtl_add_insn(unit, RTLOP_BFEXT,
                                         crb[j], rS, 0, (31-bit) | (1<<8));
                        } else {
                            crb[j] = 0;
                        }
                    }
                    for (int j = 0; j < 4; j++) {
                        if (crb[j]) {
                            const int bit = i*4+j;
                            set_crb(ctx, bit, crb[j]);
                        }
                    }
                }
            }
        } else {  // !ctx->use_split_fields
            /* Partial update: merge the selected fields of rS into the
             * existing CR word.  (The full-update case was handled by
             * the set_cr() call above.) */
            if (insn_CRM(insn) != 0xFF) {
                const uint32_t mask = crm_to_mask(insn_CRM(insn));
                const int old_cr = get_cr(ctx);
                const int masked_cr = rtl_alloc_register(unit, RTLTYPE_INT32);
                rtl_add_insn(unit, RTLOP_ANDI, masked_cr, old_cr, 0, ~mask);
                const int masked_rS = rtl_alloc_register(unit, RTLTYPE_INT32);
                rtl_add_insn(unit, RTLOP_ANDI, masked_rS, rS, 0, mask);
                const int new_cr = rtl_alloc_register(unit, RTLTYPE_INT32);
                rtl_add_insn(unit, RTLOP_OR,
                             new_cr, masked_cr, masked_rS, 0);
                set_cr(ctx, new_cr);
            }
        }
        return;
      }  // case XO_MTCRF

      /* XO_5 = 0x12 */
      case XO_MTMSR:
      case XO_MTSR:
      case XO_MTSRIN:
      case XO_TLBIE:
        translate_unimplemented_insn(ctx, address, insn);
        return;

      /* XO_5 = 0x13 */
      case XO_MFCR: {
        /* Optimize an mfcr+rlwinm pair which extracts a single bit from CR.
         * We can only trivially skip the mfcr if the rlwinm reads and
         * writes the same register. */
        /* Instruction fusion is disabled when per-instruction callbacks
         * are in use; next_insn == 0 then matches neither pattern. */
        const bool can_skip_insn = (!ctx->handle->pre_insn_callback
                                    && !ctx->handle->post_insn_callback);
        const uint32_t next_insn =
            can_skip_insn ? guest_ppc_get_insn_at(ctx, block, address+4) : 0;
        /* Pattern: rlwinm rX,rD,SH,31,31 (any rX, any SH, any Rc). */
        const uint32_t extract_bit_insn =
            OPCD_RLWINM<<26 | insn_rD(insn)<<21 | 31<<6 | 31<<1;
        /* Same pattern with the destination register also equal to rD. */
        const uint32_t extract_bit_same_reg_insn =
            extract_bit_insn | insn_rD(insn)<<16;
        /* Emit the mfcr itself unless the following rlwinm overwrites
         * rD, in which case the full CR value is never observable. */
        if ((next_insn & 0xFFFF07FE) != extract_bit_same_reg_insn) {
            guest_ppc_flush_cr(ctx, true);
            set_gpr(ctx, insn_rD(insn), get_cr(ctx));
        }
        /* Translate the following rlwinm here as a direct read of the
         * selected CR bit, and tell the caller to skip it. */
        if ((next_insn & 0xFFE007FE) == extract_bit_insn) {
            const int bit_index = (insn_SH(next_insn) - 1) & 31;
            const int bit = get_crb(ctx, bit_index);
            set_gpr(ctx, insn_rA(next_insn), bit);
            if (insn_Rc(next_insn)) {
                update_cr0(ctx, bit);
            }
            ctx->skip_next_insn = true;
        }
        return;
      }  // case XO_MFCR
      case XO_MFTB:
      case XO_MFSPR:
        translate_move_spr(ctx, address, insn, false);
        return;
      case XO_MTSPR:
        translate_move_spr(ctx, address, insn, true);
        return;
      case XO_MFMSR:
      case XO_MFSR:
      case XO_MFSRIN:
        translate_unimplemented_insn(ctx, address, insn);
        return;

      /* XO_5 = 0x14 */
      case XO_LWARX:
        translate_lwarx(ctx, address, insn);
        return;

      /* XO_5 = 0x15 */
      case XO_LSWX:
        translate_load_store_string(ctx, insn, false, false);
        return;
      case XO_LSWI:
        translate_load_store_string(ctx, insn, false, true);
        return;
      case XO_STSWX:
        translate_load_store_string(ctx, insn, true, false);
        return;
      case XO_STSWI:
        translate_load_store_string(ctx, insn, true, true);
        return;

      /* XO_5 = 0x16 */
      case XO_DCBST:
      case XO_DCBF:
      case XO_DCBTST:
      case XO_DCBT:
      case XO_DCBI:
        // FIXME: We currently act as if there is no data cache.
        return;
      case XO_STWCX_:
        translate_stwcx(ctx, address, insn);
        /* If split fields are in use and the post-instruction callback
         * is active, flush the store success bit back to the CR word in
         * the PSB, so the callback knows whether the store succeeded.
         * This deviates from the ideal of not changing behavior in the
         * presence of pre/post instruction callbacks, but it is necessary
         * when the callbacks are used to validate the behavior of
         * generated code against a hardware implementation or interpreter
         * so the validator knows whether to simulate the store (since
         * stwcx. is not repeatable). */
        if (ctx->use_split_fields && ctx->handle->post_insn_callback) {
            const int cr0_eq = get_crb(ctx, 2);
            const int old_cr = get_cr(ctx);
            const int new_cr = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_BFINS,
                         new_cr, old_cr, cr0_eq, 29 | 1<<8);
            set_cr(ctx, new_cr);
        }
        return;
      case XO_ECIWX:
      case XO_ECOWX:
      case XO_TLBSYNC:
        translate_unimplemented_insn(ctx, address, insn);
        return;
      case XO_LWBRX:
        translate_load_store_gpr(ctx, insn, RTLOP_LOAD_BR,
                                 false, true, false);
        return;
      case XO_SYNC:
      case XO_EIEIO:
        // FIXME: We currently act as if all loads and stores are sequential.
        return;
      case XO_STWBRX:
        translate_load_store_gpr(ctx, insn, RTLOP_STORE_BR,
                                 true, true, false);
        return;
      case XO_LHBRX:
        translate_load_store_gpr(ctx, insn, RTLOP_LOAD_U16_BR,
                                 false, true, false);
        return;
      case XO_STHBRX:
        translate_load_store_gpr(ctx, insn, RTLOP_STORE_I16_BR,
                                 true, true, false);
        return;
      case XO_ICBI:
        /* icbi implies that already-translated code may have changed, so
         * unconditionally return from this unit.  We currently don't
         * bother checking the invalidation address. */
        return_from_unit(ctx, address, rtl_imm32(unit, address+4), true);
        return;
      case XO_DCBZ:
        translate_dcbz(ctx, insn);
        return;

      /* XO_5 = 0x17 */
      case XO_LWZX:
        translate_load_store_gpr(ctx, insn, RTLOP_LOAD,
                                 false, true, false);
        return;
      case XO_LWZUX:
        translate_load_store_gpr(ctx, insn, RTLOP_LOAD,
                                 false, true, true);
        return;
      case XO_LBZX:
        translate_load_store_gpr(ctx, insn, RTLOP_LOAD_U8,
                                 false, true, false);
        return;
      case XO_LBZUX:
        translate_load_store_gpr(ctx, insn, RTLOP_LOAD_U8,
                                 false, true, true);
        return;
      case XO_STWX:
        translate_load_store_gpr(ctx, insn, RTLOP_STORE,
                                 true, true, false);
        return;
      case XO_STWUX:
        translate_load_store_gpr(ctx, insn, RTLOP_STORE,
                                 true, true, true);
        return;
      case XO_STBX:
        translate_load_store_gpr(ctx, insn, RTLOP_STORE_I8,
                                 true, true, false);
        return;
      case XO_STBUX:
        translate_load_store_gpr(ctx, insn, RTLOP_STORE_I8,
                                 true, true, true);
        return;
      case XO_LHZX:
        translate_load_store_gpr(ctx, insn, RTLOP_LOAD_U16,
                                 false, true, false);
        return;
      case XO_LHZUX:
        translate_load_store_gpr(ctx, insn, RTLOP_LOAD_U16,
                                 false, true, true);
        return;
      case XO_LHAX:
        translate_load_store_gpr(ctx, insn, RTLOP_LOAD_S16,
                                 false, true, false);
        return;
      case XO_LHAUX:
        translate_load_store_gpr(ctx, insn, RTLOP_LOAD_S16,
                                 false, true, true);
        return;
      case XO_STHX:
        translate_load_store_gpr(ctx, insn, RTLOP_STORE_I16,
                                 true, true, false);
        return;
      case XO_STHUX:
        translate_load_store_gpr(ctx, insn, RTLOP_STORE_I16,
                                 true, true, true);
        return;
      case XO_LFSX:
        translate_load_store_fpr(ctx, insn, true, false, true, false);
        return;
      case XO_LFSUX:
        translate_load_store_fpr(ctx, insn, true, false, true, true);
        return;
      case XO_LFDX:
        translate_load_store_fpr(ctx, insn, false, false, true, false);
        return;
      case XO_LFDUX:
        translate_load_store_fpr(ctx, insn, false, false, true, true);
        return;
      case XO_STFSX:
        translate_load_store_fpr(ctx, insn, true, true, true, false);
        return;
      case XO_STFSUX:
        translate_load_store_fpr(ctx, insn, true, true, true, true);
        return;
      case XO_STFDX:
        translate_load_store_fpr(ctx, insn, false, true, true, false);
        return;
      case XO_STFDUX:
        translate_load_store_fpr(ctx, insn, false, true, true, true);
        return;
      case XO_STFIWX: {
        /* Store the low 32 bits of the FPR's 64-bit pattern, using the
         * byte-reversing store on little-endian hosts. */
        const RTLOpcode rtlop = ctx->handle->host_little_endian
            ? RTLOP_STORE_BR : RTLOP_STORE;
        const int host_address = get_ea_indexed(ctx, insn, NULL);
        const int f64 = get_fpr_as_type(ctx, insn_frD(insn), RTLTYPE_FLOAT64);
        const int i64 = rtl_alloc_register(unit, RTLTYPE_INT64);
        rtl_add_insn(unit, RTLOP_BITCAST, i64, f64, 0, 0);
        const int i32 = rtl_alloc_register(unit, RTLTYPE_INT32);
        rtl_add_insn(unit, RTLOP_ZCAST, i32, i64, 0, 0);
        rtl_add_insn(unit, rtlop, 0, host_address, i32, 0);
        return;
      }  // case XO_STFIWX

      /* XO_5 = 0x18 */
      case XO_SLW:
        translate_shift(ctx, insn, RTLOP_SLL, false, false);
        return;
      case XO_SRW:
        translate_shift(ctx, insn, RTLOP_SRL, false, false);
        return;
      case XO_SRAW:
        translate_shift(ctx, insn, RTLOP_SRA, false, true);
        return;
      case XO_SRAWI:
        translate_shift(ctx, insn, RTLOP_SRAI, true, true);
        return;

      /* XO_5 = 0x1A */
      case XO_CNTLZW:
        /* Fuse with a following "rlwinm rY,temp,27,5,31" (srwi by 5)
         * when no per-instruction callbacks are active and the cntlzw
         * does not update CR0. */
        if (!ctx->handle->pre_insn_callback
         && !ctx->handle->post_insn_callback
         && !insn_Rc(insn)
         && ((guest_ppc_get_insn_at(ctx, block, address+4) & 0xFFE0FFFE)
             == (OPCD_RLWINM<<26 | insn_rA(insn)<<21 | 27<<11 | 5<<6
                 | 31<<1))) {
            /* "cntlzw temp,rX; srwi rY,temp,5" is a common PowerPC idiom
             * for comparing a value to zero and getting the result as an
             * integer rather than a condition flag.  If the temporary is
             * different from the output registers, we leave the cntlzw in
             * place in case its result happens to also be used elsewhere;
             * dead store elimination will remove it if not. */
            const uint32_t next_insn =
                guest_ppc_get_insn_at(ctx, block, address+4);
            const int cntlzw_rS = insn_rS(insn);
            const int rlwinm_rA = insn_rA(next_insn);
            const int value = get_gpr(ctx, cntlzw_rS);
            /* Don't append the CLZ until after we retrieve the input
             * operand value, to correctly handle the case of cntlzw rN,rN
             * (overwriting the input operand). */
            if ((int)insn_rA(insn) != rlwinm_rA) {
                translate_bitmisc(ctx, insn, RTLOP_CLZ);
            }
            /* The fused result is simply (value == 0). */
            const int result = rtl_alloc_register(unit, RTLTYPE_INT32);
            rtl_add_insn(unit, RTLOP_SEQI, result, value, 0, 0);
            set_gpr(ctx, rlwinm_rA, result);
            if (insn_Rc(next_insn)) {
                /* result is 0 or 1, so CR0 can be set directly:
                 * lt = 0, gt = result, eq = !result. */
                const int lt = rtl_imm32(unit, 0);
                const int gt = result;
                const int eq = rtl_alloc_register(unit, RTLTYPE_INT32);
                rtl_add_insn(unit, RTLOP_XORI, eq, result, 0, 1);
                const int so = get_xer_so(ctx);
                set_crf(ctx, 0, lt, gt, eq, so);
            }
            ctx->skip_next_insn = true;
            return;
        }
        translate_bitmisc(ctx, insn, RTLOP_CLZ);
        return;
      case XO_EXTSH:
        translate_bitmisc(ctx, insn, RTLOP_SEXT16);
        return;
      case XO_EXTSB:
        translate_bitmisc(ctx, insn, RTLOP_SEXT8);
        return;

      /* XO_5 = 0x1C */
      case XO_AND:
        translate_logic_reg(ctx, insn, RTLOP_AND, false, false);
        return;
      case XO_ANDC:
        translate_logic_reg(ctx, insn, RTLOP_AND, true, false);
        return;
      case XO_NOR:
        translate_logic_reg(ctx, insn, RTLOP_OR, false, true);
        return;
      case XO_EQV:
        /* The PowerPC spec describes this as ~(rS ^ rB), but we implement
         * it as (rS ^ ~rB) since that allows the NOT operation to be
         * scheduled earlier if rB is already loaded. */
        translate_logic_reg(ctx, insn, RTLOP_XOR, true, false);
        return;
      case XO_XOR:
        translate_logic_reg(ctx, insn, RTLOP_XOR, false, false);
        return;
      case XO_ORC:
        translate_logic_reg(ctx, insn, RTLOP_OR, true, false);
        return;
      case XO_OR:
        /* "or rA,rS,rS" is a register move; no RTL operation is needed
         * beyond rebinding rA to rS's current value. */
        if (insn_rB(insn) == insn_rS(insn)) {  // mr rA,rS
            const int rS = get_gpr(ctx, insn_rS(insn));
            set_gpr(ctx, insn_rA(insn), rS);
            if (insn_Rc(insn)) {
                update_cr0(ctx, rS);
            }
        } else {
            translate_logic_reg(ctx, insn, RTLOP_OR, false, false);
        }
        return;
      case XO_NAND:
        translate_logic_reg(ctx, insn, RTLOP_AND, false, true);
        return;

    }

    /* No case matched: not a valid opcode-0x1F instruction. */
    translate_illegal(ctx, insn);
}

/*-----------------------------------------------------------------------*/

/**
 * translate_x3F: Translate the given opcode-0x3F instruction.
 *
 * [Parameters]
 *     ctx: Translation context.
 *     address: Address of instruction being translated.
 *     insn: Instruction word.
*/ static inline void translate_x3F( GuestPPCContext * const ctx, const uint32_t address, const uint32_t insn) { RTLUnit * const unit = ctx->unit; if (insn_XO_5(insn) & 0x10) { switch ((PPCExtendedOpcode3F_5)insn_XO_5(insn)) { case XO_FDIV: translate_fp_arith(ctx, insn, RTLOP_FDIV, false, FPSCR_VXIDI | FPSCR_VXZDZ); return; case XO_FSUB: { int fcfi_src; bool fcfi_signed; if ((ctx->handle->guest_opt & BINREC_OPT_G_PPC_DETECT_FCFI_EMUL) && guest_ppc_detect_fcfi_emul(ctx, address, insn_frA(insn), insn_frB(insn), &fcfi_src, &fcfi_signed)) { const int result = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, fcfi_signed ? RTLOP_FSCAST : RTLOP_FZCAST, result, fcfi_src, 0, 0); set_fpr(ctx, insn_frD(insn), result); ctx->fpr_is_safe |= 1 << insn_frD(insn); /* Handle FPSCR ourselves instead of going through * set_fp_result() because converting INT32 to FLOAT64 * can never raise exceptions. */ if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) { const int fprf = gen_fprf(unit, result, 0); set_fr_fi_fprf(ctx, fprf); } } else { translate_fp_arith(ctx, insn, RTLOP_FSUB, false, FPSCR_VXISI); } return; } // case XO_FSUB case XO_FADD: translate_fp_arith(ctx, insn, RTLOP_FADD, false, FPSCR_VXISI); return; case XO_FSEL: { /* fsel does not raise any exceptions, so make sure we don't * affect host exception state with FCMP. (But we don't need * to bother with this if we're ignoring exceptions completely.) */ int fpstate = 0; if (!(ctx->handle->guest_opt & BINREC_OPT_G_PPC_NO_FPSCR_STATE)) { fpstate = rtl_alloc_register(unit, RTLTYPE_FPSTATE); rtl_add_insn(unit, RTLOP_FGETSTATE, fpstate, 0, 0, 0); } /* There's no need to convert frA to float64 if it's currently * float32, since all we do is test its value. 
*/ const RTLDataType frA_type = get_fpr_scalar_type(ctx, insn_frA(insn)); const int frA = get_fpr_as_type(ctx, insn_frA(insn), frA_type); const int zero = rtl_alloc_register(unit, frA_type); rtl_add_insn(unit, RTLOP_LOAD_IMM, zero, 0, 0, 0); const int frC = get_fpr_as_type(ctx, insn_frC(insn), RTLTYPE_FLOAT64); const int frB = get_fpr_as_type(ctx, insn_frB(insn), RTLTYPE_FLOAT64); const int test = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_FCMP, test, frA, zero, RTLFCMP_GE); const int result = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_SELECT, result, frC, frB, test); set_fpr(ctx, insn_frD(insn), result); if (fpstate) { rtl_add_insn(unit, RTLOP_FSETSTATE, 0, fpstate, 0, 0); } if (insn_Rc(insn)) { update_cr1(ctx); } return; } // case XO_FSEL case XO_FMUL: translate_fp_arith(ctx, insn, RTLOP_FMUL, false, FPSCR_VXIMZ); return; case XO_FRSQRTE: translate_fp_recip(ctx, insn, true); return; case XO_FMSUB: translate_fp_fma(ctx, insn, RTLOP_FMSUB, false, false); return; case XO_FMADD: translate_fp_fma(ctx, insn, RTLOP_FMADD, false, false); return; case XO_FNMSUB: /* The PowerPC fnmsub instruction negates the final result * rather than just the intermediate product. We could * potentially use RTLOP_FNMADD instead of RTLOP_FNMSUB, * but that gives the wrong sign of zero, so (unless the * relevant optimization is enabled) we have to use * RTLOP_FMSUB and manually negate the result. 
*/ if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_FNMADD_ZERO_SIGN) { translate_fp_fma(ctx, insn, RTLOP_FNMADD, false, false); } else { translate_fp_fma(ctx, insn, RTLOP_FMSUB, false, true); } return; case XO_FNMADD: if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_FNMADD_ZERO_SIGN) { translate_fp_fma(ctx, insn, RTLOP_FNMSUB, false, false); } else { translate_fp_fma(ctx, insn, RTLOP_FMADD, false, true); } return; } } else { // !(insn_XO_5(insn) & 0x10) switch ((PPCExtendedOpcode3F_10)insn_XO_10(insn)) { case XO_FCMPU: translate_compare_fp(ctx, insn, false, 0); return; case XO_FCMPO: translate_compare_fp(ctx, insn, true, 0); return; case XO_MCRFS: { int crb[4]; if (insn_crfS(insn) == 4) { const int fprf = get_fr_fi_fprf(ctx); for (int i = 0; i < 4; i++) { crb[i] = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFEXT, crb[i], fprf, 0, (3-i) | 1<<8); } set_crf(ctx, insn_crfD(insn), crb[0], crb[1], crb[2], crb[3]); } else { // crfS != 4 const int crfS_bit = insn_crfS(insn) * 4; const int fpscr = get_fpscr(ctx); if (insn_crfS(insn) == 0) { get_fpscr_fex_vx(ctx, fpscr, &crb[1], &crb[2]); crb[0] = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFEXT, crb[0], fpscr, 0, 31 | 1<<8); crb[3] = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFEXT, crb[3], fpscr, 0, 28 | 1<<8); } else if (insn_crfS(insn) == 3) { const int fprf = get_fr_fi_fprf(ctx); crb[0] = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFEXT, crb[0], fpscr, 0, 19 | 1<<8); for (int i = 1; i < 4; i++) { crb[i] = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFEXT, crb[i], fprf, 0, (7-i) | 1<<8); } } else { for (int i = 0; i < 4; i++) { crb[i] = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFEXT, crb[i], fpscr, 0, (31-(crfS_bit+i)) | 1<<8); } } set_crf(ctx, insn_crfD(insn), crb[0], crb[1], crb[2], crb[3]); uint32_t mask = ((FPSCR_FX | FPSCR_ALL_EXCEPTIONS) & (0xF0000000 >> crfS_bit)); if (mask) { const 
int new_fpscr = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, new_fpscr, fpscr, 0, ~mask); set_fpscr(ctx, new_fpscr); } } return; } // case XO_MCRFS case XO_MTFSB1: case XO_MTFSB0: { const uint32_t crbD_mask = 1 << (31 - insn_crbD(insn)); if ((FPSCR_FR | FPSCR_FI | FPSCR_FPRF) & crbD_mask) { const int fprf = get_fr_fi_fprf(ctx); const int new_fprf = rtl_alloc_register(unit, RTLTYPE_INT32); const uint32_t mask = 1 << (19 - insn_crbD(insn)); if (insn_XO_10(insn) == XO_MTFSB1) { rtl_add_insn(unit, RTLOP_ORI, new_fprf, fprf, 0, mask); } else { rtl_add_insn(unit, RTLOP_ANDI, new_fprf, fprf, 0, ~mask); } set_fr_fi_fprf(ctx, new_fprf); } else if ((FPSCR_FEX | FPSCR_VX | FPSCR_RESV20) & crbD_mask) { /* Do nothing -- these bits can't be written. */ } else { const int fpscr = get_fpscr(ctx); const int new_fpscr = rtl_alloc_register(unit, RTLTYPE_INT32); if (insn_XO_10(insn) == XO_MTFSB1) { uint32_t mask = crbD_mask; if (FPSCR_ALL_EXCEPTIONS & crbD_mask) { mask |= FPSCR_FX; } rtl_add_insn(unit, RTLOP_ORI, new_fpscr, fpscr, 0, mask); } else { // mtfsb0 rtl_add_insn(unit, RTLOP_ANDI, new_fpscr, fpscr, 0, ~crbD_mask); } set_fpscr(ctx, new_fpscr); if (FPSCR_RN & crbD_mask) { update_rounding_mode(ctx); } } if (insn_Rc(insn)) { update_cr1(ctx); } return; } // case XO_MTFSB1, XO_MTFSB0 case XO_MTFSFI: { const int crfD = insn_crfD(insn); if (crfD == 0) { const int fpscr = get_fpscr(ctx); const int masked_fpscr = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, masked_fpscr, fpscr, 0, 0x0FFFFFFF); int new_fpscr; if (insn_IMM(insn) & 9) { new_fpscr = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ORI, new_fpscr, masked_fpscr, 0, (insn_IMM(insn) & 9) << 28); } else { new_fpscr = masked_fpscr; } set_fpscr(ctx, new_fpscr); } else if (ctx->use_split_fields && crfD == 3) { const int fpscr = get_fpscr(ctx); const int new_fpscr = rtl_alloc_register(unit, RTLTYPE_INT32); if (insn_IMM(insn) & 8) { /* The omission of FPSCR_FX 
here is deliberate, since * mtfsfi does not set FPSCR[FX] for nonzero crfD. */ rtl_add_insn(unit, RTLOP_ORI, new_fpscr, fpscr, 0, 1<<19); } else { rtl_add_insn(unit, RTLOP_ANDI, new_fpscr, fpscr, 0, ~(1<<19)); } set_fpscr(ctx, new_fpscr); const int fprf = get_fr_fi_fprf(ctx); const int masked_fprf = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, masked_fprf, fprf, 0, 0x0F); int new_fprf; if (insn_IMM(insn) & 7) { new_fprf = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ORI, new_fprf, masked_fprf, 0, (insn_IMM(insn) & 7) << 4); } else { new_fprf = masked_fprf; } set_fr_fi_fprf(ctx, new_fprf); } else if (ctx->use_split_fields && crfD == 4) { const int fprf = get_fr_fi_fprf(ctx); const int masked_fprf = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, masked_fprf, fprf, 0, 0x70); int new_fprf; if (insn_IMM(insn)) { new_fprf = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ORI, new_fprf, masked_fprf, 0, insn_IMM(insn)); } else { new_fprf = masked_fprf; } set_fr_fi_fprf(ctx, new_fprf); } else { // crfD not 0 (or 3/4 if use_split_fields) const int fpscr = get_fpscr(ctx); const int masked_fpscr = rtl_alloc_register(unit, RTLTYPE_INT32); uint32_t mask = 0xF0000000 >> (insn_crfD(insn) * 4); rtl_add_insn(unit, RTLOP_ANDI, masked_fpscr, fpscr, 0, ~mask); int imm = insn_IMM(insn); if (insn_crfD(insn) == 5) { imm &= 7; } int new_fpscr; if (insn_IMM(insn)) { new_fpscr = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ORI, new_fpscr, masked_fpscr, 0, imm << (28 - insn_crfD(insn)*4)); } else { new_fpscr = masked_fpscr; } set_fpscr(ctx, new_fpscr); if (insn_crfD(insn) == 7) { static const uint8_t rounding_mode[4] = { [FPSCR_RN_N] = RTLFROUND_NEAREST, [FPSCR_RN_Z] = RTLFROUND_TRUNC, [FPSCR_RN_P] = RTLFROUND_CEIL, [FPSCR_RN_M] = RTLFROUND_FLOOR, }; const int old_state = rtl_alloc_register(unit, RTLTYPE_FPSTATE); rtl_add_insn(unit, RTLOP_FGETSTATE, old_state, 0, 0, 0); const int 
new_state = rtl_alloc_register(unit, RTLTYPE_FPSTATE); rtl_add_insn(unit, RTLOP_FSETROUND, new_state, old_state, 0, rounding_mode[imm & 3]); rtl_add_insn(unit, RTLOP_FSETSTATE, 0, new_state, 0, 0); } } // if (crfD == ...) if (insn_Rc(insn)) { update_cr1(ctx); } return; } // case XO_MTFSFI case XO_MFFS: { int fpscr = get_fpscr(ctx); int fex, vx; get_fpscr_fex_vx(ctx, fpscr, &fex, &vx); if (ctx->use_split_fields) { fpscr = merge_fpscr(ctx, true); } const int shifted_fex = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, shifted_fex, fex, 0, FPSCR_FEX_SHIFT); const int shifted_vx = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SLLI, shifted_vx, vx, 0, FPSCR_VX_SHIFT); const int fex_vx = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, fex_vx, shifted_fex, shifted_vx, 0); const int final_fpscr = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, final_fpscr, fpscr, fex_vx, 0); const int fpscr64 = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_ZCAST, fpscr64, final_fpscr, 0, 0); const int result = rtl_alloc_register(unit, RTLTYPE_FLOAT64); rtl_add_insn(unit, RTLOP_BITCAST, result, fpscr64, 0, 0); set_fpr(ctx, insn_frD(insn), result); /* The value generated by mffs will never be a NaN, so we can * call it SNaN-safe even though there should never be a reason * to convert it to single precision. */ ctx->fpr_is_safe |= 1 << insn_frD(insn); if (insn_Rc(insn)) { /* We already have FEX/VX, so avoid recomputing them. 
*/ const int fx = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFEXT, fx, fpscr, 0, 31 | 1<<8); const int ox = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFEXT, ox, fpscr, 0, 28 | 1<<8); set_crf(ctx, 1, fx, fex, vx, ox); } return; } // case XO_MFFS case XO_MTFSF: { const int frB = get_fpr_as_type(ctx, insn_frB(insn), RTLTYPE_FLOAT64); const int bits64 = rtl_alloc_register(unit, RTLTYPE_INT64); rtl_add_insn(unit, RTLOP_BITCAST, bits64, frB, 0, 0); const int bits = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ZCAST, bits, bits64, 0, 0); const int FM_fpscr_all = ctx->use_split_fields ? 0xF7 : 0xFF; const int FM_fpscr = insn_FM(insn) & FM_fpscr_all; const int FM_fprf = ctx->use_split_fields ? insn_FM(insn) & 0x18 : 0; if (FM_fpscr) { uint32_t fpscr_mask_off = FPSCR_FEX | FPSCR_VX | FPSCR_RESV20; if (ctx->use_split_fields) { fpscr_mask_off |= FPSCR_FR | FPSCR_FI | FPSCR_FPRF; } const uint32_t fpscr_mask = ~fpscr_mask_off; int new_fpscr; if (FM_fpscr == FM_fpscr_all) { new_fpscr = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, new_fpscr, bits, 0, fpscr_mask); } else { const uint32_t mask = crm_to_mask(FM_fpscr) & fpscr_mask; const int fpscr = get_fpscr(ctx); const int masked_bits = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, masked_bits, bits, 0, mask); const int masked_fpscr = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, masked_fpscr, fpscr, 0, ~mask); new_fpscr = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, new_fpscr, masked_fpscr, masked_bits, 0); } set_fpscr(ctx, new_fpscr); if (FM_fpscr & 0x01) { update_rounding_mode(ctx); } } if (FM_fprf) { int new_fprf; if (FM_fprf == 0x18) { new_fprf = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFEXT, new_fprf, bits, 0, FPSCR_FPRF_SHIFT | 7<<8); } else { const int fprf = get_fr_fi_fprf(ctx); const int shifted_bits = rtl_alloc_register(unit, 
RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SRLI, shifted_bits, bits, 0, FPSCR_FPRF_SHIFT); const int masked_bits = rtl_alloc_register(unit, RTLTYPE_INT32); const int masked_fprf = rtl_alloc_register(unit, RTLTYPE_INT32); if (FM_fprf == 0x10) { rtl_add_insn(unit, RTLOP_ANDI, masked_bits, shifted_bits, 0, 0x70); rtl_add_insn(unit, RTLOP_ANDI, masked_fprf, fprf, 0, 0x0F); } else { ASSERT(FM_fprf == 0x08); rtl_add_insn(unit, RTLOP_ANDI, masked_bits, shifted_bits, 0, 0x0F); rtl_add_insn(unit, RTLOP_ANDI, masked_fprf, fprf, 0, 0x70); } new_fprf = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, new_fprf, masked_fprf, masked_bits, 0); } set_fr_fi_fprf(ctx, new_fprf); } if (insn_Rc(insn)) { update_cr1(ctx); } return; } // case XO_MTFSF case XO_FNEG: translate_move_fpr(ctx, insn, RTLOP_FNEG, false); return; case XO_FMR: translate_move_fpr(ctx, insn, RTLOP_MOVE, false); return; case XO_FNABS: translate_move_fpr(ctx, insn, RTLOP_FNABS, false); return; case XO_FABS: translate_move_fpr(ctx, insn, RTLOP_FABS, false); return; case XO_FRSP: { const int frB = get_fpr_as_type(ctx, insn_frB(insn), RTLTYPE_FLOAT64); /* Check first for frsp of an emulated fcfi, which we can * convert to FSCAST/FZCAST of the original value. 
*/ const int result = rtl_alloc_register(unit, RTLTYPE_FLOAT32); const RTLRegister *frB_reg = &unit->regs[frB]; if (frB_reg->source == RTLREG_RESULT && (frB_reg->result.opcode == RTLOP_FSCAST || frB_reg->result.opcode == RTLOP_FZCAST)) { rtl_add_insn(unit, frB_reg->result.opcode, result, frB_reg->result.src1, 0, 0); set_fp_result(ctx, insn_frD(insn), result, 0, 0, frB, 0, 0, 0, false, false, true, true); } else { rtl_add_insn(unit, RTLOP_FCVT, result, frB, 0, 0); set_fp_result(ctx, insn_frD(insn), result, 0, 0, frB, 0, 0, 0, true, false, true, true); } if (insn_Rc(insn)) { update_cr1(ctx); } return; } // case XO_FRSP case XO_FCTIW: translate_fctiw(ctx, insn, RTLOP_FROUNDI); return; case XO_FCTIWZ: translate_fctiw(ctx, insn, RTLOP_FTRUNCI); return; } } // if (insn_XO_5(insn) & 0x10) translate_illegal(ctx, insn); } /*-----------------------------------------------------------------------*/ /** * translate_insn: Translate the given instruction. * * [Parameters] * ctx: Translation context. * block: Basic block being translated. * address: Address of instruction being translated. * insn: Instruction word. */ static inline void translate_insn( GuestPPCContext * const ctx, GuestPPCBlockInfo * const block, const uint32_t address, const uint32_t insn) { RTLUnit * const unit = ctx->unit; /* Skip instructions which were translated as part of an optimized * instruction pair (such as sc followed by blr). 
*/ if (ctx->skip_next_insn) { ctx->skip_next_insn = false; return; } switch (insn_OPCD(insn)) { case OPCD_TWI: translate_trap(ctx, address, insn, rtl_imm32(unit, insn_SIMM(insn))); return; case OPCD_x04: switch ((PPCExtendedOpcode04_750CL_5)insn_XO_5(insn)) { case XO_PS_CMP: case XO_PS_MOVE: case XO_PS_MERGE: case XO_PS_MISC: switch ((PPCExtendedOpcode04_750CL_10)insn_XO_10(insn)) { case XO_PS_CMPU0: translate_compare_fp(ctx, insn, false, 0); return; case XO_PS_CMPO0: translate_compare_fp(ctx, insn, true, 0); return; case XO_PS_CMPU1: translate_compare_fp(ctx, insn, false, 1); return; case XO_PS_CMPO1: translate_compare_fp(ctx, insn, true, 1); return; case XO_PS_NEG: translate_move_fpr(ctx, insn, RTLOP_FNEG, true); return; case XO_PS_MR: translate_move_fpr(ctx, insn, RTLOP_MOVE, true); return; case XO_PS_NABS: translate_move_fpr(ctx, insn, RTLOP_FNABS, true); return; case XO_PS_ABS: translate_move_fpr(ctx, insn, RTLOP_FABS, true); return; case XO_PS_MERGE00: translate_ps_merge(ctx, insn, 0, 0); return; case XO_PS_MERGE01: translate_ps_merge(ctx, insn, 0, 1); return; case XO_PS_MERGE10: translate_ps_merge(ctx, insn, 1, 0); return; case XO_PS_MERGE11: translate_ps_merge(ctx, insn, 1, 1); return; case XO_DCBZ_L: /* We treat "locked" cache identically to normal cache. 
*/ translate_dcbz(ctx, insn); return; } translate_illegal(ctx, insn); return; case XO_PSQ_LX: translate_load_store_ps(ctx, insn, false, true, (insn_XO_10(insn) & 0x20) != 0); return; case XO_PSQ_STX: translate_load_store_ps(ctx, insn, true, true, (insn_XO_10(insn) & 0x20) != 0); return; case XO_PS_SUM0: translate_ps_sum(ctx, insn, 0); return; case XO_PS_SUM1: translate_ps_sum(ctx, insn, 1); return; case XO_PS_MULS0: translate_ps_arith(ctx, insn, RTLOP_FMUL, 0, FPSCR_VXIMZ); return; case XO_PS_MULS1: translate_ps_arith(ctx, insn, RTLOP_FMUL, 1, FPSCR_VXIMZ); return; case XO_PS_MADDS0: translate_ps_fma(ctx, insn, RTLOP_FMADD, 0, false); return; case XO_PS_MADDS1: translate_ps_fma(ctx, insn, RTLOP_FMADD, 1, false); return; case XO_PS_DIV: translate_ps_arith(ctx, insn, RTLOP_FDIV, -1, FPSCR_VXIDI | FPSCR_VXZDZ); return; case XO_PS_SUB: translate_ps_arith(ctx, insn, RTLOP_FSUB, -1, FPSCR_VXISI); return; case XO_PS_ADD: translate_ps_arith(ctx, insn, RTLOP_FADD, -1, FPSCR_VXISI); return; case XO_PS_SEL: translate_ps_sel(ctx, insn); return; case XO_PS_RES: translate_ps_recip(ctx, insn, false); return; case XO_PS_MUL: translate_ps_arith(ctx, insn, RTLOP_FMUL, -1, FPSCR_VXIMZ); return; case XO_PS_RSQRTE: translate_ps_recip(ctx, insn, true); return; case XO_PS_MSUB: translate_ps_fma(ctx, insn, RTLOP_FMSUB, -1, false); return; case XO_PS_MADD: translate_ps_fma(ctx, insn, RTLOP_FMADD, -1, false); return; case XO_PS_NMSUB: if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_FNMADD_ZERO_SIGN) { translate_ps_fma(ctx, insn, RTLOP_FNMADD, -1, false); } else { translate_ps_fma(ctx, insn, RTLOP_FMSUB, -1, true); } return; case XO_PS_NMADD: if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_FNMADD_ZERO_SIGN) { translate_ps_fma(ctx, insn, RTLOP_FNMSUB, -1, false); } else { translate_ps_fma(ctx, insn, RTLOP_FMADD, -1, true); } return; } translate_illegal(ctx, insn); return; case OPCD_MULLI: translate_arith_imm(ctx, insn, RTLOP_MULI, false, false, false); return; case OPCD_SUBFIC: { const int rA = 
get_gpr(ctx, insn_rA(insn)); const int32_t imm = insn_SIMM(insn); const int imm_reg = rtl_imm32(unit, imm); const int result = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_SUB, result, imm_reg, rA, 0); set_gpr(ctx, insn_rD(insn), result); const int xer = get_xer(ctx); const int ca = rtl_alloc_register(unit, RTLTYPE_INT32); if (imm == -1) { rtl_add_insn(unit, RTLOP_LOAD_IMM, ca, 0, 0, 1); } else { rtl_add_insn(unit, RTLOP_SLTUI, ca, result, 0, imm+1); } const int new_xer = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_BFINS, new_xer, xer, ca, XER_CA_SHIFT | 1<<8); set_xer(ctx, new_xer, -1); return; } // case OPCD_SUBFIC case OPCD_CMPLI: translate_compare(ctx, insn, true, false); return; case OPCD_CMPI: translate_compare(ctx, insn, true, true); return; case OPCD_ADDIC: translate_arith_imm(ctx, insn, RTLOP_ADDI, false, true, false); return; case OPCD_ADDIC_: translate_arith_imm(ctx, insn, RTLOP_ADDI, false, true, true); return; case OPCD_ADDI: if (insn_rA(insn) == 0) { // li set_gpr(ctx, insn_rD(insn), rtl_imm32(unit, insn_SIMM(insn))); } else { translate_arith_imm(ctx, insn, RTLOP_ADDI, false, false, false); } return; case OPCD_ADDIS: if (insn_rA(insn) == 0) { // lis set_gpr(ctx, insn_rD(insn), rtl_imm32(unit, insn_SIMM(insn) << 16)); } else { translate_arith_imm(ctx, insn, RTLOP_ADDI, true, false, false); } return; case OPCD_BC: translate_branch(ctx, address, insn_BO(insn), insn_BI(insn), insn_BD(insn), insn_AA(insn), insn_LK(insn)); return; case OPCD_SC: { /* Special case: translate sc followed by blr in a single step, to * avoid having to return to caller and call a new unit containing * just the blr. The scanner will terminate the block at an sc * instruction which is not followed by a blr, so we only need to * check whether this sc is at the end of the block. 
*/ bool is_sc_blr = false; if ((ctx->handle->guest_opt & BINREC_OPT_G_PPC_SC_BLR) && address + 4 < block->start + block->len) { ASSERT(address + 8 == block->start + block->len); const uint32_t *memory_base = (const uint32_t *)ctx->handle->setup.guest_memory_base; const uint32_t next_insn = bswap_be32(memory_base[(address+4)/4]); ASSERT(next_insn == 0x4E800020); is_sc_blr = true; } int nia; if (is_sc_blr) { const int lr = get_lr(ctx); nia = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, nia, lr, 0, -4); } else { nia = rtl_imm32(unit, address + 4); } guest_ppc_flush_cr(ctx, false); guest_ppc_flush_fpscr(ctx); flush_live_regs(ctx, true); set_nia(ctx, nia); const int sc_handler = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_LOAD, sc_handler, ctx->psb_reg, 0, ctx->handle->setup.state_offsets_ppc.sc_handler); const uint32_t insn_reg = rtl_imm32(unit, insn); const int new_psb = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_CALL, new_psb, sc_handler, ctx->psb_reg, insn_reg); /* The post-instruction callback needs to use the new PSB in case * it was changed by the sc handler, but we also need to preserve * the original PSB for subsequent code. 
*/ const int old_psb = ctx->psb_reg; ctx->psb_reg = new_psb; post_insn_callback(ctx, address); rtl_add_insn(unit, RTLOP_RETURN, 0, ctx->psb_reg, 0, 0); ctx->psb_reg = old_psb; ctx->skip_next_insn = is_sc_blr; return; } // case OPCD_SC case OPCD_B: translate_branch(ctx, address, 0x14, 0, insn_LI(insn), insn_AA(insn), insn_LK(insn)); return; case OPCD_x13: switch ((PPCExtendedOpcode13)insn_XO_10(insn)) { case XO_MCRF: { if (ctx->use_split_fields) { int crb[4]; for (int i = 0; i < 4; i++) { crb[i] = get_crb(ctx, insn_crfS(insn)*4 + i); } set_crf(ctx, insn_crfD(insn), crb[0], crb[1], crb[2], crb[3]); } else { const int crfS_bit = 4 * insn_crfS(insn); const int crfD_bit = 4 * insn_crfD(insn); const int old_cr = get_cr(ctx); const int field = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, field, old_cr, 0, 0xF0000000 >> crfS_bit); const int masked_cr = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_ANDI, masked_cr, old_cr, 0, ~(0xF0000000 >> crfD_bit)); const int shifted_field = rtl_alloc_register(unit, RTLTYPE_INT32); if (crfD_bit > crfS_bit) { rtl_add_insn(unit, RTLOP_SRLI, shifted_field, field, 0, crfD_bit - crfS_bit); } else { rtl_add_insn(unit, RTLOP_SLLI, shifted_field, field, 0, crfS_bit - crfD_bit); } const int new_cr = rtl_alloc_register(unit, RTLTYPE_INT32); rtl_add_insn(unit, RTLOP_OR, new_cr, masked_cr, shifted_field, 0); set_cr(ctx, new_cr); } return; } case XO_BCLR: translate_branch_terminal(ctx, address, insn_BO(insn), insn_BI(insn), insn_LK(insn), 0, true, false); return; case XO_CRNOR: /* "crnor crbD,crbA,crbB" is also known as "crnot crbD,crbA", * but the usage frequency of crnot is probably not very high * in typical code and the potential speed gain is minimal, so * we don't bother with special handling. 
*/ translate_logic_crb(ctx, insn, RTLOP_OR, false, true); return; case XO_RFI: translate_unimplemented_insn(ctx, address, insn); return; case XO_CRANDC: translate_logic_crb(ctx, insn, RTLOP_AND, true, false); return; case XO_ISYNC: // FIXME: We currently act as if all instructions are sequential. return; case XO_CRXOR: if (insn_crbA(insn) == insn_crbB(insn)) { // crclr set_crb(ctx, insn_crbD(insn), rtl_imm32(unit,0)); } else { translate_logic_crb(ctx, insn, RTLOP_XOR, false, false); } return; case XO_CRNAND: translate_logic_crb(ctx, insn, RTLOP_AND, false, true); return; case XO_CRAND: translate_logic_crb(ctx, insn, RTLOP_AND, false, false); return; case XO_CREQV: if (insn_crbA(insn) == insn_crbB(insn)) { // crset set_crb(ctx, insn_crbD(insn), rtl_imm32(unit,1)); } else { /* See note at XO_EQV under opcode 0x1F (though it's less * likely to help in this case). */ translate_logic_crb(ctx, insn, RTLOP_XOR, true, false); } return; case XO_CRORC: translate_logic_crb(ctx, insn, RTLOP_OR, true, false); return; case XO_CROR: /* "cror crbD,crbA,crbB" is also known as "crmove crbD,crbA", * but the usage frequency of crmove is probably not very high * in typical code and the potential speed gain is minimal, so * we don't bother with special handling. */ translate_logic_crb(ctx, insn, RTLOP_OR, false, false); return; case XO_BCCTR: if (!(insn_BO(insn) & 0x04)) { // Invalid BO field for bcctr. 
translate_illegal(ctx, insn); return; } translate_branch_terminal(ctx, address, insn_BO(insn), insn_BI(insn), insn_LK(insn), 0, false, true); return; } translate_illegal(ctx, insn); return; case OPCD_RLWIMI: translate_rotate_mask(ctx, insn, true, true); return; case OPCD_RLWINM: translate_rotate_mask(ctx, insn, true, false); return; case OPCD_RLWNM: translate_rotate_mask(ctx, insn, false, false); return; case OPCD_ORI: if (insn == 0x60000000) { // nop return; } translate_logic_imm(ctx, insn, RTLOP_ORI, false, false); return; case OPCD_ORIS: translate_logic_imm(ctx, insn, RTLOP_ORI, true, false); return; case OPCD_XORI: translate_logic_imm(ctx, insn, RTLOP_XORI, false, false); return; case OPCD_XORIS: translate_logic_imm(ctx, insn, RTLOP_XORI, true, false); return; case OPCD_ANDI_: translate_logic_imm(ctx, insn, RTLOP_ANDI, false, true); return; case OPCD_ANDIS_: translate_logic_imm(ctx, insn, RTLOP_ANDI, true, true); return; case OPCD_x1F: translate_x1F(ctx, block, address, insn); return; case OPCD_LWZ: translate_load_store_gpr(ctx, insn, RTLOP_LOAD, false, false, false); return; case OPCD_LWZU: translate_load_store_gpr(ctx, insn, RTLOP_LOAD, false, false, true); return; case OPCD_LBZ: translate_load_store_gpr(ctx, insn, RTLOP_LOAD_U8, false, false, false); return; case OPCD_LBZU: translate_load_store_gpr(ctx, insn, RTLOP_LOAD_U8, false, false, true); return; case OPCD_STW: translate_load_store_gpr(ctx, insn, RTLOP_STORE, true, false, false); return; case OPCD_STWU: translate_load_store_gpr(ctx, insn, RTLOP_STORE, true, false, true); return; case OPCD_STB: translate_load_store_gpr(ctx, insn, RTLOP_STORE_I8, true, false, false); return; case OPCD_STBU: translate_load_store_gpr(ctx, insn, RTLOP_STORE_I8, true, false, true); return; case OPCD_LHZ: translate_load_store_gpr(ctx, insn, RTLOP_LOAD_U16, false, false, false); return; case OPCD_LHZU: translate_load_store_gpr(ctx, insn, RTLOP_LOAD_U16, false, false, true); return; case OPCD_LHA: translate_load_store_gpr(ctx, 
insn, RTLOP_LOAD_S16, false, false, false); return; case OPCD_LHAU: translate_load_store_gpr(ctx, insn, RTLOP_LOAD_S16, false, false, true); return; case OPCD_STH: translate_load_store_gpr(ctx, insn, RTLOP_STORE_I16, true, false, false); return; case OPCD_STHU: translate_load_store_gpr(ctx, insn, RTLOP_STORE_I16, true, false, true); return; case OPCD_LMW: translate_load_store_multiple(ctx, insn, false); return; case OPCD_STMW: translate_load_store_multiple(ctx, insn, true); return; case OPCD_LFS: translate_load_store_fpr(ctx, insn, true, false, false, false); return; case OPCD_LFSU: translate_load_store_fpr(ctx, insn, true, false, false, true); return; case OPCD_LFD: translate_load_store_fpr(ctx, insn, false, false, false, false); return; case OPCD_LFDU: translate_load_store_fpr(ctx, insn, false, false, false, true); return; case OPCD_STFS: translate_load_store_fpr(ctx, insn, true, true, false, false); return; case OPCD_STFSU: translate_load_store_fpr(ctx, insn, true, true, false, true); return; case OPCD_STFD: translate_load_store_fpr(ctx, insn, false, true, false, false); return; case OPCD_STFDU: translate_load_store_fpr(ctx, insn, false, true, false, true); return; case OPCD_PSQ_L: translate_load_store_ps(ctx, insn, false, false, false); return; case OPCD_PSQ_LU: translate_load_store_ps(ctx, insn, false, false, true); return; case OPCD_PSQ_ST: translate_load_store_ps(ctx, insn, true, false, false); return; case OPCD_PSQ_STU: translate_load_store_ps(ctx, insn, true, false, true); return; case OPCD_x3B: switch ((PPCExtendedOpcode3B)insn_XO_5(insn)) { case XO_FDIVS: translate_fp_arith(ctx, insn, RTLOP_FDIV, true, FPSCR_VXIDI | FPSCR_VXZDZ); return; case XO_FSUBS: translate_fp_arith(ctx, insn, RTLOP_FSUB, true, FPSCR_VXISI); return; case XO_FADDS: translate_fp_arith(ctx, insn, RTLOP_FADD, true, FPSCR_VXISI); return; case XO_FRES: translate_fp_recip(ctx, insn, false); return; case XO_FMULS: translate_fp_arith(ctx, insn, RTLOP_FMUL, true, FPSCR_VXIMZ); return; case 
XO_FMSUBS: translate_fp_fma(ctx, insn, RTLOP_FMSUB, true, false); return; case XO_FMADDS: translate_fp_fma(ctx, insn, RTLOP_FMADD, true, false); return; case XO_FNMSUBS: if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_FNMADD_ZERO_SIGN) { translate_fp_fma(ctx, insn, RTLOP_FNMADD, true, false); } else { translate_fp_fma(ctx, insn, RTLOP_FMSUB, true, true); } return; case XO_FNMADDS: if (ctx->handle->guest_opt & BINREC_OPT_G_PPC_FNMADD_ZERO_SIGN) { translate_fp_fma(ctx, insn, RTLOP_FNMSUB, true, false); } else { translate_fp_fma(ctx, insn, RTLOP_FMADD, true, true); } return; } translate_illegal(ctx, insn); return; case OPCD_x3F: translate_x3F(ctx, address, insn); return; } // switch (insn_OPCD(insn)) translate_illegal(ctx, insn); } /*************************************************************************/ /********************** Internal interface routines **********************/ /*************************************************************************/ bool guest_ppc_translate_block(GuestPPCContext *ctx, int index) { ASSERT(ctx); ASSERT(ctx->handle); ASSERT(ctx->unit); ASSERT(index >= 0 && index < ctx->num_blocks); RTLUnit * const unit = ctx->unit; ctx->current_block = index; ctx->cur_block_rtl_start = unit->num_insns; GuestPPCBlockInfo *block = &ctx->blocks[index]; const uint32_t start = block->start; const uint32_t *memory_base = (const uint32_t *)ctx->handle->setup.guest_memory_base; if (block->is_branch_target) { rtl_add_insn(unit, RTLOP_LABEL, 0, 0, 0, block->label); if (UNLIKELY(rtl_get_error_state(unit))) { log_ice(ctx->handle, "Failed to add label at 0x%X", start); return false; } } if (UNLIKELY(block->len == 0)) { /* This block was a backward branch target that wasn't part of a * previous block (see block-splitting logic at the bottom of * guest_ppc_scan()). Update NIA and return to the caller to * retranslate from the target address. 
*/ return_from_unit(ctx, ~0, rtl_imm32(unit, start), false); if (UNLIKELY(rtl_get_error_state(unit))) { log_ice(ctx->handle, "Failed to translate empty block at 0x%X", start); return false; } return true; } memset(&ctx->live, 0, sizeof(ctx->live)); ctx->fpr_dirty = 0; ctx->fpr_is_safe = 0; ctx->ps1_is_safe = 0; ctx->crb_dirty = 0; memset(&ctx->last_set, -1, sizeof(ctx->last_set)); memset(&ctx->gpr_raw, 0, sizeof(ctx->gpr_raw)); memset(&ctx->fpr_raw, 0, sizeof(ctx->fpr_raw)); memset(&ctx->ps_raw, 0, sizeof(ctx->ps_raw)); ctx->paired_lwarx_data_be = 0; ctx->skip_next_insn = false; for (uint32_t ofs = 0; ofs < block->len; ofs += 4) { const uint32_t address = start + ofs; if (ctx->handle->pre_insn_callback) { flush_live_regs(ctx, false); const int func = rtl_alloc_register(unit, RTLTYPE_ADDRESS); rtl_add_insn(unit, RTLOP_LOAD_IMM, func, 0, 0, (uintptr_t)ctx->handle->pre_insn_callback); rtl_add_insn(unit, RTLOP_CALL_TRANSPARENT, 0, func, ctx->psb_reg, rtl_imm32(unit, address)); } const uint32_t insn = bswap_be32(memory_base[address/4]); translate_insn(ctx, block, address, insn); /* Explicitly check for the presence of a callback (even though * post_insn_callback() does so too) so we don't repeatedly set * NIA if it's not necessary. */ if (ctx->handle->post_insn_callback) { set_nia_imm(ctx, address + 4); post_insn_callback(ctx, address); } if (UNLIKELY(rtl_get_error_state(unit))) { log_ice(ctx->handle, "Failed to translate instruction at 0x%X", address); return false; } } /* If the last instruction of the block is not a branch or trap, check * for dead CR stores (if requested) before entering the next block. 
*/ if (ctx->trim_cr_stores && !block->has_branch && !block->has_trap) { guest_ppc_trim_cr_stores(ctx, 0x14, 0, NULL, NULL, NULL, NULL); } flush_live_regs(ctx, true); set_nia_imm(ctx, start + block->len); if (UNLIKELY(rtl_get_error_state(unit))) { log_ice(ctx->handle, "Failed to update registers after block end 0x%X", start + block->len - 4); return false; } return true; } /*-----------------------------------------------------------------------*/ void guest_ppc_flush_cr(GuestPPCContext *ctx, bool make_live) { ASSERT(ctx); ASSERT(ctx->handle); ASSERT(ctx->unit); if (!ctx->use_split_fields) { return; } RTLUnit * const unit = ctx->unit; if (ctx->crb_changed_bitrev) { const int cr = merge_cr(ctx, make_live); if (make_live) { set_cr(ctx, cr); memset(ctx->last_set.crb, -1, sizeof(ctx->last_set.crb)); memset(ctx->live.crb, 0, sizeof(ctx->live.crb)); ctx->crb_dirty = 0; } else { rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, cr, 0, ctx->alias.cr); } } } /*-----------------------------------------------------------------------*/ void guest_ppc_flush_fpscr(GuestPPCContext *ctx) { ASSERT(ctx); ASSERT(ctx->handle); ASSERT(ctx->unit); if (!ctx->use_split_fields) { return; } RTLUnit * const unit = ctx->unit; if (ctx->fpscr_changed && ctx->alias.fr_fi_fprf) { const int fpscr = merge_fpscr(ctx, false); rtl_add_insn(unit, RTLOP_SET_ALIAS, 0, fpscr, 0, ctx->alias.fpscr); if (ctx->live.fpscr) { ctx->live.fpscr = fpscr; ctx->last_set.fpscr = -1; } } } /*-----------------------------------------------------------------------*/ int guest_ppc_get_epilogue_label(GuestPPCContext *ctx) { if (!ctx->epilogue_label) { ctx->epilogue_label = rtl_alloc_label(ctx->unit); } return ctx->epilogue_label; } /*************************************************************************/ /*************************************************************************/