Index: jdk17u-jdk-17.0.5-8/make/autoconf/jvm-features.m4 =================================================================== --- jdk17u-jdk-17.0.5-8.orig/make/autoconf/jvm-features.m4 +++ jdk17u-jdk-17.0.5-8/make/autoconf/jvm-features.m4 @@ -308,7 +308,8 @@ AC_DEFUN_ONCE([JVM_FEATURES_CHECK_SHENAN AC_MSG_CHECKING([if platform is supported by Shenandoah]) if test "x$OPENJDK_TARGET_CPU_ARCH" = "xx86" || \ test "x$OPENJDK_TARGET_CPU" = "xaarch64" || \ - test "x$OPENJDK_TARGET_CPU" = "xppc64le"; then + test "x$OPENJDK_TARGET_CPU" = "xppc64le" || \ + test "x$OPENJDK_TARGET_CPU" = "xriscv64"; then AC_MSG_RESULT([yes]) else AC_MSG_RESULT([no, $OPENJDK_TARGET_CPU]) @@ -358,7 +359,8 @@ AC_DEFUN_ONCE([JVM_FEATURES_CHECK_ZGC], AC_MSG_RESULT([no, $OPENJDK_TARGET_OS-$OPENJDK_TARGET_CPU]) AVAILABLE=false fi - elif test "x$OPENJDK_TARGET_CPU" = "xppc64le"; then + elif test "x$OPENJDK_TARGET_CPU" = "xppc64le" || \ + test "x$OPENJDK_TARGET_CPU" = "xriscv64"; then if test "x$OPENJDK_TARGET_OS" = "xlinux"; then AC_MSG_RESULT([yes]) else Index: jdk17u-jdk-17.0.5-8/make/autoconf/libraries.m4 =================================================================== --- jdk17u-jdk-17.0.5-8.orig/make/autoconf/libraries.m4 +++ jdk17u-jdk-17.0.5-8/make/autoconf/libraries.m4 @@ -1,5 +1,5 @@ # -# Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -148,6 +148,12 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], fi fi + # Because RISC-V only has word-sized atomics, it requires libatomic where + # other common architectures do not. So link libatomic by default. + if test "x$OPENJDK_TARGET_OS" = xlinux && test "x$OPENJDK_TARGET_CPU" = xriscv64; then + BASIC_JVM_LIBS="$BASIC_JVM_LIBS -latomic" + fi + # perfstat lib if test "x$OPENJDK_TARGET_OS" = xaix; then BASIC_JVM_LIBS="$BASIC_JVM_LIBS -lperfstat" Index: jdk17u-jdk-17.0.5-8/make/autoconf/platform.m4 =================================================================== --- jdk17u-jdk-17.0.5-8.orig/make/autoconf/platform.m4 +++ jdk17u-jdk-17.0.5-8/make/autoconf/platform.m4 @@ -1,5 +1,5 @@ # -# Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -561,6 +561,8 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HEL HOTSPOT_$1_CPU_DEFINE=PPC64 elif test "x$OPENJDK_$1_CPU" = xppc64le; then HOTSPOT_$1_CPU_DEFINE=PPC64 + elif test "x$OPENJDK_$1_CPU" = xriscv64; then + HOTSPOT_$1_CPU_DEFINE=RISCV64 # The cpu defines below are for zero, we don't support them directly.
elif test "x$OPENJDK_$1_CPU" = xsparc; then @@ -571,8 +573,6 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HEL HOTSPOT_$1_CPU_DEFINE=S390 elif test "x$OPENJDK_$1_CPU" = xs390x; then HOTSPOT_$1_CPU_DEFINE=S390 - elif test "x$OPENJDK_$1_CPU" = xriscv64; then - HOTSPOT_$1_CPU_DEFINE=RISCV elif test "x$OPENJDK_$1_CPU" != x; then HOTSPOT_$1_CPU_DEFINE=$(echo $OPENJDK_$1_CPU | tr a-z A-Z) fi Index: jdk17u-jdk-17.0.5-8/make/hotspot/gensrc/GensrcAdlc.gmk =================================================================== --- jdk17u-jdk-17.0.5-8.orig/make/hotspot/gensrc/GensrcAdlc.gmk +++ jdk17u-jdk-17.0.5-8/make/hotspot/gensrc/GensrcAdlc.gmk @@ -1,5 +1,5 @@ # -# Copyright (c) 2013, 2021, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -149,6 +149,13 @@ ifeq ($(call check-jvm-feature, compiler ))) endif + ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_v.ad \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_b.ad \ + ))) + endif + ifeq ($(call check-jvm-feature, shenandoahgc), true) AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \ Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -1590,7 +1590,9 @@ void LIR_Assembler::emit_compare_and_swa } -void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { + assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on aarch64"); Assembler::Condition acond, ncond; switch (condition) { Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -1417,7 +1417,10 @@ void LIR_Assembler::emit_compare_and_swa } -void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { + assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on arm"); + AsmCondition acond = al; AsmCondition ncond = nv; if (opr1 != opr2) { Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp =================================================================== --- jdk17u-jdk-17.0.5-8.orig/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2021 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -1554,8 +1554,10 @@ inline void load_to_reg(LIR_Assembler *l } } +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { + assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on ppc"); -void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { if (opr1->is_equal(opr2) || opr1->is_same_register(opr2)) { load_to_reg(this, opr1, result); // Condition doesn't matter. return; Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/constMethod.hpp" +#include "oops/klass.inline.hpp" +#include "oops/method.hpp" +#include "runtime/frame.inline.hpp" +#include "utilities/align.hpp" +#include "utilities/debug.hpp" +#include "utilities/macros.hpp" + +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { + case T_BOOLEAN: i = 0; break; + case T_CHAR : i = 1; break; + case T_BYTE : i = 2; break; + case T_SHORT : i = 3; break; + case T_INT : i = 4; break; + case T_LONG : i = 5; break; + case T_VOID : i = 6; break; + case T_FLOAT : i = 7; break; + case T_DOUBLE : i = 8; break; + case T_OBJECT : i = 9; break; + case T_ARRAY : i = 9; break; + default : ShouldNotReachHere(); + } + assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, + "index out of bounds"); + return i; +} + +// How much stack a method activation needs in words. +int AbstractInterpreter::size_top_interpreter_activation(Method* method) { + const int entry_size = frame::interpreter_frame_monitor_size(); + + // total overhead size: entry_size + (saved fp thru expr stack + // bottom). be sure to change this if you add/subtract anything + // to/from the overhead area + const int overhead_size = + -(frame::interpreter_frame_initial_sp_offset) + entry_size; + + const int stub_code = frame::entry_frame_after_call_words; + assert_cond(method != NULL); + const int method_stack = (method->max_locals() + method->max_stack()) * + Interpreter::stackElementWords; + return (overhead_size + method_stack + stub_code); +} + +// asm based interpreter deoptimization helpers +int AbstractInterpreter::size_activation(int max_stack, + int temps, + int extra_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame) { + // Note: This calculation must exactly parallel the frame setup + // in TemplateInterpreterGenerator::generate_method_entry. + + // fixed size of an interpreter frame: + int overhead = frame::sender_sp_offset - + frame::interpreter_frame_initial_sp_offset; + // Our locals were accounted for by the caller (or last_frame_adjust + // on the transistion) Since the callee parameters already account + // for the callee's params we only need to account for the extra + // locals. + int size = overhead + + (callee_locals - callee_params) + + monitors * frame::interpreter_frame_monitor_size() + + // On the top frame, at all times SP <= ESP, and SP is + // 16-aligned. We ensure this by adjusting SP on method + // entry and re-entry to allow room for the maximum size of + // the expression stack. When we call another method we bump + // SP so that no stack space is wasted. So, only on the top + // frame do we need to allow max_stack words. + (is_top_frame ? max_stack : temps + extra_args); + + // On riscv we always keep the stack pointer 16-aligned, so we + // must round up here. + size = align_up(size, 2); + + return size; +} + +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, + int popframe_extra_args, + int moncount, + int caller_actual_parameters, + int callee_param_count, + int callee_locals, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { + // The frame interpreter_frame is guaranteed to be the right size, + // as determined by a previous call to the size_activation() method. 
+ // It is also guaranteed to be walkable even though it is in a + // skeletal state + assert_cond(method != NULL && caller != NULL && interpreter_frame != NULL); + int max_locals = method->max_locals() * Interpreter::stackElementWords; + int extra_locals = (method->max_locals() - method->size_of_parameters()) * + Interpreter::stackElementWords; + +#ifdef ASSERT + assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable"); +#endif + + interpreter_frame->interpreter_frame_set_method(method); + // NOTE the difference in using sender_sp and interpreter_frame_sender_sp + // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) + // and sender_sp is fp + intptr_t* locals = NULL; + if (caller->is_interpreted_frame()) { + locals = caller->interpreter_frame_last_sp() + caller_actual_parameters - 1; + } else { + locals = interpreter_frame->sender_sp() + max_locals - 1; + } + +#ifdef ASSERT + if (caller->is_interpreted_frame()) { + assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); + } +#endif + + interpreter_frame->interpreter_frame_set_locals(locals); + BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); + BasicObjectLock* monbot = montop - moncount; + interpreter_frame->interpreter_frame_set_monitor_end(monbot); + + // Set last_sp + intptr_t* last_sp = (intptr_t*) monbot - + tempcount*Interpreter::stackElementWords - + popframe_extra_args; + interpreter_frame->interpreter_frame_set_last_sp(last_sp); + + // All frames but the initial (oldest) interpreter frame we fill in have + // a value for sender_sp that allows walking the stack but isn't + // truly correct. Correct the value here. + if (extra_locals != 0 && + interpreter_frame->sender_sp() == + interpreter_frame->interpreter_frame_sender_sp()) { + interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + + extra_locals); + } + + *interpreter_frame->interpreter_frame_cache_addr() = + method->constants()->cache(); + *interpreter_frame->interpreter_frame_mirror_addr() = + method->method_holder()->java_mirror(); +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/assembler_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/assembler_riscv.cpp @@ -0,0 +1,372 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include <stdio.h> +#include <sys/types.h> + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "compiler/disassembler.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/sharedRuntime.hpp" + +int AbstractAssembler::code_fill_byte() { + return 0; +} + +void Assembler::add(Register Rd, Register Rn, int64_t increment, Register temp) { + if (is_imm_in_range(increment, 12, 0)) { + addi(Rd, Rn, increment); + } else { + assert_different_registers(Rn, temp); + li(temp, increment); + add(Rd, Rn, temp); + } +} + +void Assembler::addw(Register Rd, Register Rn, int64_t increment, Register temp) { + if (is_imm_in_range(increment, 12, 0)) { + addiw(Rd, Rn, increment); + } else { + assert_different_registers(Rn, temp); + li(temp, increment); + addw(Rd, Rn, temp); + } +} + +void Assembler::sub(Register Rd, Register Rn, int64_t decrement, Register temp) { + if (is_imm_in_range(-decrement, 12, 0)) { + addi(Rd, Rn, -decrement); + } else { + assert_different_registers(Rn, temp); + li(temp, decrement); + sub(Rd, Rn, temp); + } +} + +void Assembler::subw(Register Rd, Register Rn, int64_t decrement, Register temp) { + if (is_imm_in_range(-decrement, 12, 0)) { + addiw(Rd, Rn, -decrement); + } else { + assert_different_registers(Rn, temp); + li(temp, decrement); + subw(Rd, Rn, temp); + } +} + +void Assembler::zext_w(Register Rd, Register Rs) { + add_uw(Rd, Rs, zr); +} + +void Assembler::_li(Register Rd, int64_t imm) { + // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff + int shift = 12; + int64_t upper = imm, lower = imm; + // Split imm to a lower 12-bit sign-extended part and the remainder, + // because addi will sign-extend the lower imm. + lower = ((int32_t)imm << 20) >> 20; + upper -= lower; + + // Test whether imm is a 32-bit integer. + if (!(((imm) & ~(int64_t)0x7fffffff) == 0 || + (((imm) & ~(int64_t)0x7fffffff) == ~(int64_t)0x7fffffff))) { + while (((upper >> shift) & 1) == 0) { shift++; } + upper >>= shift; + li(Rd, upper); + slli(Rd, Rd, shift); + if (lower != 0) { + addi(Rd, Rd, lower); + } + } else { + // 32-bit integer + Register hi_Rd = zr; + if (upper != 0) { + lui(Rd, (int32_t)upper); + hi_Rd = Rd; + } + if (lower != 0 || hi_Rd == zr) { + addiw(Rd, hi_Rd, lower); + } + } +} + +void Assembler::li64(Register Rd, int64_t imm) { + // Load upper 32 bits. upper = imm[63:32], but if imm[31] == 1 or + // (imm[31:28] == 0x7ff && imm[19] == 1), upper = imm[63:32] + 1. + int64_t lower = imm & 0xffffffff; + lower -= ((lower << 44) >> 44); + int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower; + int32_t upper = (tmp_imm - (int32_t)lower) >> 32; + + // Load upper 32 bits + int64_t up = upper, lo = upper; + lo = (lo << 52) >> 52; + up -= lo; + up = (int32_t)up; + lui(Rd, up); + addi(Rd, Rd, lo); + + // Load the rest 32 bits.
+ slli(Rd, Rd, 12); + addi(Rd, Rd, (int32_t)lower >> 20); + slli(Rd, Rd, 12); + lower = ((int32_t)imm << 12) >> 20; + addi(Rd, Rd, lower); + slli(Rd, Rd, 8); + lower = imm & 0xff; + addi(Rd, Rd, lower); +} + +void Assembler::li32(Register Rd, int32_t imm) { + // int32_t is in range 0x8000 0000 ~ 0x7fff ffff, and imm[31] is the sign bit + int64_t upper = imm, lower = imm; + lower = (imm << 20) >> 20; + upper -= lower; + upper = (int32_t)upper; + // lui Rd, imm[31:12] + imm[11] + lui(Rd, upper); + // use addiw to distinguish li32 to li64 + addiw(Rd, Rd, lower); +} + +#define INSN(NAME, REGISTER) \ + void Assembler::NAME(const address &dest, Register temp) { \ + assert_cond(dest != NULL); \ + int64_t distance = dest - pc(); \ + if (is_imm_in_range(distance, 20, 1)) { \ + jal(REGISTER, distance); \ + } else { \ + assert(temp != noreg, "temp must not be empty register!"); \ + int32_t offset = 0; \ + movptr_with_offset(temp, dest, offset); \ + jalr(REGISTER, temp, offset); \ + } \ + } \ + void Assembler::NAME(Label &l, Register temp) { \ + jal(REGISTER, l, temp); \ + } \ + + INSN(j, x0); + INSN(jal, x1); + +#undef INSN + +#define INSN(NAME, REGISTER) \ + void Assembler::NAME(Register Rs) { \ + jalr(REGISTER, Rs, 0); \ + } + + INSN(jr, x0); + INSN(jalr, x1); + +#undef INSN + +void Assembler::ret() { + jalr(x0, x1, 0); +} + +#define INSN(NAME, REGISTER) \ + void Assembler::NAME(const address &dest, Register temp) { \ + assert_cond(dest != NULL); \ + assert(temp != noreg, "temp must not be empty register!"); \ + int64_t distance = dest - pc(); \ + if (is_offset_in_range(distance, 32)) { \ + auipc(temp, distance + 0x800); \ + jalr(REGISTER, temp, ((int32_t)distance << 20) >> 20); \ + } else { \ + int32_t offset = 0; \ + movptr_with_offset(temp, dest, offset); \ + jalr(REGISTER, temp, offset); \ + } \ + } + + INSN(call, x1); + INSN(tail, x0); + +#undef INSN + +#define INSN(NAME, REGISTER) \ + void Assembler::NAME(const Address &adr, Register temp) { \ + switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(adr.target(), temp); \ + break; \ + } \ + case Address::base_plus_offset: { \ + int32_t offset = 0; \ + baseOffset(temp, adr, offset); \ + jalr(REGISTER, temp, offset); \ + break; \ + } \ + default: \ + ShouldNotReachHere(); \ + } \ + } + + INSN(j, x0); + INSN(jal, x1); + INSN(call, x1); + INSN(tail, x0); + +#undef INSN + +void Assembler::wrap_label(Register r1, Register r2, Label &L, compare_and_branch_insn insn, + compare_and_branch_label_insn neg_insn, bool is_far) { + if (is_far) { + Label done; + (this->*neg_insn)(r1, r2, done, /* is_far */ false); + j(L); + bind(done); + } else { + if (L.is_bound()) { + (this->*insn)(r1, r2, target(L)); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(r1, r2, pc()); + } + } +} + +void Assembler::wrap_label(Register Rt, Label &L, Register tmp, load_insn_by_temp insn) { + if (L.is_bound()) { + (this->*insn)(Rt, target(L), tmp); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(Rt, pc(), tmp); + } +} + +void Assembler::wrap_label(Register Rt, Label &L, jal_jalr_insn insn) { + if (L.is_bound()) { + (this->*insn)(Rt, target(L)); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(Rt, pc()); + } +} + +void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) { + uintptr_t imm64 = (uintptr_t)addr; +#ifndef PRODUCT + { + char buffer[64]; + snprintf(buffer, sizeof(buffer), "0x%" PRIx64, imm64); + block_comment(buffer); + } +#endif + 
assert(is_unsigned_imm_in_range(imm64, 47, 0) || (imm64 == (uintptr_t)-1), + "48-bit overflow in address constant"); + // Load upper 32 bits + int32_t imm = imm64 >> 16; + int64_t upper = imm, lower = imm; + lower = (lower << 52) >> 52; + upper -= lower; + upper = (int32_t)upper; + lui(Rd, upper); + addi(Rd, Rd, lower); + + // Load the rest 16 bits. + slli(Rd, Rd, 11); + addi(Rd, Rd, (imm64 >> 5) & 0x7ff); + slli(Rd, Rd, 5); + + // This offset will be used by following jalr/ld. + offset = imm64 & 0x1f; +} + +void Assembler::movptr(Register Rd, uintptr_t imm64) { + movptr(Rd, (address)imm64); +} + +void Assembler::movptr(Register Rd, address addr) { + int offset = 0; + movptr_with_offset(Rd, addr, offset); + addi(Rd, Rd, offset); +} + +void Assembler::ifence() { + fence_i(); + if (UseConservativeFence) { + fence(ir, ir); + } +} + +#define INSN(NAME, NEG_INSN) \ + void Assembler::NAME(Register Rs, Register Rt, const address &dest) { \ + NEG_INSN(Rt, Rs, dest); \ + } \ + void Assembler::NAME(Register Rs, Register Rt, Label &l, bool is_far) { \ + NEG_INSN(Rt, Rs, l, is_far); \ + } + + INSN(bgt, blt); + INSN(ble, bge); + INSN(bgtu, bltu); + INSN(bleu, bgeu); +#undef INSN + +#undef __ + +Address::Address(address target, relocInfo::relocType rtype) : _base(noreg), _offset(0), _mode(literal) { + _target = target; + switch (rtype) { + case relocInfo::oop_type: + case relocInfo::metadata_type: + // Oops are a special case. Normally they would be their own section + // but in cases like icBuffer they are literals in the code stream that + // we don't have a section for. We use none so that we get a literal address + // which is always patchable. + break; + case relocInfo::external_word_type: + _rspec = external_word_Relocation::spec(target); + break; + case relocInfo::internal_word_type: + _rspec = internal_word_Relocation::spec(target); + break; + case relocInfo::opt_virtual_call_type: + _rspec = opt_virtual_call_Relocation::spec(); + break; + case relocInfo::static_call_type: + _rspec = static_call_Relocation::spec(); + break; + case relocInfo::runtime_call_type: + _rspec = runtime_call_Relocation::spec(); + break; + case relocInfo::poll_type: + case relocInfo::poll_return_type: + _rspec = Relocation::spec_simple(rtype); + break; + case relocInfo::none: + _rspec = RelocationHolder::none; + break; + default: + ShouldNotReachHere(); + } +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/assembler_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/assembler_riscv.hpp @@ -0,0 +1,3049 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_ASSEMBLER_RISCV_HPP +#define CPU_RISCV_ASSEMBLER_RISCV_HPP + +#include "asm/register.hpp" +#include "assembler_riscv.inline.hpp" +#include "metaprogramming/enableIf.hpp" + +#define XLEN 64 + +// definitions of various symbolic names for machine registers + +// First intercalls between C and Java which use 8 general registers +// and 8 floating registers + +class Argument { + public: + enum { + n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...) + n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... ) + + n_int_register_parameters_j = 8, // x11, ... x17, x10 (j_rarg0, j_rarg1, ...) + n_float_register_parameters_j = 8 // f10, f11, ... f17 (j_farg0, j_farg1, ...) + }; +}; + +// function argument(caller-save registers) +REGISTER_DECLARATION(Register, c_rarg0, x10); +REGISTER_DECLARATION(Register, c_rarg1, x11); +REGISTER_DECLARATION(Register, c_rarg2, x12); +REGISTER_DECLARATION(Register, c_rarg3, x13); +REGISTER_DECLARATION(Register, c_rarg4, x14); +REGISTER_DECLARATION(Register, c_rarg5, x15); +REGISTER_DECLARATION(Register, c_rarg6, x16); +REGISTER_DECLARATION(Register, c_rarg7, x17); + +REGISTER_DECLARATION(FloatRegister, c_farg0, f10); +REGISTER_DECLARATION(FloatRegister, c_farg1, f11); +REGISTER_DECLARATION(FloatRegister, c_farg2, f12); +REGISTER_DECLARATION(FloatRegister, c_farg3, f13); +REGISTER_DECLARATION(FloatRegister, c_farg4, f14); +REGISTER_DECLARATION(FloatRegister, c_farg5, f15); +REGISTER_DECLARATION(FloatRegister, c_farg6, f16); +REGISTER_DECLARATION(FloatRegister, c_farg7, f17); + +// Symbolically name the register arguments used by the Java calling convention. +// We have control over the convention for java so we can do what we please. +// What pleases us is to offset the java calling convention so that when +// we call a suitable jni method the arguments are lined up and we don't +// have to do much shuffling. A suitable jni method is non-static and a +// small number of arguments. 
+// +// |------------------------------------------------------------------------| +// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 c_rarg6 c_rarg7 | +// |------------------------------------------------------------------------| +// | x10 x11 x12 x13 x14 x15 x16 x17 | +// |------------------------------------------------------------------------| +// | j_rarg7 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 j_rarg5 j_rarg6 | +// |------------------------------------------------------------------------| + +REGISTER_DECLARATION(Register, j_rarg0, c_rarg1); +REGISTER_DECLARATION(Register, j_rarg1, c_rarg2); +REGISTER_DECLARATION(Register, j_rarg2, c_rarg3); +REGISTER_DECLARATION(Register, j_rarg3, c_rarg4); +REGISTER_DECLARATION(Register, j_rarg4, c_rarg5); +REGISTER_DECLARATION(Register, j_rarg5, c_rarg6); +REGISTER_DECLARATION(Register, j_rarg6, c_rarg7); +REGISTER_DECLARATION(Register, j_rarg7, c_rarg0); + +// Java floating args are passed as per C + +REGISTER_DECLARATION(FloatRegister, j_farg0, f10); +REGISTER_DECLARATION(FloatRegister, j_farg1, f11); +REGISTER_DECLARATION(FloatRegister, j_farg2, f12); +REGISTER_DECLARATION(FloatRegister, j_farg3, f13); +REGISTER_DECLARATION(FloatRegister, j_farg4, f14); +REGISTER_DECLARATION(FloatRegister, j_farg5, f15); +REGISTER_DECLARATION(FloatRegister, j_farg6, f16); +REGISTER_DECLARATION(FloatRegister, j_farg7, f17); + +// zero rigster +REGISTER_DECLARATION(Register, zr, x0); +// global pointer +REGISTER_DECLARATION(Register, gp, x3); +// thread pointer +REGISTER_DECLARATION(Register, tp, x4); + +// registers used to hold VM data either temporarily within a method +// or across method calls + +// volatile (caller-save) registers + +// current method -- must be in a call-clobbered register +REGISTER_DECLARATION(Register, xmethod, x31); +// return address +REGISTER_DECLARATION(Register, ra, x1); + +// non-volatile (callee-save) registers + +// stack pointer +REGISTER_DECLARATION(Register, sp, x2); +// frame pointer +REGISTER_DECLARATION(Register, fp, x8); +// base of heap +REGISTER_DECLARATION(Register, xheapbase, x27); +// constant pool cache +REGISTER_DECLARATION(Register, xcpool, x26); +// monitors allocated on stack +REGISTER_DECLARATION(Register, xmonitors, x25); +// locals on stack +REGISTER_DECLARATION(Register, xlocals, x24); + +// java thread pointer +REGISTER_DECLARATION(Register, xthread, x23); +// bytecode pointer +REGISTER_DECLARATION(Register, xbcp, x22); +// Dispatch table base +REGISTER_DECLARATION(Register, xdispatch, x21); +// Java stack pointer +REGISTER_DECLARATION(Register, esp, x20); + +// temporary register(caller-save registers) +REGISTER_DECLARATION(Register, t0, x5); +REGISTER_DECLARATION(Register, t1, x6); +REGISTER_DECLARATION(Register, t2, x7); + +const Register g_INTArgReg[Argument::n_int_register_parameters_c] = { + c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7 +}; + +const FloatRegister g_FPArgReg[Argument::n_float_register_parameters_c] = { + c_farg0, c_farg1, c_farg2, c_farg3, c_farg4, c_farg5, c_farg6, c_farg7 +}; + +#define assert_cond(ARG1) assert(ARG1, #ARG1) + +// Addressing modes +class Address { + public: + + enum mode { no_mode, base_plus_offset, pcrel, literal }; + + private: + Register _base; + Register _index; + int64_t _offset; + enum mode _mode; + + RelocationHolder _rspec; + + // If the target is far we'll need to load the ea of this to a + // register to reach it. Otherwise if near we can do PC-relative + // addressing. 
+ address _target; + + public: + Address() + : _base(noreg), _index(noreg), _offset(0), _mode(no_mode), _target(NULL) { } + Address(Register r) + : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { } + + template<typename T, ENABLE_IF(std::is_integral<T>::value)> + Address(Register r, T o) + : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) {} + + Address(Register r, ByteSize disp) + : Address(r, in_bytes(disp)) {} + Address(address target, RelocationHolder const& rspec) + : _base(noreg), + _index(noreg), + _offset(0), + _mode(literal), + _rspec(rspec), + _target(target) { } + Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type); + + const Register base() const { + guarantee((_mode == base_plus_offset | _mode == pcrel | _mode == literal), "wrong mode"); + return _base; + } + long offset() const { + return _offset; + } + Register index() const { + return _index; + } + mode getMode() const { + return _mode; + } + + bool uses(Register reg) const { return _base == reg; } + const address target() const { return _target; } + const RelocationHolder& rspec() const { return _rspec; } + ~Address() { + _target = NULL; + _base = NULL; + } +}; + +// Convenience classes +class RuntimeAddress: public Address { + + public: + + RuntimeAddress(address target) : Address(target, relocInfo::runtime_call_type) {} + ~RuntimeAddress() {} +}; + +class OopAddress: public Address { + + public: + + OopAddress(address target) : Address(target, relocInfo::oop_type) {} + ~OopAddress() {} +}; + +class ExternalAddress: public Address { + private: + static relocInfo::relocType reloc_for_target(address target) { + // Sometimes ExternalAddress is used for values which aren't + // exactly addresses, like the card table base. + // external_word_type can't be used for values in the first page + // so just skip the reloc in that case. + return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none; + } + + public: + + ExternalAddress(address target) : Address(target, reloc_for_target(target)) {} + ~ExternalAddress() {} +}; + +class InternalAddress: public Address { + + public: + + InternalAddress(address target) : Address(target, relocInfo::internal_word_type) {} + ~InternalAddress() {} +}; + +class Assembler : public AbstractAssembler { +public: + + enum { instruction_size = 4 }; + + //---< calculate length of instruction >--- + // We just use the values set above. + // instruction must start at passed address + static unsigned int instr_len(unsigned char *instr) { return instruction_size; } + + //---< longest instructions >--- + static unsigned int instr_maxlen() { return instruction_size; } + + enum RoundingMode { + rne = 0b000, // round to Nearest, ties to Even + rtz = 0b001, // round towards Zero + rdn = 0b010, // round Down (towards negative infinity) + rup = 0b011, // round Up (towards infinity) + rmm = 0b100, // round to Nearest, ties to Max Magnitude + rdy = 0b111, // in instruction's rm field, selects dynamic rounding mode. In Rounding Mode register, Invalid.
+ }; + + void baseOffset32(Register Rd, const Address &adr, int32_t &offset) { + assert(Rd != noreg, "Rd must not be empty register!"); + guarantee(Rd != adr.base(), "should use different registers!"); + if (is_offset_in_range(adr.offset(), 32)) { + int32_t imm = adr.offset(); + int32_t upper = imm, lower = imm; + lower = (imm << 20) >> 20; + upper -= lower; + lui(Rd, upper); + offset = lower; + } else { + movptr_with_offset(Rd, (address)(uintptr_t)adr.offset(), offset); + } + add(Rd, Rd, adr.base()); + } + + void baseOffset(Register Rd, const Address &adr, int32_t &offset) { + if (is_offset_in_range(adr.offset(), 12)) { + assert(Rd != noreg, "Rd must not be empty register!"); + addi(Rd, adr.base(), adr.offset()); + offset = 0; + } else { + baseOffset32(Rd, adr, offset); + } + } + + void _li(Register Rd, int64_t imm); // optimized load immediate + void li32(Register Rd, int32_t imm); + void li64(Register Rd, int64_t imm); + void movptr(Register Rd, address addr); + void movptr_with_offset(Register Rd, address addr, int32_t &offset); + void movptr(Register Rd, uintptr_t imm64); + void ifence(); + void j(const address &dest, Register temp = t0); + void j(const Address &adr, Register temp = t0); + void j(Label &l, Register temp = t0); + void jal(Label &l, Register temp = t0); + void jal(const address &dest, Register temp = t0); + void jal(const Address &adr, Register temp = t0); + void jr(Register Rs); + void jalr(Register Rs); + void ret(); + void call(const address &dest, Register temp = t0); + void call(const Address &adr, Register temp = t0); + void tail(const address &dest, Register temp = t0); + void tail(const Address &adr, Register temp = t0); + void call(Label &l, Register temp) { + call(target(l), temp); + } + void tail(Label &l, Register temp) { + tail(target(l), temp); + } + + static inline uint32_t extract(uint32_t val, unsigned msb, unsigned lsb) { + assert_cond(msb >= lsb && msb <= 31); + unsigned nbits = msb - lsb + 1; + uint32_t mask = (1U << nbits) - 1; + uint32_t result = val >> lsb; + result &= mask; + return result; + } + + static inline int32_t sextract(uint32_t val, unsigned msb, unsigned lsb) { + assert_cond(msb >= lsb && msb <= 31); + int32_t result = val << (31 - msb); + result >>= (31 - msb + lsb); + return result; + } + + static void patch(address a, unsigned msb, unsigned lsb, unsigned val) { + assert_cond(a != NULL); + assert_cond(msb >= lsb && msb <= 31); + unsigned nbits = msb - lsb + 1; + guarantee(val < (1U << nbits), "Field too big for insn"); + unsigned mask = (1U << nbits) - 1; + val <<= lsb; + mask <<= lsb; + unsigned target = *(unsigned *)a; + target &= ~mask; + target |= val; + *(unsigned *)a = target; + } + + static void patch(address a, unsigned bit, unsigned val) { + patch(a, bit, bit, val); + } + + static void patch_reg(address a, unsigned lsb, Register reg) { + patch(a, lsb + 4, lsb, reg->encoding_nocheck()); + } + + static void patch_reg(address a, unsigned lsb, FloatRegister reg) { + patch(a, lsb + 4, lsb, reg->encoding_nocheck()); + } + + static void patch_reg(address a, unsigned lsb, VectorRegister reg) { + patch(a, lsb + 4, lsb, reg->encoding_nocheck()); + } + + void emit(unsigned insn) { + emit_int32((jint)insn); + } + + void _halt() { + emit_int32(0); + } + +// Register Instruction +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 25, funct7); \ + 
patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + emit(insn); \ + } + + INSN(_add, 0b0110011, 0b000, 0b0000000); + INSN(_sub, 0b0110011, 0b000, 0b0100000); + INSN(_andr, 0b0110011, 0b111, 0b0000000); + INSN(_orr, 0b0110011, 0b110, 0b0000000); + INSN(_xorr, 0b0110011, 0b100, 0b0000000); + INSN(sll, 0b0110011, 0b001, 0b0000000); + INSN(sra, 0b0110011, 0b101, 0b0100000); + INSN(srl, 0b0110011, 0b101, 0b0000000); + INSN(slt, 0b0110011, 0b010, 0b0000000); + INSN(sltu, 0b0110011, 0b011, 0b0000000); + INSN(_addw, 0b0111011, 0b000, 0b0000000); + INSN(_subw, 0b0111011, 0b000, 0b0100000); + INSN(sllw, 0b0111011, 0b001, 0b0000000); + INSN(sraw, 0b0111011, 0b101, 0b0100000); + INSN(srlw, 0b0111011, 0b101, 0b0000000); + INSN(mul, 0b0110011, 0b000, 0b0000001); + INSN(mulh, 0b0110011, 0b001, 0b0000001); + INSN(mulhsu,0b0110011, 0b010, 0b0000001); + INSN(mulhu, 0b0110011, 0b011, 0b0000001); + INSN(mulw, 0b0111011, 0b000, 0b0000001); + INSN(div, 0b0110011, 0b100, 0b0000001); + INSN(divu, 0b0110011, 0b101, 0b0000001); + INSN(divw, 0b0111011, 0b100, 0b0000001); + INSN(divuw, 0b0111011, 0b101, 0b0000001); + INSN(rem, 0b0110011, 0b110, 0b0000001); + INSN(remu, 0b0110011, 0b111, 0b0000001); + INSN(remw, 0b0111011, 0b110, 0b0000001); + INSN(remuw, 0b0111011, 0b111, 0b0000001); + +#undef INSN + +#define INSN_ENTRY_RELOC(result_type, header) \ + result_type header { \ + InstructionMark im(this); \ + guarantee(rtype == relocInfo::internal_word_type, \ + "only internal_word_type relocs make sense here"); \ + code_section()->relocate(inst_mark(), InternalAddress(dest).rspec()); + + // Load/store register (all modes) +#define INSN(NAME, op, funct3) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + unsigned insn = 0; \ + int32_t val = offset & 0xfff; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch_reg((address)&insn, 15, Rs); \ + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 31, 20, val); \ + emit(insn); \ + } + + INSN(lb, 0b0000011, 0b000); + INSN(lbu, 0b0000011, 0b100); + INSN(lh, 0b0000011, 0b001); + INSN(lhu, 0b0000011, 0b101); + INSN(_lw, 0b0000011, 0b010); + INSN(lwu, 0b0000011, 0b110); + INSN(_ld, 0b0000011, 0b011); + +#undef INSN + +#define INSN(NAME) \ + void NAME(Register Rd, address dest) { \ + assert_cond(dest != NULL); \ + int64_t distance = (dest - pc()); \ + if (is_offset_in_range(distance, 32)) { \ + auipc(Rd, (int32_t)distance + 0x800); \ + NAME(Rd, Rd, ((int32_t)distance << 20) >> 20); \ + } else { \ + int32_t offset = 0; \ + movptr_with_offset(Rd, dest, offset); \ + NAME(Rd, Rd, offset); \ + } \ + } \ + INSN_ENTRY_RELOC(void, NAME(Register Rd, address dest, relocInfo::relocType rtype)) \ + NAME(Rd, dest); \ + } \ + void NAME(Register Rd, const Address &adr, Register temp = t0) { \ + switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rd, adr.target()); \ + break; \ + } \ + case Address::base_plus_offset: { \ + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rd, adr.base(), adr.offset()); \ + } else { \ + int32_t offset = 0; \ + if (Rd == adr.base()) { \ + baseOffset32(temp, adr, offset); \ + NAME(Rd, temp, offset); \ + } else { \ + baseOffset32(Rd, adr, offset); \ + NAME(Rd, Rd, offset); \ + } \ + } \ + break; \ + } \ + default: \ + ShouldNotReachHere(); \ + } \ + } \ + void NAME(Register Rd, Label &L) { \ + wrap_label(Rd, L, 
&Assembler::NAME); \ + } + + INSN(lb); + INSN(lbu); + INSN(lh); + INSN(lhu); + INSN(lw); + INSN(lwu); + INSN(ld); + +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + unsigned insn = 0; \ + uint32_t val = offset & 0xfff; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch_reg((address)&insn, 15, Rs); \ + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 31, 20, val); \ + emit(insn); \ + } + + INSN(flw, 0b0000111, 0b010); + INSN(_fld, 0b0000111, 0b011); + +#undef INSN + +#define INSN(NAME) \ + void NAME(FloatRegister Rd, address dest, Register temp = t0) { \ + assert_cond(dest != NULL); \ + int64_t distance = (dest - pc()); \ + if (is_offset_in_range(distance, 32)) { \ + auipc(temp, (int32_t)distance + 0x800); \ + NAME(Rd, temp, ((int32_t)distance << 20) >> 20); \ + } else { \ + int32_t offset = 0; \ + movptr_with_offset(temp, dest, offset); \ + NAME(Rd, temp, offset); \ + } \ + } \ + INSN_ENTRY_RELOC(void, NAME(FloatRegister Rd, address dest, relocInfo::relocType rtype, Register temp = t0)) \ + NAME(Rd, dest, temp); \ + } \ + void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) { \ + switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rd, adr.target(), temp); \ + break; \ + } \ + case Address::base_plus_offset: { \ + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rd, adr.base(), adr.offset()); \ + } else { \ + int32_t offset = 0; \ + baseOffset32(temp, adr, offset); \ + NAME(Rd, temp, offset); \ + } \ + break; \ + } \ + default: \ + ShouldNotReachHere(); \ + } \ + } + + INSN(flw); + INSN(fld); +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(Register Rs1, Register Rs2, const int64_t offset) { \ + guarantee(is_imm_in_range(offset, 12, 1), "offset is invalid."); \ + unsigned insn = 0; \ + uint32_t val = offset & 0x1fff; \ + uint32_t val11 = (val >> 11) & 0x1; \ + uint32_t val12 = (val >> 12) & 0x1; \ + uint32_t low = (val >> 1) & 0xf; \ + uint32_t high = (val >> 5) & 0x3f; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + patch((address)&insn, 7, val11); \ + patch((address)&insn, 11, 8, low); \ + patch((address)&insn, 30, 25, high); \ + patch((address)&insn, 31, val12); \ + emit(insn); \ + } + + INSN(_beq, 0b1100011, 0b000); + INSN(_bne, 0b1100011, 0b001); + INSN(bge, 0b1100011, 0b101); + INSN(bgeu, 0b1100011, 0b111); + INSN(blt, 0b1100011, 0b100); + INSN(bltu, 0b1100011, 0b110); + +#undef INSN + +#define INSN(NAME) \ + void NAME(Register Rs1, Register Rs2, const address dest) { \ + assert_cond(dest != NULL); \ + int64_t offset = (dest - pc()); \ + guarantee(is_imm_in_range(offset, 12, 1), "offset is invalid."); \ + NAME(Rs1, Rs2, offset); \ + } \ + INSN_ENTRY_RELOC(void, NAME(Register Rs1, Register Rs2, address dest, relocInfo::relocType rtype)) \ + NAME(Rs1, Rs2, dest); \ + } + + INSN(beq); + INSN(bne); + INSN(bge); + INSN(bgeu); + INSN(blt); + INSN(bltu); + +#undef INSN + +#define INSN(NAME, NEG_INSN) \ + void NAME(Register Rs1, Register Rs2, Label &L, bool is_far = false) { \ + wrap_label(Rs1, Rs2, L, &Assembler::NAME, &Assembler::NEG_INSN, is_far); \ + } + + INSN(beq, bne); + INSN(bne, beq); + INSN(blt, bge); + INSN(bge, blt); + INSN(bltu, bgeu); + INSN(bgeu, bltu); + +#undef INSN + +#define 
INSN(NAME, REGISTER, op, funct3) \ + void NAME(REGISTER Rs1, Register Rs2, const int32_t offset) { \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + unsigned insn = 0; \ + uint32_t val = offset & 0xfff; \ + uint32_t low = val & 0x1f; \ + uint32_t high = (val >> 5) & 0x7f; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch_reg((address)&insn, 15, Rs2); \ + patch_reg((address)&insn, 20, Rs1); \ + patch((address)&insn, 11, 7, low); \ + patch((address)&insn, 31, 25, high); \ + emit(insn); \ + } \ + + INSN(sb, Register, 0b0100011, 0b000); + INSN(sh, Register, 0b0100011, 0b001); + INSN(_sw, Register, 0b0100011, 0b010); + INSN(_sd, Register, 0b0100011, 0b011); + INSN(fsw, FloatRegister, 0b0100111, 0b010); + INSN(_fsd, FloatRegister, 0b0100111, 0b011); + +#undef INSN + +#define INSN(NAME, REGISTER) \ + INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest, relocInfo::relocType rtype, Register temp = t0)) \ + NAME(Rs, dest, temp); \ + } + + INSN(sb, Register); + INSN(sh, Register); + INSN(sw, Register); + INSN(sd, Register); + INSN(fsw, FloatRegister); + INSN(fsd, FloatRegister); + +#undef INSN + +#define INSN(NAME) \ + void NAME(Register Rs, address dest, Register temp = t0) { \ + assert_cond(dest != NULL); \ + assert_different_registers(Rs, temp); \ + int64_t distance = (dest - pc()); \ + if (is_offset_in_range(distance, 32)) { \ + auipc(temp, (int32_t)distance + 0x800); \ + NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \ + } else { \ + int32_t offset = 0; \ + movptr_with_offset(temp, dest, offset); \ + NAME(Rs, temp, offset); \ + } \ + } \ + void NAME(Register Rs, const Address &adr, Register temp = t0) { \ + switch (adr.getMode()) { \ + case Address::literal: { \ + assert_different_registers(Rs, temp); \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rs, adr.target(), temp); \ + break; \ + } \ + case Address::base_plus_offset: { \ + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rs, adr.base(), adr.offset()); \ + } else { \ + int32_t offset= 0; \ + assert_different_registers(Rs, temp); \ + baseOffset32(temp, adr, offset); \ + NAME(Rs, temp, offset); \ + } \ + break; \ + } \ + default: \ + ShouldNotReachHere(); \ + } \ + } + + INSN(sb); + INSN(sh); + INSN(sw); + INSN(sd); + +#undef INSN + +#define INSN(NAME) \ + void NAME(FloatRegister Rs, address dest, Register temp = t0) { \ + assert_cond(dest != NULL); \ + int64_t distance = (dest - pc()); \ + if (is_offset_in_range(distance, 32)) { \ + auipc(temp, (int32_t)distance + 0x800); \ + NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \ + } else { \ + int32_t offset = 0; \ + movptr_with_offset(temp, dest, offset); \ + NAME(Rs, temp, offset); \ + } \ + } \ + void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) { \ + switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rs, adr.target(), temp); \ + break; \ + } \ + case Address::base_plus_offset: { \ + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rs, adr.base(), adr.offset()); \ + } else { \ + int32_t offset = 0; \ + baseOffset32(temp, adr, offset); \ + NAME(Rs, temp, offset); \ + } \ + break; \ + } \ + default: \ + ShouldNotReachHere(); \ + } \ + } + + INSN(fsw); + INSN(fsd); + +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(Register Rd, const uint32_t csr, Register Rs1) { \ + guarantee(is_unsigned_imm_in_range(csr, 12, 0), "csr is invalid"); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 
14, 12, funct3); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch((address)&insn, 31, 20, csr); \ + emit(insn); \ + } + + INSN(csrrw, 0b1110011, 0b001); + INSN(csrrs, 0b1110011, 0b010); + INSN(csrrc, 0b1110011, 0b011); + +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(Register Rd, const uint32_t csr, const uint32_t uimm) { \ + guarantee(is_unsigned_imm_in_range(csr, 12, 0), "csr is invalid"); \ + guarantee(is_unsigned_imm_in_range(uimm, 5, 0), "uimm is invalid"); \ + unsigned insn = 0; \ + uint32_t val = uimm & 0x1f; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 19, 15, val); \ + patch((address)&insn, 31, 20, csr); \ + emit(insn); \ + } + + INSN(csrrwi, 0b1110011, 0b101); + INSN(csrrsi, 0b1110011, 0b110); + INSN(csrrci, 0b1110011, 0b111); + +#undef INSN + +#define INSN(NAME, op) \ + void NAME(Register Rd, const int32_t offset) { \ + guarantee(is_imm_in_range(offset, 20, 1), "offset is invalid."); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 19, 12, (uint32_t)((offset >> 12) & 0xff)); \ + patch((address)&insn, 20, (uint32_t)((offset >> 11) & 0x1)); \ + patch((address)&insn, 30, 21, (uint32_t)((offset >> 1) & 0x3ff)); \ + patch((address)&insn, 31, (uint32_t)((offset >> 20) & 0x1)); \ + emit(insn); \ + } + + INSN(_jal, 0b1101111); + +#undef INSN + +#define INSN(NAME) \ + void NAME(Register Rd, const address dest, Register temp = t0) { \ + assert_cond(dest != NULL); \ + int64_t offset = dest - pc(); \ + if (is_imm_in_range(offset, 20, 1)) { \ + NAME(Rd, offset); \ + } else { \ + assert_different_registers(Rd, temp); \ + int32_t off = 0; \ + movptr_with_offset(temp, dest, off); \ + jalr(Rd, temp, off); \ + } \ + } \ + void NAME(Register Rd, Label &L, Register temp = t0) { \ + assert_different_registers(Rd, temp); \ + wrap_label(Rd, L, temp, &Assembler::NAME); \ + } + + INSN(jal); + +#undef INSN + +#undef INSN_ENTRY_RELOC + +#define INSN(NAME, op, funct) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 14, 12, funct); \ + patch_reg((address)&insn, 15, Rs); \ + int32_t val = offset & 0xfff; \ + patch((address)&insn, 31, 20, val); \ + emit(insn); \ + } + + INSN(_jalr, 0b1100111, 0b000); + +#undef INSN + + enum barrier { + i = 0b1000, o = 0b0100, r = 0b0010, w = 0b0001, + ir = i | r, ow = o | w, iorw = i | o | r | w + }; + + void fence(const uint32_t predecessor, const uint32_t successor) { + unsigned insn = 0; + guarantee(predecessor < 16, "predecessor is invalid"); + guarantee(successor < 16, "successor is invalid"); + patch((address)&insn, 6, 0, 0b001111); + patch((address)&insn, 11, 7, 0b00000); + patch((address)&insn, 14, 12, 0b000); + patch((address)&insn, 19, 15, 0b00000); + patch((address)&insn, 23, 20, successor); + patch((address)&insn, 27, 24, predecessor); + patch((address)&insn, 31, 28, 0b0000); + emit(insn); + } + +#define INSN(NAME, op, funct3, funct7) \ + void NAME() { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 11, 7, 0b00000); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 19, 15, 0b00000); \ + patch((address)&insn, 31, 20, funct7); \ + emit(insn); \ + } + + INSN(fence_i, 0b0001111, 0b001, 
0b000000000000); + INSN(ecall, 0b1110011, 0b000, 0b000000000000); + INSN(_ebreak, 0b1110011, 0b000, 0b000000000001); + +#undef INSN + +enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11}; + +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + patch((address)&insn, 31, 27, funct7); \ + patch((address)&insn, 26, 25, memory_order); \ + emit(insn); \ + } + + INSN(amoswap_w, 0b0101111, 0b010, 0b00001); + INSN(amoadd_w, 0b0101111, 0b010, 0b00000); + INSN(amoxor_w, 0b0101111, 0b010, 0b00100); + INSN(amoand_w, 0b0101111, 0b010, 0b01100); + INSN(amoor_w, 0b0101111, 0b010, 0b01000); + INSN(amomin_w, 0b0101111, 0b010, 0b10000); + INSN(amomax_w, 0b0101111, 0b010, 0b10100); + INSN(amominu_w, 0b0101111, 0b010, 0b11000); + INSN(amomaxu_w, 0b0101111, 0b010, 0b11100); + INSN(amoswap_d, 0b0101111, 0b011, 0b00001); + INSN(amoadd_d, 0b0101111, 0b011, 0b00000); + INSN(amoxor_d, 0b0101111, 0b011, 0b00100); + INSN(amoand_d, 0b0101111, 0b011, 0b01100); + INSN(amoor_d, 0b0101111, 0b011, 0b01000); + INSN(amomin_d, 0b0101111, 0b011, 0b10000); + INSN(amomax_d , 0b0101111, 0b011, 0b10100); + INSN(amominu_d, 0b0101111, 0b011, 0b11000); + INSN(amomaxu_d, 0b0101111, 0b011, 0b11100); +#undef INSN + +enum operand_size { int8, int16, int32, uint32, int64 }; + +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Aqrl memory_order = relaxed) { \ + unsigned insn = 0; \ + uint32_t val = memory_order & 0x3; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch((address)&insn, 25, 20, 0b00000); \ + patch((address)&insn, 31, 27, funct7); \ + patch((address)&insn, 26, 25, val); \ + emit(insn); \ + } + + INSN(lr_w, 0b0101111, 0b010, 0b00010); + INSN(lr_d, 0b0101111, 0b011, 0b00010); + +#undef INSN + +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = relaxed) { \ + unsigned insn = 0; \ + uint32_t val = memory_order & 0x3; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs2); \ + patch_reg((address)&insn, 20, Rs1); \ + patch((address)&insn, 31, 27, funct7); \ + patch((address)&insn, 26, 25, val); \ + emit(insn); \ + } + + INSN(sc_w, 0b0101111, 0b010, 0b00011); + INSN(sc_d, 0b0101111, 0b011, 0b00011); +#undef INSN + +#define INSN(NAME, op, funct5, funct7) \ + void NAME(FloatRegister Rd, FloatRegister Rs1, RoundingMode rm = rne) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, rm); \ + patch((address)&insn, 24, 20, funct5); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(fsqrt_s, 0b1010011, 0b00000, 0b0101100); + INSN(fsqrt_d, 0b1010011, 0b00000, 0b0101101); + INSN(fcvt_s_d, 0b1010011, 0b00001, 0b0100000); + INSN(fcvt_d_s, 0b1010011, 0b00000, 0b0100001); +#undef INSN + +// Immediate Instruction +#define INSN(NAME, op, funct3) \ + void NAME(Register Rd, Register Rs1, int32_t imm) { \ + guarantee(is_imm_in_range(imm, 12, 0), "Immediate is out of validity"); \ + unsigned insn = 0; \ + 
patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 20, imm & 0x00000fff); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(_addi, 0b0010011, 0b000); + INSN(slti, 0b0010011, 0b010); + INSN(_addiw, 0b0011011, 0b000); + INSN(_and_imm12, 0b0010011, 0b111); + INSN(ori, 0b0010011, 0b110); + INSN(xori, 0b0010011, 0b100); + +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(Register Rd, Register Rs1, uint32_t imm) { \ + guarantee(is_unsigned_imm_in_range(imm, 12, 0), "Immediate is out of validity"); \ + unsigned insn = 0; \ + patch((address)&insn,6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 20, imm & 0x00000fff); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(sltiu, 0b0010011, 0b011); + +#undef INSN + +// Shift Immediate Instruction +#define INSN(NAME, op, funct3, funct6) \ + void NAME(Register Rd, Register Rs1, unsigned shamt) { \ + guarantee(shamt <= 0x3f, "Shamt is invalid"); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 25, 20, shamt); \ + patch((address)&insn, 31, 26, funct6); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(_slli, 0b0010011, 0b001, 0b000000); + INSN(_srai, 0b0010011, 0b101, 0b010000); + INSN(_srli, 0b0010011, 0b101, 0b000000); + +#undef INSN + +// Shift Word Immediate Instruction +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, unsigned shamt) { \ + guarantee(shamt <= 0x1f, "Shamt is invalid"); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 24, 20, shamt); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(slliw, 0b0011011, 0b001, 0b0000000); + INSN(sraiw, 0b0011011, 0b101, 0b0100000); + INSN(srliw, 0b0011011, 0b101, 0b0000000); + +#undef INSN + +// Upper Immediate Instruction +#define INSN(NAME, op) \ + void NAME(Register Rd, int32_t imm) { \ + int32_t upperImm = imm >> 12; \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch_reg((address)&insn, 7, Rd); \ + upperImm &= 0x000fffff; \ + patch((address)&insn, 31, 12, upperImm); \ + emit(insn); \ + } + + INSN(_lui, 0b0110111); + INSN(auipc, 0b0010111); + +#undef INSN + +// Float and Double Rigster Instruction +#define INSN(NAME, op, funct2) \ + void NAME(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, FloatRegister Rs3, RoundingMode rm = rne) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, rm); \ + patch((address)&insn, 26, 25, funct2); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + patch_reg((address)&insn, 27, Rs3); \ + emit(insn); \ + } + + INSN(fmadd_s, 0b1000011, 0b00); + INSN(fmsub_s, 0b1000111, 0b00); + INSN(fnmsub_s, 0b1001011, 0b00); + INSN(fnmadd_s, 0b1001111, 0b00); + INSN(fmadd_d, 0b1000011, 0b01); + INSN(fmsub_d, 0b1000111, 0b01); + INSN(fnmsub_d, 0b1001011, 0b01); + INSN(fnmadd_d, 0b1001111, 0b01); + +#undef INSN + +// Float and Double Rigster Instruction +#define INSN(NAME, op, funct3, funct7) \ + void NAME(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) { \ + unsigned insn = 0; \ + 
patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + emit(insn); \ + } + + INSN(fsgnj_s, 0b1010011, 0b000, 0b0010000); + INSN(fsgnjn_s, 0b1010011, 0b001, 0b0010000); + INSN(fsgnjx_s, 0b1010011, 0b010, 0b0010000); + INSN(fmin_s, 0b1010011, 0b000, 0b0010100); + INSN(fmax_s, 0b1010011, 0b001, 0b0010100); + INSN(fsgnj_d, 0b1010011, 0b000, 0b0010001); + INSN(fsgnjn_d, 0b1010011, 0b001, 0b0010001); + INSN(fsgnjx_d, 0b1010011, 0b010, 0b0010001); + INSN(fmin_d, 0b1010011, 0b000, 0b0010101); + INSN(fmax_d, 0b1010011, 0b001, 0b0010101); + +#undef INSN + +// Float and Double Register Arith Instruction +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, FloatRegister Rs1, FloatRegister Rs2) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + emit(insn); \ + } + + INSN(feq_s, 0b1010011, 0b010, 0b1010000); + INSN(flt_s, 0b1010011, 0b001, 0b1010000); + INSN(fle_s, 0b1010011, 0b000, 0b1010000); + INSN(feq_d, 0b1010011, 0b010, 0b1010001); + INSN(fle_d, 0b1010011, 0b000, 0b1010001); + INSN(flt_d, 0b1010011, 0b001, 0b1010001); +#undef INSN + +// Float and Double Arith Instruction +#define INSN(NAME, op, funct7) \ + void NAME(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, rm); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + emit(insn); \ + } + + INSN(fadd_s, 0b1010011, 0b0000000); + INSN(fsub_s, 0b1010011, 0b0000100); + INSN(fmul_s, 0b1010011, 0b0001000); + INSN(fdiv_s, 0b1010011, 0b0001100); + INSN(fadd_d, 0b1010011, 0b0000001); + INSN(fsub_d, 0b1010011, 0b0000101); + INSN(fmul_d, 0b1010011, 0b0001001); + INSN(fdiv_d, 0b1010011, 0b0001101); + +#undef INSN + +// Whole Float and Double Conversion Instruction +#define INSN(NAME, op, funct5, funct7) \ + void NAME(FloatRegister Rd, Register Rs1, RoundingMode rm = rne) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, rm); \ + patch((address)&insn, 24, 20, funct5); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(fcvt_s_w, 0b1010011, 0b00000, 0b1101000); + INSN(fcvt_s_wu, 0b1010011, 0b00001, 0b1101000); + INSN(fcvt_s_l, 0b1010011, 0b00010, 0b1101000); + INSN(fcvt_s_lu, 0b1010011, 0b00011, 0b1101000); + INSN(fcvt_d_w, 0b1010011, 0b00000, 0b1101001); + INSN(fcvt_d_wu, 0b1010011, 0b00001, 0b1101001); + INSN(fcvt_d_l, 0b1010011, 0b00010, 0b1101001); + INSN(fcvt_d_lu, 0b1010011, 0b00011, 0b1101001); + +#undef INSN + +// Float and Double Conversion Instruction +#define INSN(NAME, op, funct5, funct7) \ + void NAME(Register Rd, FloatRegister Rs1, RoundingMode rm = rtz) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, rm); \ + patch((address)&insn, 24, 20, funct5); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(fcvt_w_s,
0b1010011, 0b00000, 0b1100000); + INSN(fcvt_l_s, 0b1010011, 0b00010, 0b1100000); + INSN(fcvt_wu_s, 0b1010011, 0b00001, 0b1100000); + INSN(fcvt_lu_s, 0b1010011, 0b00011, 0b1100000); + INSN(fcvt_w_d, 0b1010011, 0b00000, 0b1100001); + INSN(fcvt_wu_d, 0b1010011, 0b00001, 0b1100001); + INSN(fcvt_l_d, 0b1010011, 0b00010, 0b1100001); + INSN(fcvt_lu_d, 0b1010011, 0b00011, 0b1100001); + +#undef INSN + +// Float and Double Move Instruction +#define INSN(NAME, op, funct3, funct5, funct7) \ + void NAME(FloatRegister Rd, Register Rs1) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 20, funct5); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(fmv_w_x, 0b1010011, 0b000, 0b00000, 0b1111000); + INSN(fmv_d_x, 0b1010011, 0b000, 0b00000, 0b1111001); + +#undef INSN + +// Float and Double Conversion Instruction +#define INSN(NAME, op, funct3, funct5, funct7) \ + void NAME(Register Rd, FloatRegister Rs1) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 20, funct5); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(fclass_s, 0b1010011, 0b001, 0b00000, 0b1110000); + INSN(fclass_d, 0b1010011, 0b001, 0b00000, 0b1110001); + INSN(fmv_x_w, 0b1010011, 0b000, 0b00000, 0b1110000); + INSN(fmv_x_d, 0b1010011, 0b000, 0b00000, 0b1110001); + +#undef INSN + +// ========================== +// RISC-V Vector Extension +// ========================== +enum SEW { + e8, + e16, + e32, + e64, + RESERVED, +}; + +enum LMUL { + mf8 = 0b101, + mf4 = 0b110, + mf2 = 0b111, + m1 = 0b000, + m2 = 0b001, + m4 = 0b010, + m8 = 0b011, +}; + +enum VMA { + mu, // undisturbed + ma, // agnostic +}; + +enum VTA { + tu, // undisturbed + ta, // agnostic +}; + +static Assembler::SEW elembytes_to_sew(int ebytes) { + assert(ebytes > 0 && ebytes <= 8, "unsupported element size"); + return (Assembler::SEW) exact_log2(ebytes); +} + +static Assembler::SEW elemtype_to_sew(BasicType etype) { + return Assembler::elembytes_to_sew(type2aelembytes(etype)); +} + +#define patch_vtype(hsb, lsb, vlmul, vsew, vta, vma, vill) \ + if (vill == 1) { \ + guarantee((vlmul | vsew | vta | vma == 0), \ + "the other bits in vtype shall be zero"); \ + } \ + patch((address)&insn, lsb + 2, lsb, vlmul); \ + patch((address)&insn, lsb + 5, lsb + 3, vsew); \ + patch((address)&insn, lsb + 6, vta); \ + patch((address)&insn, lsb + 7, vma); \ + patch((address)&insn, hsb - 1, lsb + 8, 0); \ + patch((address)&insn, hsb, vill) + +#define INSN(NAME, op, funct3) \ + void NAME(Register Rd, Register Rs1, SEW sew, LMUL lmul = m1, \ + VMA vma = mu, VTA vta = tu, bool vill = false) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch_vtype(30, 20, lmul, sew, vta, vma, vill); \ + patch((address)&insn, 31, 0); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(vsetvli, 0b1010111, 0b111); + +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(Register Rd, uint32_t imm, SEW sew, LMUL lmul = m1, \ + VMA vma = mu, VTA vta = tu, bool vill = false) { \ + unsigned insn = 0; \ + guarantee(is_unsigned_imm_in_range(imm, 5, 0), "imm is invalid"); \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ 
+ patch((address)&insn, 19, 15, imm); \ + patch_vtype(29, 20, lmul, sew, vta, vma, vill); \ + patch((address)&insn, 31, 30, 0b11); \ + patch_reg((address)&insn, 7, Rd); \ + emit(insn); \ + } + + INSN(vsetivli, 0b1010111, 0b111); + +#undef INSN + +#undef patch_vtype + +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + emit(insn); \ + } + + // Vector Configuration Instruction + INSN(vsetvl, 0b1010111, 0b111, 0b1000000); + +#undef INSN + +enum VectorMask { + v0_t = 0b0, + unmasked = 0b1 +}; + +#define patch_VArith(op, Reg, funct3, Reg_or_Imm5, Vs2, vm, funct6) \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 19, 15, Reg_or_Imm5); \ + patch((address)&insn, 25, vm); \ + patch((address)&insn, 31, 26, funct6); \ + patch_reg((address)&insn, 7, Reg); \ + patch_reg((address)&insn, 20, Vs2); \ + emit(insn) + +// r2_vm +#define INSN(NAME, op, funct3, Vs1, funct6) \ + void NAME(Register Rd, VectorRegister Vs2, VectorMask vm = unmasked) { \ + patch_VArith(op, Rd, funct3, Vs1, Vs2, vm, funct6); \ + } + + // Vector Mask + INSN(vpopc_m, 0b1010111, 0b010, 0b10000, 0b010000); + INSN(vfirst_m, 0b1010111, 0b010, 0b10001, 0b010000); +#undef INSN + +#define INSN(NAME, op, funct3, Vs1, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs2, VectorMask vm = unmasked) { \ + patch_VArith(op, Vd, funct3, Vs1, Vs2, vm, funct6); \ + } + + // Vector Integer Extension + INSN(vzext_vf2, 0b1010111, 0b010, 0b00110, 0b010010); + INSN(vzext_vf4, 0b1010111, 0b010, 0b00100, 0b010010); + INSN(vzext_vf8, 0b1010111, 0b010, 0b00010, 0b010010); + INSN(vsext_vf2, 0b1010111, 0b010, 0b00111, 0b010010); + INSN(vsext_vf4, 0b1010111, 0b010, 0b00101, 0b010010); + INSN(vsext_vf8, 0b1010111, 0b010, 0b00011, 0b010010); + + // Vector Mask + INSN(vmsbf_m, 0b1010111, 0b010, 0b00001, 0b010100); + INSN(vmsif_m, 0b1010111, 0b010, 0b00011, 0b010100); + INSN(vmsof_m, 0b1010111, 0b010, 0b00010, 0b010100); + INSN(viota_m, 0b1010111, 0b010, 0b10000, 0b010100); + + // Vector Single-Width Floating-Point/Integer Type-Convert Instructions + INSN(vfcvt_xu_f_v, 0b1010111, 0b001, 0b00000, 0b010010); + INSN(vfcvt_x_f_v, 0b1010111, 0b001, 0b00001, 0b010010); + INSN(vfcvt_f_xu_v, 0b1010111, 0b001, 0b00010, 0b010010); + INSN(vfcvt_f_x_v, 0b1010111, 0b001, 0b00011, 0b010010); + INSN(vfcvt_rtz_xu_f_v, 0b1010111, 0b001, 0b00110, 0b010010); + INSN(vfcvt_rtz_x_f_v, 0b1010111, 0b001, 0b00111, 0b010010); + + // Vector Floating-Point Instruction + INSN(vfsqrt_v, 0b1010111, 0b001, 0b00000, 0b010011); + INSN(vfclass_v, 0b1010111, 0b001, 0b10000, 0b010011); + +#undef INSN + +// r2rd +#define INSN(NAME, op, funct3, simm5, vm, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs2) { \ + patch_VArith(op, Vd, funct3, simm5, Vs2, vm, funct6); \ + } + + // Vector Whole Vector Register Move + INSN(vmv1r_v, 0b1010111, 0b011, 0b00000, 0b1, 0b100111); + INSN(vmv2r_v, 0b1010111, 0b011, 0b00001, 0b1, 0b100111); + INSN(vmv4r_v, 0b1010111, 0b011, 0b00011, 0b1, 0b100111); + INSN(vmv8r_v, 0b1010111, 0b011, 0b00111, 0b1, 0b100111); + +#undef INSN + +#define INSN(NAME, op, funct3, Vs1, vm, funct6) \ + void NAME(FloatRegister Rd, VectorRegister Vs2) { \ + patch_VArith(op, Rd, funct3, Vs1, Vs2, 
vm, funct6); \ + } + + // Vector Floating-Point Move Instruction + INSN(vfmv_f_s, 0b1010111, 0b001, 0b00000, 0b1, 0b010000); + +#undef INSN + +#define INSN(NAME, op, funct3, Vs1, vm, funct6) \ + void NAME(Register Rd, VectorRegister Vs2) { \ + patch_VArith(op, Rd, funct3, Vs1, Vs2, vm, funct6); \ + } + + // Vector Integer Scalar Move Instructions + INSN(vmv_x_s, 0b1010111, 0b010, 0b00000, 0b1, 0b010000); + +#undef INSN + +// r_vm +#define INSN(NAME, op, funct3, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) { \ + guarantee(is_unsigned_imm_in_range(imm, 5, 0), "imm is invalid"); \ + patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6); \ + } + + // Vector Single-Width Bit Shift Instructions + INSN(vsra_vi, 0b1010111, 0b011, 0b101001); + INSN(vsrl_vi, 0b1010111, 0b011, 0b101000); + INSN(vsll_vi, 0b1010111, 0b011, 0b100101); + +#undef INSN + +#define INSN(NAME, op, funct3, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs1, VectorRegister Vs2, VectorMask vm = unmasked) { \ + patch_VArith(op, Vd, funct3, Vs1->encoding_nocheck(), Vs2, vm, funct6); \ + } + + // Vector Single-Width Floating-Point Fused Multiply-Add Instructions + INSN(vfnmsub_vv, 0b1010111, 0b001, 0b101011); + INSN(vfmsub_vv, 0b1010111, 0b001, 0b101010); + INSN(vfnmadd_vv, 0b1010111, 0b001, 0b101001); + INSN(vfmadd_vv, 0b1010111, 0b001, 0b101000); + INSN(vfnmsac_vv, 0b1010111, 0b001, 0b101111); + INSN(vfmsac_vv, 0b1010111, 0b001, 0b101110); + INSN(vfmacc_vv, 0b1010111, 0b001, 0b101100); + INSN(vfnmacc_vv, 0b1010111, 0b001, 0b101101); + + // Vector Single-Width Integer Multiply-Add Instructions + INSN(vnmsub_vv, 0b1010111, 0b010, 0b101011); + INSN(vmadd_vv, 0b1010111, 0b010, 0b101001); + INSN(vnmsac_vv, 0b1010111, 0b010, 0b101111); + INSN(vmacc_vv, 0b1010111, 0b010, 0b101101); + +#undef INSN + +#define INSN(NAME, op, funct3, funct6) \ + void NAME(VectorRegister Vd, Register Rs1, VectorRegister Vs2, VectorMask vm = unmasked) { \ + patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \ + } + + // Vector Single-Width Integer Multiply-Add Instructions + INSN(vnmsub_vx, 0b1010111, 0b110, 0b101011); + INSN(vmadd_vx, 0b1010111, 0b110, 0b101001); + INSN(vnmsac_vx, 0b1010111, 0b110, 0b101111); + INSN(vmacc_vx, 0b1010111, 0b110, 0b101101); + + INSN(vrsub_vx, 0b1010111, 0b100, 0b000011); + +#undef INSN + +#define INSN(NAME, op, funct3, funct6) \ + void NAME(VectorRegister Vd, FloatRegister Rs1, VectorRegister Vs2, VectorMask vm = unmasked) { \ + patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \ + } + + // Vector Single-Width Floating-Point Fused Multiply-Add Instructions + INSN(vfnmsub_vf, 0b1010111, 0b101, 0b101011); + INSN(vfmsub_vf, 0b1010111, 0b101, 0b101010); + INSN(vfnmadd_vf, 0b1010111, 0b101, 0b101001); + INSN(vfmadd_vf, 0b1010111, 0b101, 0b101000); + INSN(vfnmsac_vf, 0b1010111, 0b101, 0b101111); + INSN(vfmsac_vf, 0b1010111, 0b101, 0b101110); + INSN(vfmacc_vf, 0b1010111, 0b101, 0b101100); + INSN(vfnmacc_vf, 0b1010111, 0b101, 0b101101); + +#undef INSN + +#define INSN(NAME, op, funct3, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs2, VectorRegister Vs1, VectorMask vm = unmasked) { \ + patch_VArith(op, Vd, funct3, Vs1->encoding_nocheck(), Vs2, vm, funct6); \ + } + + // Vector Single-Width Floating-Point Reduction Instructions + INSN(vfredsum_vs, 0b1010111, 0b001, 0b000001); + INSN(vfredosum_vs, 0b1010111, 0b001, 0b000011); + INSN(vfredmin_vs, 0b1010111, 0b001, 0b000101); + INSN(vfredmax_vs, 0b1010111, 
0b001, 0b000111); + + // Vector Single-Width Integer Reduction Instructions + INSN(vredsum_vs, 0b1010111, 0b010, 0b000000); + INSN(vredand_vs, 0b1010111, 0b010, 0b000001); + INSN(vredor_vs, 0b1010111, 0b010, 0b000010); + INSN(vredxor_vs, 0b1010111, 0b010, 0b000011); + INSN(vredminu_vs, 0b1010111, 0b010, 0b000100); + INSN(vredmin_vs, 0b1010111, 0b010, 0b000101); + INSN(vredmaxu_vs, 0b1010111, 0b010, 0b000110); + INSN(vredmax_vs, 0b1010111, 0b010, 0b000111); + + // Vector Floating-Point Compare Instructions + INSN(vmfle_vv, 0b1010111, 0b001, 0b011001); + INSN(vmflt_vv, 0b1010111, 0b001, 0b011011); + INSN(vmfne_vv, 0b1010111, 0b001, 0b011100); + INSN(vmfeq_vv, 0b1010111, 0b001, 0b011000); + + // Vector Floating-Point Sign-Injection Instructions + INSN(vfsgnjx_vv, 0b1010111, 0b001, 0b001010); + INSN(vfsgnjn_vv, 0b1010111, 0b001, 0b001001); + INSN(vfsgnj_vv, 0b1010111, 0b001, 0b001000); + + // Vector Floating-Point MIN/MAX Instructions + INSN(vfmax_vv, 0b1010111, 0b001, 0b000110); + INSN(vfmin_vv, 0b1010111, 0b001, 0b000100); + + // Vector Single-Width Floating-Point Multiply/Divide Instructions + INSN(vfdiv_vv, 0b1010111, 0b001, 0b100000); + INSN(vfmul_vv, 0b1010111, 0b001, 0b100100); + + // Vector Single-Width Floating-Point Add/Subtract Instructions + INSN(vfsub_vv, 0b1010111, 0b001, 0b000010); + INSN(vfadd_vv, 0b1010111, 0b001, 0b000000); + + // Vector Single-Width Fractional Multiply with Rounding and Saturation + INSN(vsmul_vv, 0b1010111, 0b000, 0b100111); + + // Vector Integer Divide Instructions + INSN(vrem_vv, 0b1010111, 0b010, 0b100011); + INSN(vremu_vv, 0b1010111, 0b010, 0b100010); + INSN(vdiv_vv, 0b1010111, 0b010, 0b100001); + INSN(vdivu_vv, 0b1010111, 0b010, 0b100000); + + // Vector Single-Width Integer Multiply Instructions + INSN(vmulhsu_vv, 0b1010111, 0b010, 0b100110); + INSN(vmulhu_vv, 0b1010111, 0b010, 0b100100); + INSN(vmulh_vv, 0b1010111, 0b010, 0b100111); + INSN(vmul_vv, 0b1010111, 0b010, 0b100101); + + // Vector Integer Min/Max Instructions + INSN(vmax_vv, 0b1010111, 0b000, 0b000111); + INSN(vmaxu_vv, 0b1010111, 0b000, 0b000110); + INSN(vmin_vv, 0b1010111, 0b000, 0b000101); + INSN(vminu_vv, 0b1010111, 0b000, 0b000100); + + // Vector Integer Comparison Instructions + INSN(vmsle_vv, 0b1010111, 0b000, 0b011101); + INSN(vmsleu_vv, 0b1010111, 0b000, 0b011100); + INSN(vmslt_vv, 0b1010111, 0b000, 0b011011); + INSN(vmsltu_vv, 0b1010111, 0b000, 0b011010); + INSN(vmsne_vv, 0b1010111, 0b000, 0b011001); + INSN(vmseq_vv, 0b1010111, 0b000, 0b011000); + + // Vector Single-Width Bit Shift Instructions + INSN(vsra_vv, 0b1010111, 0b000, 0b101001); + INSN(vsrl_vv, 0b1010111, 0b000, 0b101000); + INSN(vsll_vv, 0b1010111, 0b000, 0b100101); + + // Vector Bitwise Logical Instructions + INSN(vxor_vv, 0b1010111, 0b000, 0b001011); + INSN(vor_vv, 0b1010111, 0b000, 0b001010); + INSN(vand_vv, 0b1010111, 0b000, 0b001001); + + // Vector Single-Width Integer Add and Subtract + INSN(vsub_vv, 0b1010111, 0b000, 0b000010); + INSN(vadd_vv, 0b1010111, 0b000, 0b000000); + +#undef INSN + + +#define INSN(NAME, op, funct3, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs2, Register Rs1, VectorMask vm = unmasked) { \ + patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \ + } + + // Vector Integer Divide Instructions + INSN(vrem_vx, 0b1010111, 0b110, 0b100011); + INSN(vremu_vx, 0b1010111, 0b110, 0b100010); + INSN(vdiv_vx, 0b1010111, 0b110, 0b100001); + INSN(vdivu_vx, 0b1010111, 0b110, 0b100000); + + // Vector Single-Width Integer Multiply Instructions + INSN(vmulhsu_vx, 0b1010111, 
0b110, 0b100110); + INSN(vmulhu_vx, 0b1010111, 0b110, 0b100100); + INSN(vmulh_vx, 0b1010111, 0b110, 0b100111); + INSN(vmul_vx, 0b1010111, 0b110, 0b100101); + + // Vector Integer Min/Max Instructions + INSN(vmax_vx, 0b1010111, 0b100, 0b000111); + INSN(vmaxu_vx, 0b1010111, 0b100, 0b000110); + INSN(vmin_vx, 0b1010111, 0b100, 0b000101); + INSN(vminu_vx, 0b1010111, 0b100, 0b000100); + + // Vector Integer Comparison Instructions + INSN(vmsgt_vx, 0b1010111, 0b100, 0b011111); + INSN(vmsgtu_vx, 0b1010111, 0b100, 0b011110); + INSN(vmsle_vx, 0b1010111, 0b100, 0b011101); + INSN(vmsleu_vx, 0b1010111, 0b100, 0b011100); + INSN(vmslt_vx, 0b1010111, 0b100, 0b011011); + INSN(vmsltu_vx, 0b1010111, 0b100, 0b011010); + INSN(vmsne_vx, 0b1010111, 0b100, 0b011001); + INSN(vmseq_vx, 0b1010111, 0b100, 0b011000); + + // Vector Narrowing Integer Right Shift Instructions + INSN(vnsra_wx, 0b1010111, 0b100, 0b101101); + INSN(vnsrl_wx, 0b1010111, 0b100, 0b101100); + + // Vector Single-Width Bit Shift Instructions + INSN(vsra_vx, 0b1010111, 0b100, 0b101001); + INSN(vsrl_vx, 0b1010111, 0b100, 0b101000); + INSN(vsll_vx, 0b1010111, 0b100, 0b100101); + + // Vector Bitwise Logical Instructions + INSN(vxor_vx, 0b1010111, 0b100, 0b001011); + INSN(vor_vx, 0b1010111, 0b100, 0b001010); + INSN(vand_vx, 0b1010111, 0b100, 0b001001); + + // Vector Single-Width Integer Add and Subtract + INSN(vsub_vx, 0b1010111, 0b100, 0b000010); + INSN(vadd_vx, 0b1010111, 0b100, 0b000000); + +#undef INSN + +#define INSN(NAME, op, funct3, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs2, FloatRegister Rs1, VectorMask vm = unmasked) { \ + patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \ + } + + // Vector Floating-Point Compare Instructions + INSN(vmfge_vf, 0b1010111, 0b101, 0b011111); + INSN(vmfgt_vf, 0b1010111, 0b101, 0b011101); + INSN(vmfle_vf, 0b1010111, 0b101, 0b011001); + INSN(vmflt_vf, 0b1010111, 0b101, 0b011011); + INSN(vmfne_vf, 0b1010111, 0b101, 0b011100); + INSN(vmfeq_vf, 0b1010111, 0b101, 0b011000); + + // Vector Floating-Point Sign-Injection Instructions + INSN(vfsgnjx_vf, 0b1010111, 0b101, 0b001010); + INSN(vfsgnjn_vf, 0b1010111, 0b101, 0b001001); + INSN(vfsgnj_vf, 0b1010111, 0b101, 0b001000); + + // Vector Floating-Point MIN/MAX Instructions + INSN(vfmax_vf, 0b1010111, 0b101, 0b000110); + INSN(vfmin_vf, 0b1010111, 0b101, 0b000100); + + // Vector Single-Width Floating-Point Multiply/Divide Instructions + INSN(vfdiv_vf, 0b1010111, 0b101, 0b100000); + INSN(vfmul_vf, 0b1010111, 0b101, 0b100100); + INSN(vfrdiv_vf, 0b1010111, 0b101, 0b100001); + + // Vector Single-Width Floating-Point Add/Subtract Instructions + INSN(vfsub_vf, 0b1010111, 0b101, 0b000010); + INSN(vfadd_vf, 0b1010111, 0b101, 0b000000); + INSN(vfrsub_vf, 0b1010111, 0b101, 0b100111); + +#undef INSN + +#define INSN(NAME, op, funct3, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs2, int32_t imm, VectorMask vm = unmasked) { \ + guarantee(is_imm_in_range(imm, 5, 0), "imm is invalid"); \ + patch_VArith(op, Vd, funct3, (uint32_t)imm & 0x1f, Vs2, vm, funct6); \ + } + + INSN(vmsgt_vi, 0b1010111, 0b011, 0b011111); + INSN(vmsgtu_vi, 0b1010111, 0b011, 0b011110); + INSN(vmsle_vi, 0b1010111, 0b011, 0b011101); + INSN(vmsleu_vi, 0b1010111, 0b011, 0b011100); + INSN(vmsne_vi, 0b1010111, 0b011, 0b011001); + INSN(vmseq_vi, 0b1010111, 0b011, 0b011000); + INSN(vxor_vi, 0b1010111, 0b011, 0b001011); + INSN(vor_vi, 0b1010111, 0b011, 0b001010); + INSN(vand_vi, 0b1010111, 0b011, 0b001001); + INSN(vadd_vi, 0b1010111, 0b011, 0b000000); + +#undef INSN + +#define 
INSN(NAME, op, funct3, funct6) \ + void NAME(VectorRegister Vd, int32_t imm, VectorRegister Vs2, VectorMask vm = unmasked) { \ + guarantee(is_imm_in_range(imm, 5, 0), "imm is invalid"); \ + patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6); \ + } + + INSN(vrsub_vi, 0b1010111, 0b011, 0b000011); + +#undef INSN + +#define INSN(NAME, op, funct3, vm, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs2, VectorRegister Vs1) { \ + patch_VArith(op, Vd, funct3, Vs1->encoding_nocheck(), Vs2, vm, funct6); \ + } + + // Vector Compress Instruction + INSN(vcompress_vm, 0b1010111, 0b010, 0b1, 0b010111); + + // Vector Mask-Register Logical Instructions + INSN(vmxnor_mm, 0b1010111, 0b010, 0b1, 0b011111); + INSN(vmornot_mm, 0b1010111, 0b010, 0b1, 0b011100); + INSN(vmnor_mm, 0b1010111, 0b010, 0b1, 0b011110); + INSN(vmor_mm, 0b1010111, 0b010, 0b1, 0b011010); + INSN(vmxor_mm, 0b1010111, 0b010, 0b1, 0b011011); + INSN(vmandnot_mm, 0b1010111, 0b010, 0b1, 0b011000); + INSN(vmnand_mm, 0b1010111, 0b010, 0b1, 0b011101); + INSN(vmand_mm, 0b1010111, 0b010, 0b1, 0b011001); + +#undef INSN + +#define INSN(NAME, op, funct3, Vs2, vm, funct6) \ + void NAME(VectorRegister Vd, int32_t imm) { \ + guarantee(is_imm_in_range(imm, 5, 0), "imm is invalid"); \ + patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6); \ + } + + // Vector Integer Move Instructions + INSN(vmv_v_i, 0b1010111, 0b011, v0, 0b1, 0b010111); + +#undef INSN + +#define INSN(NAME, op, funct3, Vs2, vm, funct6) \ + void NAME(VectorRegister Vd, FloatRegister Rs1) { \ + patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \ + } + + // Floating-Point Scalar Move Instructions + INSN(vfmv_s_f, 0b1010111, 0b101, v0, 0b1, 0b010000); + // Vector Floating-Point Move Instruction + INSN(vfmv_v_f, 0b1010111, 0b101, v0, 0b1, 0b010111); + +#undef INSN + +#define INSN(NAME, op, funct3, Vs2, vm, funct6) \ + void NAME(VectorRegister Vd, VectorRegister Vs1) { \ + patch_VArith(op, Vd, funct3, Vs1->encoding_nocheck(), Vs2, vm, funct6); \ + } + + // Vector Integer Move Instructions + INSN(vmv_v_v, 0b1010111, 0b000, v0, 0b1, 0b010111); + +#undef INSN + +#define INSN(NAME, op, funct3, Vs2, vm, funct6) \ + void NAME(VectorRegister Vd, Register Rs1) { \ + patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \ + } + + // Integer Scalar Move Instructions + INSN(vmv_s_x, 0b1010111, 0b110, v0, 0b1, 0b010000); + + // Vector Integer Move Instructions + INSN(vmv_v_x, 0b1010111, 0b100, v0, 0b1, 0b010111); + +#undef INSN +#undef patch_VArith + +#define INSN(NAME, op, funct13, funct6) \ + void NAME(VectorRegister Vd, VectorMask vm = unmasked) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 24, 12, funct13); \ + patch((address)&insn, 25, vm); \ + patch((address)&insn, 31, 26, funct6); \ + patch_reg((address)&insn, 7, Vd); \ + emit(insn); \ + } + + // Vector Element Index Instruction + INSN(vid_v, 0b1010111, 0b0000010001010, 0b010100); + +#undef INSN + +enum Nf { + g1 = 0b000, + g2 = 0b001, + g3 = 0b010, + g4 = 0b011, + g5 = 0b100, + g6 = 0b101, + g7 = 0b110, + g8 = 0b111 +}; + +#define patch_VLdSt(op, VReg, width, Rs1, Reg_or_umop, vm, mop, mew, nf) \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, width); \ + patch((address)&insn, 24, 20, Reg_or_umop); \ + patch((address)&insn, 25, vm); \ + patch((address)&insn, 27, 26, mop); \ + patch((address)&insn, 28, mew); \ + patch((address)&insn, 31, 29, nf); \ + patch_reg((address)&insn, 7, VReg); 
\ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn) + +#define INSN(NAME, op, lumop, vm, mop, nf) \ + void NAME(VectorRegister Vd, Register Rs1, uint32_t width = 0, bool mew = false) { \ + guarantee(is_unsigned_imm_in_range(width, 3, 0), "width is invalid"); \ + patch_VLdSt(op, Vd, width, Rs1, lumop, vm, mop, mew, nf); \ + } + + // Vector Load/Store Instructions + INSN(vl1r_v, 0b0000111, 0b01000, 0b1, 0b00, g1); + +#undef INSN + +#define INSN(NAME, op, width, sumop, vm, mop, mew, nf) \ + void NAME(VectorRegister Vs3, Register Rs1) { \ + patch_VLdSt(op, Vs3, width, Rs1, sumop, vm, mop, mew, nf); \ + } + + // Vector Load/Store Instructions + INSN(vs1r_v, 0b0100111, 0b000, 0b01000, 0b1, 0b00, 0b0, g1); + +#undef INSN + +// r2_nfvm +#define INSN(NAME, op, width, umop, mop, mew) \ + void NAME(VectorRegister Vd_or_Vs3, Register Rs1, Nf nf = g1) { \ + patch_VLdSt(op, Vd_or_Vs3, width, Rs1, umop, 1, mop, mew, nf); \ + } + + // Vector Unit-Stride Instructions + INSN(vle1_v, 0b0000111, 0b000, 0b01011, 0b00, 0b0); + INSN(vse1_v, 0b0100111, 0b000, 0b01011, 0b00, 0b0); + +#undef INSN + +#define INSN(NAME, op, width, umop, mop, mew) \ + void NAME(VectorRegister Vd_or_Vs3, Register Rs1, VectorMask vm = unmasked, Nf nf = g1) { \ + patch_VLdSt(op, Vd_or_Vs3, width, Rs1, umop, vm, mop, mew, nf); \ + } + + // Vector Unit-Stride Instructions + INSN(vle8_v, 0b0000111, 0b000, 0b00000, 0b00, 0b0); + INSN(vle16_v, 0b0000111, 0b101, 0b00000, 0b00, 0b0); + INSN(vle32_v, 0b0000111, 0b110, 0b00000, 0b00, 0b0); + INSN(vle64_v, 0b0000111, 0b111, 0b00000, 0b00, 0b0); + + // Vector unit-stride fault-only-first Instructions + INSN(vle8ff_v, 0b0000111, 0b000, 0b10000, 0b00, 0b0); + INSN(vle16ff_v, 0b0000111, 0b101, 0b10000, 0b00, 0b0); + INSN(vle32ff_v, 0b0000111, 0b110, 0b10000, 0b00, 0b0); + INSN(vle64ff_v, 0b0000111, 0b111, 0b10000, 0b00, 0b0); + + INSN(vse8_v, 0b0100111, 0b000, 0b00000, 0b00, 0b0); + INSN(vse16_v, 0b0100111, 0b101, 0b00000, 0b00, 0b0); + INSN(vse32_v, 0b0100111, 0b110, 0b00000, 0b00, 0b0); + INSN(vse64_v, 0b0100111, 0b111, 0b00000, 0b00, 0b0); + +#undef INSN + +#define INSN(NAME, op, width, mop, mew) \ + void NAME(VectorRegister Vd, Register Rs1, VectorRegister Vs2, VectorMask vm = unmasked, Nf nf = g1) { \ + patch_VLdSt(op, Vd, width, Rs1, Vs2->encoding_nocheck(), vm, mop, mew, nf); \ + } + + // Vector unordered indexed load instructions + INSN(vluxei8_v, 0b0000111, 0b000, 0b01, 0b0); + INSN(vluxei16_v, 0b0000111, 0b101, 0b01, 0b0); + INSN(vluxei32_v, 0b0000111, 0b110, 0b01, 0b0); + INSN(vluxei64_v, 0b0000111, 0b111, 0b01, 0b0); + + // Vector ordered indexed load instructions + INSN(vloxei8_v, 0b0000111, 0b000, 0b11, 0b0); + INSN(vloxei16_v, 0b0000111, 0b101, 0b11, 0b0); + INSN(vloxei32_v, 0b0000111, 0b110, 0b11, 0b0); + INSN(vloxei64_v, 0b0000111, 0b111, 0b11, 0b0); +#undef INSN + +#define INSN(NAME, op, width, mop, mew) \ + void NAME(VectorRegister Vd, Register Rs1, Register Rs2, VectorMask vm = unmasked, Nf nf = g1) { \ + patch_VLdSt(op, Vd, width, Rs1, Rs2->encoding_nocheck(), vm, mop, mew, nf); \ + } + + // Vector Strided Instructions + INSN(vlse8_v, 0b0000111, 0b000, 0b10, 0b0); + INSN(vlse16_v, 0b0000111, 0b101, 0b10, 0b0); + INSN(vlse32_v, 0b0000111, 0b110, 0b10, 0b0); + INSN(vlse64_v, 0b0000111, 0b111, 0b10, 0b0); + +#undef INSN +#undef patch_VLdSt + +// ==================================== +// RISC-V Bit-Manipulation Extension +// ==================================== +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + unsigned insn = 0; \ + 
patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + patch_reg((address)&insn, 20, Rs2); \ + emit(insn); \ + } + + INSN(add_uw, 0b0111011, 0b000, 0b0000100); + INSN(rol, 0b0110011, 0b001, 0b0110000); + INSN(rolw, 0b0111011, 0b001, 0b0110000); + INSN(ror, 0b0110011, 0b101, 0b0110000); + INSN(rorw, 0b0111011, 0b101, 0b0110000); + INSN(sh1add, 0b0110011, 0b010, 0b0010000); + INSN(sh2add, 0b0110011, 0b100, 0b0010000); + INSN(sh3add, 0b0110011, 0b110, 0b0010000); + INSN(sh1add_uw, 0b0111011, 0b010, 0b0010000); + INSN(sh2add_uw, 0b0111011, 0b100, 0b0010000); + INSN(sh3add_uw, 0b0111011, 0b110, 0b0010000); + INSN(andn, 0b0110011, 0b111, 0b0100000); + INSN(orn, 0b0110011, 0b110, 0b0100000); + INSN(xnor, 0b0110011, 0b100, 0b0100000); + INSN(max, 0b0110011, 0b110, 0b0000101); + INSN(maxu, 0b0110011, 0b111, 0b0000101); + INSN(min, 0b0110011, 0b100, 0b0000101); + INSN(minu, 0b0110011, 0b101, 0b0000101); + +#undef INSN + +#define INSN(NAME, op, funct3, funct12) \ + void NAME(Register Rd, Register Rs1) { \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 31, 20, funct12); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(rev8, 0b0010011, 0b101, 0b011010111000); + INSN(sext_b, 0b0010011, 0b001, 0b011000000100); + INSN(sext_h, 0b0010011, 0b001, 0b011000000101); + INSN(zext_h, 0b0111011, 0b100, 0b000010000000); + INSN(clz, 0b0010011, 0b001, 0b011000000000); + INSN(clzw, 0b0011011, 0b001, 0b011000000000); + INSN(ctz, 0b0010011, 0b001, 0b011000000001); + INSN(ctzw, 0b0011011, 0b001, 0b011000000001); + INSN(cpop, 0b0010011, 0b001, 0b011000000010); + INSN(cpopw, 0b0011011, 0b001, 0b011000000010); + INSN(orc_b, 0b0010011, 0b101, 0b001010000111); + +#undef INSN + +#define INSN(NAME, op, funct3, funct6) \ + void NAME(Register Rd, Register Rs1, unsigned shamt) {\ + guarantee(shamt <= 0x3f, "Shamt is invalid"); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 25, 20, shamt); \ + patch((address)&insn, 31, 26, funct6); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(rori, 0b0010011, 0b101, 0b011000); + INSN(slli_uw, 0b0011011, 0b001, 0b000010); + +#undef INSN + +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, unsigned shamt) {\ + guarantee(shamt <= 0x1f, "Shamt is invalid"); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ + patch((address)&insn, 24, 20, shamt); \ + patch((address)&insn, 31, 25, funct7); \ + patch_reg((address)&insn, 7, Rd); \ + patch_reg((address)&insn, 15, Rs1); \ + emit(insn); \ + } + + INSN(roriw, 0b0011011, 0b101, 0b0110000); + +#undef INSN + +// ======================================== +// RISC-V Compressed Instructions Extension +// ======================================== +// Note: +// 1. When UseRVC is enabled, 32-bit instructions under 'CompressibleRegion's will be +// transformed to 16-bit instructions if compressible. +// 2. RVC instructions in Assembler always begin with 'c_' prefix, as 'c_li', +// but most of time we have no need to explicitly use these instructions. +// 3. 
'CompressibleRegion' is introduced to hint that instructions in this Region's RTTI range +// are qualified to be compressed with their 2-byte versions. +// An example: +// +// CompressibleRegion cr(_masm); +// __ andr(...); // this instruction could change to c.and if possible +// +// 4. Using -XX:PrintAssemblyOptions=no-aliases can be used to distinguish RVC instructions from +// normal ones. +// + +private: + bool _in_compressible_region; +public: + bool in_compressible_region() const { return _in_compressible_region; } + void set_in_compressible_region(bool b) { _in_compressible_region = b; } +public: + + // a compressible region + class CompressibleRegion : public StackObj { + protected: + Assembler *_masm; + bool _saved_in_compressible_region; + public: + CompressibleRegion(Assembler *_masm) + : _masm(_masm) + , _saved_in_compressible_region(_masm->in_compressible_region()) { + _masm->set_in_compressible_region(true); + } + ~CompressibleRegion() { + _masm->set_in_compressible_region(_saved_in_compressible_region); + } + }; + + // patch a 16-bit instruction. + static void c_patch(address a, unsigned msb, unsigned lsb, uint16_t val) { + assert_cond(a != NULL); + assert_cond(msb >= lsb && msb <= 15); + unsigned nbits = msb - lsb + 1; + guarantee(val < (1U << nbits), "Field too big for insn"); + uint16_t mask = (1U << nbits) - 1; + val <<= lsb; + mask <<= lsb; + uint16_t target = *(uint16_t *)a; + target &= ~mask; + target |= val; + *(uint16_t *)a = target; + } + + static void c_patch(address a, unsigned bit, uint16_t val) { + c_patch(a, bit, bit, val); + } + + // patch a 16-bit instruction with a general purpose register ranging [0, 31] (5 bits) + static void c_patch_reg(address a, unsigned lsb, Register reg) { + c_patch(a, lsb + 4, lsb, reg->encoding_nocheck()); + } + + // patch a 16-bit instruction with a general purpose register ranging [8, 15] (3 bits) + static void c_patch_compressed_reg(address a, unsigned lsb, Register reg) { + c_patch(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); + } + + // patch a 16-bit instruction with a float register ranging [0, 31] (5 bits) + static void c_patch_reg(address a, unsigned lsb, FloatRegister reg) { + c_patch(a, lsb + 4, lsb, reg->encoding_nocheck()); + } + + // patch a 16-bit instruction with a float register ranging [8, 15] (3 bits) + static void c_patch_compressed_reg(address a, unsigned lsb, FloatRegister reg) { + c_patch(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); + } + +// -------------- RVC Instruction Definitions -------------- + + void c_nop() { + c_addi(x0, 0); + } + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd_Rs1, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 6, 0)); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ + c_patch_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_addi, 0b000, 0b01); + INSN(c_addiw, 0b001, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 10, 0)); \ + assert_cond((imm & 0b1111) == 0); \ + assert_cond(imm != 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 4, 3, (imm & right_n_bits(9)) >> 7); \ + c_patch((address)&insn, 5, 5, (imm & nth_bit(6)) >> 6); \ + c_patch((address)&insn, 6, 6, (imm & nth_bit(4)) >> 4); \ +
c_patch_reg((address)&insn, 7, sp); \ + c_patch((address)&insn, 12, 12, (imm & nth_bit(9)) >> 9); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_addi16sp, 0b011, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 10, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + assert_cond(uimm != 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_compressed_reg((address)&insn, 2, Rd); \ + c_patch((address)&insn, 5, 5, (uimm & nth_bit(3)) >> 3); \ + c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ + c_patch((address)&insn, 10, 7, (uimm & right_n_bits(10)) >> 6); \ + c_patch((address)&insn, 12, 11, (uimm & right_n_bits(6)) >> 4); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_addi4spn, 0b000, 0b00); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd_Rs1, uint32_t shamt) { \ + assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ + assert_cond(shamt != 0); \ + assert_cond(Rd_Rs1 != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ + c_patch_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_slli, 0b000, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, funct2, op) \ + void NAME(Register Rd_Rs1, uint32_t shamt) { \ + assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ + assert_cond(shamt != 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ + c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 11, 10, funct2); \ + c_patch((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_srli, 0b100, 0b00, 0b01); + INSN(c_srai, 0b100, 0b01, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, funct2, op) \ + void NAME(Register Rd_Rs1, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 6, 0)); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ + c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 11, 10, funct2); \ + c_patch((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_andi, 0b100, 0b10, 0b01); + +#undef INSN + +#define INSN(NAME, funct6, funct2, op) \ + void NAME(Register Rd_Rs1, Register Rs2) { \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_compressed_reg((address)&insn, 2, Rs2); \ + c_patch((address)&insn, 6, 5, funct2); \ + c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 15, 10, funct6); \ + emit_int16(insn); \ + } + + INSN(c_sub, 0b100011, 0b00, 0b01); + INSN(c_xor, 0b100011, 0b01, 0b01); + INSN(c_or, 0b100011, 0b10, 0b01); + INSN(c_and, 0b100011, 0b11, 0b01); + INSN(c_subw, 0b100111, 0b00, 0b01); + INSN(c_addw, 0b100111, 0b01, 0b01); + +#undef INSN + +#define INSN(NAME, funct4, op) \ + void NAME(Register Rd_Rs1, Register Rs2) { \ + assert_cond(Rd_Rs1 != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_reg((address)&insn, 2, Rs2); \ + c_patch_reg((address)&insn, 7, Rd_Rs1); \ + c_patch((address)&insn, 15, 12, funct4); \ + 
emit_int16(insn); \ + } + + INSN(c_mv, 0b1000, 0b10); + INSN(c_add, 0b1001, 0b10); + +#undef INSN + +#define INSN(NAME, funct4, op) \ + void NAME(Register Rs1) { \ + assert_cond(Rs1 != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_reg((address)&insn, 2, x0); \ + c_patch_reg((address)&insn, 7, Rs1); \ + c_patch((address)&insn, 15, 12, funct4); \ + emit_int16(insn); \ + } + + INSN(c_jr, 0b1000, 0b10); + INSN(c_jalr, 0b1001, 0b10); + +#undef INSN + + typedef void (Assembler::* j_c_insn)(address dest); + typedef void (Assembler::* compare_and_branch_c_insn)(Register Rs1, address dest); + + void wrap_label(Label &L, j_c_insn insn) { + if (L.is_bound()) { + (this->*insn)(target(L)); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(pc()); + } + } + + void wrap_label(Label &L, Register r, compare_and_branch_c_insn insn) { + if (L.is_bound()) { + (this->*insn)(r, target(L)); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(r, pc()); + } + } + +#define INSN(NAME, funct3, op) \ + void NAME(int32_t offset) { \ + assert_cond(is_imm_in_range(offset, 11, 1)); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 2, 2, (offset & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 5, 3, (offset & right_n_bits(4)) >> 1); \ + c_patch((address)&insn, 6, 6, (offset & nth_bit(7)) >> 7); \ + c_patch((address)&insn, 7, 7, (offset & nth_bit(6)) >> 6); \ + c_patch((address)&insn, 8, 8, (offset & nth_bit(10)) >> 10); \ + c_patch((address)&insn, 10, 9, (offset & right_n_bits(10)) >> 8); \ + c_patch((address)&insn, 11, 11, (offset & nth_bit(4)) >> 4); \ + c_patch((address)&insn, 12, 12, (offset & nth_bit(11)) >> 11); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } \ + void NAME(address dest) { \ + assert_cond(dest != NULL); \ + int64_t distance = dest - pc(); \ + assert_cond(is_imm_in_range(distance, 11, 1)); \ + c_j(distance); \ + } \ + void NAME(Label &L) { \ + wrap_label(L, &Assembler::NAME); \ + } + + INSN(c_j, 0b101, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rs1, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 8, 1)); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 4, 3, (imm & right_n_bits(3)) >> 1); \ + c_patch((address)&insn, 6, 5, (imm & right_n_bits(8)) >> 6); \ + c_patch_compressed_reg((address)&insn, 7, Rs1); \ + c_patch((address)&insn, 11, 10, (imm & right_n_bits(5)) >> 3); \ + c_patch((address)&insn, 12, 12, (imm & nth_bit(8)) >> 8); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } \ + void NAME(Register Rs1, address dest) { \ + assert_cond(dest != NULL); \ + int64_t distance = dest - pc(); \ + assert_cond(is_imm_in_range(distance, 8, 1)); \ + NAME(Rs1, distance); \ + } \ + void NAME(Register Rs1, Label &L) { \ + wrap_label(L, Rs1, &Assembler::NAME); \ + } + + INSN(c_beqz, 0b110, 0b01); + INSN(c_bnez, 0b111, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 18, 0)); \ + assert_cond((imm & 0xfff) == 0); \ + assert_cond(imm != 0); \ + assert_cond(Rd != x0 && Rd != x2); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (imm & right_n_bits(17)) >> 12); \ + c_patch_reg((address)&insn, 7, Rd); \ + c_patch((address)&insn, 12, 12, (imm & nth_bit(17)) >> 17); \ + c_patch((address)&insn, 15, 13, funct3); 
\ + emit_int16(insn); \ + } + + INSN(c_lui, 0b011, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 6, 0)); \ + assert_cond(Rd != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ + c_patch_reg((address)&insn, 7, Rd); \ + c_patch((address)&insn, 12, 12, (imm & right_n_bits(6)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_li, 0b010, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + assert_cond(Rd != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \ + c_patch((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \ + c_patch_reg((address)&insn, 7, Rd); \ + c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_ldsp, 0b011, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(FloatRegister Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \ + c_patch((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \ + c_patch_reg((address)&insn, 7, Rd); \ + c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_fldsp, 0b001, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, op, REGISTER_TYPE) \ + void NAME(REGISTER_TYPE Rd_Rs2, Register Rs1, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_compressed_reg((address)&insn, 2, Rd_Rs2); \ + c_patch((address)&insn, 6, 5, (uimm & right_n_bits(8)) >> 6); \ + c_patch_compressed_reg((address)&insn, 7, Rs1); \ + c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_ld, 0b011, 0b00, Register); + INSN(c_sd, 0b111, 0b00, Register); + INSN(c_fld, 0b001, 0b00, FloatRegister); + INSN(c_fsd, 0b101, 0b00, FloatRegister); + +#undef INSN + +#define INSN(NAME, funct3, op, REGISTER_TYPE) \ + void NAME(REGISTER_TYPE Rs2, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_reg((address)&insn, 2, Rs2); \ + c_patch((address)&insn, 9, 7, (uimm & right_n_bits(9)) >> 6); \ + c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_sdsp, 0b111, 0b10, Register); + INSN(c_fsdsp, 0b101, 0b10, FloatRegister); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rs2, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_reg((address)&insn, 2, Rs2); \ + c_patch((address)&insn, 8, 7, (uimm & right_n_bits(8)) >> 6); \ + c_patch((address)&insn, 12, 9, (uimm & right_n_bits(6)) >> 2); \ + 
c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_swsp, 0b110, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + assert_cond(Rd != x0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 3, 2, (uimm & right_n_bits(8)) >> 6); \ + c_patch((address)&insn, 6, 4, (uimm & right_n_bits(5)) >> 2); \ + c_patch_reg((address)&insn, 7, Rd); \ + c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_lwsp, 0b010, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd_Rs2, Register Rs1, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 7, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch_compressed_reg((address)&insn, 2, Rd_Rs2); \ + c_patch((address)&insn, 5, 5, (uimm & nth_bit(6)) >> 6); \ + c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ + c_patch_compressed_reg((address)&insn, 7, Rs1); \ + c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_lw, 0b010, 0b00); + INSN(c_sw, 0b110, 0b00); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME() { \ + uint16_t insn = 0; \ + c_patch((address)&insn, 1, 0, op); \ + c_patch((address)&insn, 11, 2, 0x0); \ + c_patch((address)&insn, 12, 12, 0b1); \ + c_patch((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(c_ebreak, 0b100, 0b10); + +#undef INSN + +// -------------- RVC Transformation Functions -------------- + +// -------------------------- +// Register instructions +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + /* add -> c.add */ \ + if (do_compress()) { \ + Register src = noreg; \ + if (Rs1 != x0 && Rs2 != x0 && ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd))) { \ + c_add(Rd, src); \ + return; \ + } \ + } \ + _add(Rd, Rs1, Rs2); \ + } + + INSN(add); + +#undef INSN + +// -------------------------- +#define INSN(NAME, C_NAME, NORMAL_NAME) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + /* sub/subw -> c.sub/c.subw */ \ + if (do_compress() && \ + (Rd == Rs1 && Rd->is_compressed_valid() && Rs2->is_compressed_valid())) { \ + C_NAME(Rd, Rs2); \ + return; \ + } \ + NORMAL_NAME(Rd, Rs1, Rs2); \ + } + + INSN(sub, c_sub, _sub); + INSN(subw, c_subw, _subw); + +#undef INSN + +// -------------------------- +#define INSN(NAME, C_NAME, NORMAL_NAME) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + /* and/or/xor/addw -> c.and/c.or/c.xor/c.addw */ \ + if (do_compress()) { \ + Register src = noreg; \ + if (Rs1->is_compressed_valid() && Rs2->is_compressed_valid() && \ + ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd))) { \ + C_NAME(Rd, src); \ + return; \ + } \ + } \ + NORMAL_NAME(Rd, Rs1, Rs2); \ + } + + INSN(andr, c_and, _andr); + INSN(orr, c_or, _orr); + INSN(xorr, c_xor, _xorr); + INSN(addw, c_addw, _addw); + +#undef INSN + +private: +// some helper functions + bool do_compress() const { + return UseRVC && in_compressible_region(); + } + +#define FUNC(NAME, funct3, bits) \ + bool NAME(Register rs1, Register rd_rs2, int32_t imm12, bool ld) { \ + return rs1 == sp && \ + is_unsigned_imm_in_range(imm12, bits, 0) && \ + (intx(imm12) & funct3) == 0x0 && 
\ + (!ld || rd_rs2 != x0); \ + } \ + + FUNC(is_c_ldsdsp, 0b111, 9); + FUNC(is_c_lwswsp, 0b011, 8); + +#undef FUNC + +#define FUNC(NAME, funct3, bits) \ + bool NAME(Register rs1, int32_t imm12) { \ + return rs1 == sp && \ + is_unsigned_imm_in_range(imm12, bits, 0) && \ + (intx(imm12) & funct3) == 0x0; \ + } \ + + FUNC(is_c_fldsdsp, 0b111, 9); + +#undef FUNC + +#define FUNC(NAME, REG_TYPE, funct3, bits) \ + bool NAME(Register rs1, REG_TYPE rd_rs2, int32_t imm12) { \ + return rs1->is_compressed_valid() && \ + rd_rs2->is_compressed_valid() && \ + is_unsigned_imm_in_range(imm12, bits, 0) && \ + (intx(imm12) & funct3) == 0x0; \ + } \ + + FUNC(is_c_ldsd, Register, 0b111, 8); + FUNC(is_c_lwsw, Register, 0b011, 7); + FUNC(is_c_fldsd, FloatRegister, 0b111, 8); + +#undef FUNC + +public: +// -------------------------- +// Load/store register +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ + /* lw -> c.lwsp/c.lw */ \ + if (do_compress()) { \ + if (is_c_lwswsp(Rs, Rd, offset, true)) { \ + c_lwsp(Rd, offset); \ + return; \ + } else if (is_c_lwsw(Rs, Rd, offset)) { \ + c_lw(Rd, Rs, offset); \ + return; \ + } \ + } \ + _lw(Rd, Rs, offset); \ + } + + INSN(lw); + +#undef INSN + +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ + /* ld -> c.ldsp/c.ld */ \ + if (do_compress()) { \ + if (is_c_ldsdsp(Rs, Rd, offset, true)) { \ + c_ldsp(Rd, offset); \ + return; \ + } else if (is_c_ldsd(Rs, Rd, offset)) { \ + c_ld(Rd, Rs, offset); \ + return; \ + } \ + } \ + _ld(Rd, Rs, offset); \ + } + + INSN(ld); + +#undef INSN + +// -------------------------- +#define INSN(NAME) \ + void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ + /* fld -> c.fldsp/c.fld */ \ + if (do_compress()) { \ + if (is_c_fldsdsp(Rs, offset)) { \ + c_fldsp(Rd, offset); \ + return; \ + } else if (is_c_fldsd(Rs, Rd, offset)) { \ + c_fld(Rd, Rs, offset); \ + return; \ + } \ + } \ + _fld(Rd, Rs, offset); \ + } + + INSN(fld); + +#undef INSN + +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ + /* sd -> c.sdsp/c.sd */ \ + if (do_compress()) { \ + if (is_c_ldsdsp(Rs, Rd, offset, false)) { \ + c_sdsp(Rd, offset); \ + return; \ + } else if (is_c_ldsd(Rs, Rd, offset)) { \ + c_sd(Rd, Rs, offset); \ + return; \ + } \ + } \ + _sd(Rd, Rs, offset); \ + } + + INSN(sd); + +#undef INSN + +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ + /* sw -> c.swsp/c.sw */ \ + if (do_compress()) { \ + if (is_c_lwswsp(Rs, Rd, offset, false)) { \ + c_swsp(Rd, offset); \ + return; \ + } else if (is_c_lwsw(Rs, Rd, offset)) { \ + c_sw(Rd, Rs, offset); \ + return; \ + } \ + } \ + _sw(Rd, Rs, offset); \ + } + + INSN(sw); + +#undef INSN + +// -------------------------- +#define INSN(NAME) \ + void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ + /* fsd -> c.fsdsp/c.fsd */ \ + if (do_compress()) { \ + if (is_c_fldsdsp(Rs, offset)) { \ + c_fsdsp(Rd, offset); \ + return; \ + } else if (is_c_fldsd(Rs, Rd, offset)) { \ + c_fsd(Rd, Rs, offset); \ + return; \ + } \ + } \ + _fsd(Rd, Rs, offset); \ + } + + INSN(fsd); + +#undef INSN + +// -------------------------- +// Conditional branch instructions +// -------------------------- +#define INSN(NAME, C_NAME, NORMAL_NAME) \ + void NAME(Register Rs1, Register Rs2, const int64_t offset) { \ + /* beq/bne -> c.beqz/c.bnez */ \ + if (do_compress() && \ + 
(offset != 0 && Rs2 == x0 && Rs1->is_compressed_valid() && \ + is_imm_in_range(offset, 8, 1))) { \ + C_NAME(Rs1, offset); \ + return; \ + } \ + NORMAL_NAME(Rs1, Rs2, offset); \ + } + + INSN(beq, c_beqz, _beq); + INSN(bne, c_bnez, _bne); + +#undef INSN + +// -------------------------- +// Unconditional branch instructions +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, const int32_t offset) { \ + /* jal -> c.j */ \ + if (do_compress() && offset != 0 && Rd == x0 && is_imm_in_range(offset, 11, 1)) { \ + c_j(offset); \ + return; \ + } \ + _jal(Rd, offset); \ + } + + INSN(jal); + +#undef INSN + +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ + /* jalr -> c.jr/c.jalr */ \ + if (do_compress() && (offset == 0 && Rs != x0)) { \ + if (Rd == x1) { \ + c_jalr(Rs); \ + return; \ + } else if (Rd == x0) { \ + c_jr(Rs); \ + return; \ + } \ + } \ + _jalr(Rd, Rs, offset); \ + } + + INSN(jalr); + +#undef INSN + +// -------------------------- +// Miscellaneous Instructions +// -------------------------- +#define INSN(NAME) \ + void NAME() { \ + /* ebreak -> c.ebreak */ \ + if (do_compress()) { \ + c_ebreak(); \ + return; \ + } \ + _ebreak(); \ + } + + INSN(ebreak); + +#undef INSN + +#define INSN(NAME) \ + void NAME() { \ + /* The illegal instruction in RVC is presented by a 16-bit 0. */ \ + if (do_compress()) { \ + emit_int16(0); \ + return; \ + } \ + _halt(); \ + } + + INSN(halt); + +#undef INSN + +// -------------------------- +// Immediate Instructions +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, int64_t imm) { \ + /* li -> c.li */ \ + if (do_compress() && (is_imm_in_range(imm, 6, 0) && Rd != x0)) { \ + c_li(Rd, imm); \ + return; \ + } \ + _li(Rd, imm); \ + } + + INSN(li); + +#undef INSN + +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs1, int32_t imm) { \ + /* addi -> c.addi/c.nop/c.mv/c.addi16sp/c.addi4spn */ \ + if (do_compress()) { \ + if (Rd == Rs1 && is_imm_in_range(imm, 6, 0)) { \ + c_addi(Rd, imm); \ + return; \ + } else if (imm == 0 && Rd != x0 && Rs1 != x0) { \ + c_mv(Rd, Rs1); \ + return; \ + } else if (Rs1 == sp && imm != 0) { \ + if (Rd == Rs1 && (imm & 0b1111) == 0x0 && is_imm_in_range(imm, 10, 0)) { \ + c_addi16sp(imm); \ + return; \ + } else if (Rd->is_compressed_valid() && (imm & 0b11) == 0x0 && is_unsigned_imm_in_range(imm, 10, 0)) { \ + c_addi4spn(Rd, imm); \ + return; \ + } \ + } \ + } \ + _addi(Rd, Rs1, imm); \ + } + + INSN(addi); + +#undef INSN + +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs1, int32_t imm) { \ + /* addiw -> c.addiw */ \ + if (do_compress() && (Rd == Rs1 && Rd != x0 && is_imm_in_range(imm, 6, 0))) { \ + c_addiw(Rd, imm); \ + return; \ + } \ + _addiw(Rd, Rs1, imm); \ + } + + INSN(addiw); + +#undef INSN + +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs1, int32_t imm) { \ + /* and_imm12 -> c.andi */ \ + if (do_compress() && \ + (Rd == Rs1 && Rd->is_compressed_valid() && is_imm_in_range(imm, 6, 0))) { \ + c_andi(Rd, imm); \ + return; \ + } \ + _and_imm12(Rd, Rs1, imm); \ + } + + INSN(and_imm12); + +#undef INSN + +// -------------------------- +// Shift Immediate Instructions +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, Register Rs1, unsigned shamt) { \ + /* slli -> c.slli */ \ + if (do_compress() && (Rd == Rs1 && Rd != x0 && shamt != 0)) { \ + c_slli(Rd, shamt); \ + return; \ + } \ + 
_slli(Rd, Rs1, shamt); \ + } + + INSN(slli); + +#undef INSN + +// -------------------------- +#define INSN(NAME, C_NAME, NORMAL_NAME) \ + void NAME(Register Rd, Register Rs1, unsigned shamt) { \ + /* srai/srli -> c.srai/c.srli */ \ + if (do_compress() && (Rd == Rs1 && Rd->is_compressed_valid() && shamt != 0)) { \ + C_NAME(Rd, shamt); \ + return; \ + } \ + NORMAL_NAME(Rd, Rs1, shamt); \ + } + + INSN(srai, c_srai, _srai); + INSN(srli, c_srli, _srli); + +#undef INSN + +// -------------------------- +// Upper Immediate Instruction +// -------------------------- +#define INSN(NAME) \ + void NAME(Register Rd, int32_t imm) { \ + /* lui -> c.lui */ \ + if (do_compress() && (Rd != x0 && Rd != x2 && imm != 0 && is_imm_in_range(imm, 18, 0))) { \ + c_lui(Rd, imm); \ + return; \ + } \ + _lui(Rd, imm); \ + } + + INSN(lui); + +#undef INSN + +// --------------------------------------------------------------------------------------- + + void bgt(Register Rs, Register Rt, const address &dest); + void ble(Register Rs, Register Rt, const address &dest); + void bgtu(Register Rs, Register Rt, const address &dest); + void bleu(Register Rs, Register Rt, const address &dest); + void bgt(Register Rs, Register Rt, Label &l, bool is_far = false); + void ble(Register Rs, Register Rt, Label &l, bool is_far = false); + void bgtu(Register Rs, Register Rt, Label &l, bool is_far = false); + void bleu(Register Rs, Register Rt, Label &l, bool is_far = false); + + typedef void (Assembler::* jal_jalr_insn)(Register Rt, address dest); + typedef void (Assembler::* load_insn_by_temp)(Register Rt, address dest, Register temp); + typedef void (Assembler::* compare_and_branch_insn)(Register Rs1, Register Rs2, const address dest); + typedef void (Assembler::* compare_and_branch_label_insn)(Register Rs1, Register Rs2, Label &L, bool is_far); + + void wrap_label(Register r1, Register r2, Label &L, compare_and_branch_insn insn, + compare_and_branch_label_insn neg_insn, bool is_far); + void wrap_label(Register r, Label &L, Register t, load_insn_by_temp insn); + void wrap_label(Register r, Label &L, jal_jalr_insn insn); + + // calculate pseudoinstruction + void add(Register Rd, Register Rn, int64_t increment, Register temp = t0); + void addw(Register Rd, Register Rn, int64_t increment, Register temp = t0); + void sub(Register Rd, Register Rn, int64_t decrement, Register temp = t0); + void subw(Register Rd, Register Rn, int64_t decrement, Register temp = t0); + + // RVB pseudo instructions + // zero extend word + void zext_w(Register Rd, Register Rs); + + Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_region(false) { + } + + // Stack overflow checking + virtual void bang_stack_with_offset(int offset) { Unimplemented(); } + + static bool operand_valid_for_add_immediate(long imm) { + return is_imm_in_range(imm, 12, 0); + } + + // The maximum range of a branch is fixed for the RISCV architecture. + static const unsigned long branch_range = 1 * M; + + static bool reachable_from_branch_at(address branch, address target) { + return uabs(target - branch) < branch_range; + } + + virtual ~Assembler() {} +}; + +class BiasedLockingCounters; + +#endif // CPU_RISCV_ASSEMBLER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP +#define CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +inline bool is_imm_in_range(long value, unsigned bits, unsigned align_bits) { + intx sign_bits = (value >> (bits + align_bits - 1)); + return ((value & right_n_bits(align_bits)) == 0) && ((sign_bits == 0) || (sign_bits == -1)); +} + +inline bool is_unsigned_imm_in_range(intx value, unsigned bits, unsigned align_bits) { + return (value >= 0) && ((value & right_n_bits(align_bits)) == 0) && ((value >> (align_bits + bits)) == 0); +} + +inline bool is_offset_in_range(intx offset, unsigned bits) { + return is_imm_in_range(offset, bits, 0); +} + +#endif // CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/bytes_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/bytes_riscv.hpp @@ -0,0 +1,167 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2016 SAP SE. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_RISCV_BYTES_RISCV_HPP +#define CPU_RISCV_BYTES_RISCV_HPP + +#include "memory/allocation.hpp" + +class Bytes: AllStatic { + public: + // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering + // RISCV needs to check for alignment. + + // Forward declarations of the compiler-dependent implementation + static inline u2 swap_u2(u2 x); + static inline u4 swap_u4(u4 x); + static inline u8 swap_u8(u8 x); + + static inline u2 get_native_u2(address p) { + if ((intptr_t(p) & 1) == 0) { + return *(u2*)p; + } else { + return ((u2)(p[1]) << 8) | + ((u2)(p[0])); + } + } + + static inline u4 get_native_u4(address p) { + switch (intptr_t(p) & 3) { + case 0: + return *(u4*)p; + + case 2: + return ((u4)(((u2*)p)[1]) << 16) | + ((u4)(((u2*)p)[0])); + + default: + return ((u4)(p[3]) << 24) | + ((u4)(p[2]) << 16) | + ((u4)(p[1]) << 8) | + ((u4)(p[0])); + } + } + + static inline u8 get_native_u8(address p) { + switch (intptr_t(p) & 7) { + case 0: + return *(u8*)p; + + case 4: + return ((u8)(((u4*)p)[1]) << 32) | + ((u8)(((u4*)p)[0])); + + case 2: + return ((u8)(((u2*)p)[3]) << 48) | + ((u8)(((u2*)p)[2]) << 32) | + ((u8)(((u2*)p)[1]) << 16) | + ((u8)(((u2*)p)[0])); + + default: + return ((u8)(p[7]) << 56) | + ((u8)(p[6]) << 48) | + ((u8)(p[5]) << 40) | + ((u8)(p[4]) << 32) | + ((u8)(p[3]) << 24) | + ((u8)(p[2]) << 16) | + ((u8)(p[1]) << 8) | + ((u8)(p[0])); + } + } + + static inline void put_native_u2(address p, u2 x) { + if ((intptr_t(p) & 1) == 0) { + *(u2*)p = x; + } else { + p[1] = x >> 8; + p[0] = x; + } + } + + static inline void put_native_u4(address p, u4 x) { + switch (intptr_t(p) & 3) { + case 0: + *(u4*)p = x; + break; + + case 2: + ((u2*)p)[1] = x >> 16; + ((u2*)p)[0] = x; + break; + + default: + ((u1*)p)[3] = x >> 24; + ((u1*)p)[2] = x >> 16; + ((u1*)p)[1] = x >> 8; + ((u1*)p)[0] = x; + break; + } + } + + static inline void put_native_u8(address p, u8 x) { + switch (intptr_t(p) & 7) { + case 0: + *(u8*)p = x; + break; + + case 4: + ((u4*)p)[1] = x >> 32; + ((u4*)p)[0] = x; + break; + + case 2: + ((u2*)p)[3] = x >> 48; + ((u2*)p)[2] = x >> 32; + ((u2*)p)[1] = x >> 16; + ((u2*)p)[0] = x; + break; + + default: + ((u1*)p)[7] = x >> 56; + ((u1*)p)[6] = x >> 48; + ((u1*)p)[5] = x >> 40; + ((u1*)p)[4] = x >> 32; + ((u1*)p)[3] = x >> 24; + ((u1*)p)[2] = x >> 16; + ((u1*)p)[1] = x >> 8; + ((u1*)p)[0] = x; + break; + } + } + + // Efficient reading and writing of unaligned unsigned data in Java byte ordering (i.e. big-endian ordering) + static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } + static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } + static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } + + static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } + static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } + static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } +}; + +#include OS_CPU_HEADER(bytes) + +#endif // CPU_RISCV_BYTES_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp @@ -0,0 +1,351 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "classfile/javaClasses.hpp" +#include "nativeInst_riscv.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_riscv.inline.hpp" + + +#define __ ce->masm()-> + +void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + InternalAddress safepoint_pc(__ pc() - __ offset() + safepoint_offset()); + __ code_section()->relocate(__ pc(), safepoint_pc.rspec()); + __ la(t0, safepoint_pc.target()); + __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); + + assert(SharedRuntime::polling_page_return_handler_blob() != NULL, + "polling page return stub not created yet"); + address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); + + __ far_jump(RuntimeAddress(stub)); +} + +void CounterOverflowStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + Metadata *m = _method->as_constant_ptr()->as_metadata(); + __ mov_metadata(t0, m); + ce->store_parameter(t0, 1); + ce->store_parameter(_bci, 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ j(_continuation); +} + +RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) + : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} + +RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) + : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} + +void RangeCheckStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_info->deoptimize_on_exception()) { + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + __ far_call(RuntimeAddress(a)); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); + return; + } + + if (_index->is_cpu_register()) { + __ mv(t0, _index->as_register()); + } else { + __ mv(t0, _index->as_jint()); + } + Runtime1::StubID stub_id; + if (_throw_index_out_of_bounds_exception) { + stub_id = Runtime1::throw_index_exception_id; + } else { + assert(_array != NULL, "sanity"); + __ mv(t1, _array->as_pointer_register()); + stub_id = 
Runtime1::throw_range_check_failed_id; + } + int32_t off = 0; + __ la_patchable(ra, RuntimeAddress(Runtime1::entry_for(stub_id)), off); + __ jalr(ra, ra, off); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + +PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { + _info = new CodeEmitInfo(info); +} + +void PredicateFailedStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + __ far_call(RuntimeAddress(a)); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + +void DivByZeroStub::emit_code(LIR_Assembler* ce) { + if (_offset != -1) { + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + } + __ bind(_entry); + __ far_call(Address(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type)); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); +#ifdef ASSERT + __ should_not_reach_here(); +#endif +} + +// Implementation of NewInstanceStub +NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) { + _result = result; + _klass = klass; + _klass_reg = klass_reg; + _info = new CodeEmitInfo(info); + assert(stub_id == Runtime1::new_instance_id || + stub_id == Runtime1::fast_new_instance_id || + stub_id == Runtime1::fast_new_instance_init_check_id, + "need new_instance id"); + _stub_id = stub_id; +} + +void NewInstanceStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + __ mv(x13, _klass_reg->as_register()); + __ far_call(RuntimeAddress(Runtime1::entry_for(_stub_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + assert(_result->as_register() == x10, "result must in x10"); + __ j(_continuation); +} + +// Implementation of NewTypeArrayStub +NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { + _klass_reg = klass_reg; + _length = length; + _result = result; + _info = new CodeEmitInfo(info); +} + +void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == x9, "length must in x9"); + assert(_klass_reg->as_register() == x13, "klass_reg must in x13"); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + assert(_result->as_register() == x10, "result must in x10"); + __ j(_continuation); +} + +// Implementation of NewObjectArrayStub +NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { + _klass_reg = klass_reg; + _result = result; + _length = length; + _info = new CodeEmitInfo(info); +} + +void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == x9, "length must in x9"); + assert(_klass_reg->as_register() == x13, "klass_reg must in x13"); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + assert(_result->as_register() == x10, "result must in x10"); + __ j(_continuation); +} + +// Implementation of MonitorAccessStubs +MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, 
LIR_Opr lock_reg, CodeEmitInfo* info) +: MonitorAccessStub(obj_reg, lock_reg) { + _info = new CodeEmitInfo(info); +} + +void MonitorEnterStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_obj_reg->as_register(), 1); + ce->store_parameter(_lock_reg->as_register(), 0); + Runtime1::StubID enter_id; + if (ce->compilation()->has_fpu_code()) { + enter_id = Runtime1::monitorenter_id; + } else { + enter_id = Runtime1::monitorenter_nofpu_id; + } + __ far_call(RuntimeAddress(Runtime1::entry_for(enter_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ j(_continuation); +} + +void MonitorExitStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_compute_lock) { + // lock_reg was destroyed by fast unlocking attempt => recompute it + ce->monitor_address(_monitor_ix, _lock_reg); + } + ce->store_parameter(_lock_reg->as_register(), 0); + // note: non-blocking leaf routine => no call info needed + Runtime1::StubID exit_id; + if (ce->compilation()->has_fpu_code()) { + exit_id = Runtime1::monitorexit_id; + } else { + exit_id = Runtime1::monitorexit_nofpu_id; + } + __ la(ra, _continuation); + __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id))); +} + +// Implementation of patching: +// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) +// - Replace original code with a call to the stub +// At Runtime: +// - call to stub, jump to runtime +// - in runtime: preserve all registers (rspecially objects, i.e., source and destination object) +// - in runtime: after initializing class, restore original code, reexecute instruction + +int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; + +void PatchingStub::align_patch_site(MacroAssembler* masm) {} + +void PatchingStub::emit_code(LIR_Assembler* ce) { + assert(false, "RISCV should not use C1 runtime patching"); +} + +void DeoptimizeStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + ce->store_parameter(_trap_request, 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id))); + ce->add_call_info_here(_info); + DEBUG_ONLY(__ should_not_reach_here()); +} + +void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { + address a = NULL; + if (_info->deoptimize_on_exception()) { + // Deoptimize, do not throw the exception, because it is probably wrong to do it here. 
+ a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + } else { + a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id); + } + + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + __ bind(_entry); + __ far_call(RuntimeAddress(a)); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + +void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + + __ bind(_entry); + // pass the object in a tmp register because all other registers + // must be preserved + if (_obj->is_cpu_register()) { + __ mv(t0, _obj->as_register()); + } + __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), NULL, t1); + ce->add_call_info_here(_info); + debug_only(__ should_not_reach_here()); +} + +void ArrayCopyStub::emit_code(LIR_Assembler* ce) { + // ---------------slow case: call to native----------------- + __ bind(_entry); + // Figure out where the args should go + // This should really convert the IntrinsicID to the Method* and signature + // but I don't know how to do that. + const int args_num = 5; + VMRegPair args[args_num]; + BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT }; + SharedRuntime::java_calling_convention(signature, args, args_num); + + // push parameters + Register r[args_num]; + r[0] = src()->as_register(); + r[1] = src_pos()->as_register(); + r[2] = dst()->as_register(); + r[3] = dst_pos()->as_register(); + r[4] = length()->as_register(); + + // next registers will get stored on the stack + for (int j = 0; j < args_num; j++) { + VMReg r_1 = args[j].first(); + if (r_1->is_stack()) { + int st_off = r_1->reg2stack() * wordSize; + __ sd(r[j], Address(sp, st_off)); + } else { + assert(r[j] == args[j].first()->as_Register(), "Wrong register for arg"); + } + } + + ce->align_call(lir_static_call); + + ce->emit_static_call_stub(); + if (ce->compilation()->bailed_out()) { + return; // CodeCache is full + } + Address resolve(SharedRuntime::get_resolve_static_call_stub(), + relocInfo::static_call_type); + address call = __ trampoline_call(resolve); + if (call == NULL) { + ce->bailout("trampoline stub overflow"); + return; + } + ce->add_call_info_here(info()); + +#ifndef PRODUCT + __ la(t1, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); + __ add_memory_int32(Address(t1), 1); +#endif + + __ j(_continuation); +} + +#undef __ Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_C1_DEFS_RISCV_HPP +#define CPU_RISCV_C1_DEFS_RISCV_HPP + +// native word offsets from memory address (little endian) +enum { + pd_lo_word_offset_in_bytes = 0, + pd_hi_word_offset_in_bytes = BytesPerWord +}; + +// explicit rounding operations are required to implement the strictFP mode +enum { + pd_strict_fp_requires_explicit_rounding = false +}; + +// registers +enum { + pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers, // number of registers used during code emission + pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers, // number of float registers used during code emission + + // caller saved + pd_nof_caller_save_cpu_regs_frame_map = 13, // number of registers killed by calls + pd_nof_caller_save_fpu_regs_frame_map = 32, // number of float registers killed by calls + + pd_first_callee_saved_reg = pd_nof_caller_save_cpu_regs_frame_map, + pd_last_callee_saved_reg = 21, + + pd_last_allocatable_cpu_reg = pd_nof_caller_save_cpu_regs_frame_map - 1, + + pd_nof_cpu_regs_reg_alloc + = pd_nof_caller_save_cpu_regs_frame_map, // number of registers that are visible to register allocator + pd_nof_fpu_regs_reg_alloc = 32, // number of float registers that are visible to register allocator + + pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan + pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of float registers visible to linear scan + pd_nof_xmm_regs_linearscan = 0, // don't have vector registers + + pd_first_cpu_reg = 0, + pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1, + pd_first_byte_reg = 0, + pd_last_byte_reg = pd_nof_cpu_regs_reg_alloc - 1, + + pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, + pd_last_fpu_reg = pd_first_fpu_reg + 31, + + pd_first_callee_saved_fpu_reg_1 = 8 + pd_first_fpu_reg, + pd_last_callee_saved_fpu_reg_1 = 9 + pd_first_fpu_reg, + pd_first_callee_saved_fpu_reg_2 = 18 + pd_first_fpu_reg, + pd_last_callee_saved_fpu_reg_2 = 27 + pd_first_fpu_reg +}; + + +// Encoding of float value in debug info. This is true on x86 where +// floats are extended to doubles when stored in the stack, false for +// RISCV where floats and doubles are stored in their native form. +enum { + pd_float_saved_as_double = false +}; + +#endif // CPU_RISCV_C1_DEFS_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +//-------------------------------------------------------- +// FpuStackSim +//-------------------------------------------------------- + +// No FPU stack on RISCV Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP +#define CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP + +// No FPU stack on RISCV +class FpuStackSim; + +#endif // CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp @@ -0,0 +1,388 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_LIR.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_riscv.inline.hpp" + +LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { + LIR_Opr opr = LIR_OprFact::illegalOpr; + VMReg r_1 = reg->first(); + VMReg r_2 = reg->second(); + if (r_1->is_stack()) { + // Convert stack slot to an SP offset + // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value + // so we must add it in here. + int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; + opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type)); + } else if (r_1->is_Register()) { + Register reg1 = r_1->as_Register(); + if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { + Register reg2 = r_2->as_Register(); + assert(reg2 == reg1, "must be same register"); + opr = as_long_opr(reg1); + } else if (is_reference_type(type)) { + opr = as_oop_opr(reg1); + } else if (type == T_METADATA) { + opr = as_metadata_opr(reg1); + } else if (type == T_ADDRESS) { + opr = as_address_opr(reg1); + } else { + opr = as_opr(reg1); + } + } else if (r_1->is_FloatRegister()) { + assert(type == T_DOUBLE || type == T_FLOAT, "wrong type"); + int num = r_1->as_FloatRegister()->encoding(); + if (type == T_FLOAT) { + opr = LIR_OprFact::single_fpu(num); + } else { + opr = LIR_OprFact::double_fpu(num); + } + } else { + ShouldNotReachHere(); + } + return opr; +} + +LIR_Opr FrameMap::zr_opr; +LIR_Opr FrameMap::r1_opr; +LIR_Opr FrameMap::r2_opr; +LIR_Opr FrameMap::r3_opr; +LIR_Opr FrameMap::r4_opr; +LIR_Opr FrameMap::r5_opr; +LIR_Opr FrameMap::r6_opr; +LIR_Opr FrameMap::r7_opr; +LIR_Opr FrameMap::r8_opr; +LIR_Opr FrameMap::r9_opr; +LIR_Opr FrameMap::r10_opr; +LIR_Opr FrameMap::r11_opr; +LIR_Opr FrameMap::r12_opr; +LIR_Opr FrameMap::r13_opr; +LIR_Opr FrameMap::r14_opr; +LIR_Opr FrameMap::r15_opr; +LIR_Opr FrameMap::r16_opr; +LIR_Opr FrameMap::r17_opr; +LIR_Opr FrameMap::r18_opr; +LIR_Opr FrameMap::r19_opr; +LIR_Opr FrameMap::r20_opr; +LIR_Opr FrameMap::r21_opr; +LIR_Opr FrameMap::r22_opr; +LIR_Opr FrameMap::r23_opr; +LIR_Opr FrameMap::r24_opr; +LIR_Opr FrameMap::r25_opr; +LIR_Opr FrameMap::r26_opr; +LIR_Opr FrameMap::r27_opr; +LIR_Opr FrameMap::r28_opr; +LIR_Opr FrameMap::r29_opr; +LIR_Opr FrameMap::r30_opr; +LIR_Opr FrameMap::r31_opr; + +LIR_Opr FrameMap::fp_opr; +LIR_Opr FrameMap::sp_opr; + +LIR_Opr FrameMap::receiver_opr; + +LIR_Opr FrameMap::zr_oop_opr; +LIR_Opr FrameMap::r1_oop_opr; +LIR_Opr FrameMap::r2_oop_opr; +LIR_Opr FrameMap::r3_oop_opr; +LIR_Opr FrameMap::r4_oop_opr; +LIR_Opr FrameMap::r5_oop_opr; +LIR_Opr FrameMap::r6_oop_opr; +LIR_Opr FrameMap::r7_oop_opr; +LIR_Opr FrameMap::r8_oop_opr; +LIR_Opr FrameMap::r9_oop_opr; +LIR_Opr FrameMap::r10_oop_opr; +LIR_Opr FrameMap::r11_oop_opr; +LIR_Opr FrameMap::r12_oop_opr; +LIR_Opr FrameMap::r13_oop_opr; +LIR_Opr FrameMap::r14_oop_opr; +LIR_Opr FrameMap::r15_oop_opr; +LIR_Opr FrameMap::r16_oop_opr; +LIR_Opr FrameMap::r17_oop_opr; +LIR_Opr FrameMap::r18_oop_opr; +LIR_Opr FrameMap::r19_oop_opr; +LIR_Opr FrameMap::r20_oop_opr; 
+LIR_Opr FrameMap::r21_oop_opr; +LIR_Opr FrameMap::r22_oop_opr; +LIR_Opr FrameMap::r23_oop_opr; +LIR_Opr FrameMap::r24_oop_opr; +LIR_Opr FrameMap::r25_oop_opr; +LIR_Opr FrameMap::r26_oop_opr; +LIR_Opr FrameMap::r27_oop_opr; +LIR_Opr FrameMap::r28_oop_opr; +LIR_Opr FrameMap::r29_oop_opr; +LIR_Opr FrameMap::r30_oop_opr; +LIR_Opr FrameMap::r31_oop_opr; + +LIR_Opr FrameMap::t0_opr; +LIR_Opr FrameMap::t1_opr; +LIR_Opr FrameMap::t0_long_opr; +LIR_Opr FrameMap::t1_long_opr; + +LIR_Opr FrameMap::r10_metadata_opr; +LIR_Opr FrameMap::r11_metadata_opr; +LIR_Opr FrameMap::r12_metadata_opr; +LIR_Opr FrameMap::r13_metadata_opr; +LIR_Opr FrameMap::r14_metadata_opr; +LIR_Opr FrameMap::r15_metadata_opr; + +LIR_Opr FrameMap::long10_opr; +LIR_Opr FrameMap::long11_opr; +LIR_Opr FrameMap::fpu10_float_opr; +LIR_Opr FrameMap::fpu10_double_opr; + +LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; +LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; + +//-------------------------------------------------------- +// FrameMap +//-------------------------------------------------------- +// |---f31--| +// |---..---| +// |---f28--| +// |---f27--|<---pd_last_callee_saved_fpu_reg_2 +// |---..---| +// |---f18--|<---pd_first_callee_saved_fpu_reg_2 +// |---f17--| +// |---..---| +// |---f10--| +// |---f9---|<---pd_last_callee_saved_fpu_reg_1 +// |---f8---|<---pd_first_callee_saved_fpu_reg_1 +// |---f7---| +// |---..---| +// |---f0---| +// |---x27--| +// |---x23--| +// |---x8---| +// |---x4---| +// |---x3---| +// |---x2---| +// |---x1---| +// |---x0---| +// |---x26--|<---pd_last_callee_saved_reg +// |---..---| +// |---x18--| +// |---x9---|<---pd_first_callee_saved_reg +// |---x31--| +// |---..---| +// |---x28--| +// |---x17--| +// |---..---| +// |---x10--| +// |---x7---| + +void FrameMap::initialize() { + assert(!_init_done, "once"); + + int i = 0; + + // caller save register + map_register(i, x7); r7_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x10); r10_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x11); r11_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x12); r12_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x13); r13_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x14); r14_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x15); r15_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x16); r16_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x17); r17_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x28); r28_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x29); r29_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x30); r30_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x31); r31_opr = LIR_OprFact::single_cpu(i); i++; + + // callee save register + map_register(i, x9); r9_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x18); r18_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x19); r19_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x20); r20_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x21); r21_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x22); r22_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x24); r24_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x25); r25_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, x26); r26_opr = LIR_OprFact::single_cpu(i); i++; + + // special register + map_register(i, x0); zr_opr = LIR_OprFact::single_cpu(i); i++; // zr + map_register(i, x1); r1_opr = LIR_OprFact::single_cpu(i); i++; // ra + map_register(i, 
x2); r2_opr = LIR_OprFact::single_cpu(i); i++; // sp + map_register(i, x3); r3_opr = LIR_OprFact::single_cpu(i); i++; // gp + map_register(i, x4); r4_opr = LIR_OprFact::single_cpu(i); i++; // thread + map_register(i, x8); r8_opr = LIR_OprFact::single_cpu(i); i++; // fp + map_register(i, x23); r23_opr = LIR_OprFact::single_cpu(i); i++; // java thread + map_register(i, x27); r27_opr = LIR_OprFact::single_cpu(i); i++; // heapbase + + // tmp register + map_register(i, x5); r5_opr = LIR_OprFact::single_cpu(i); i++; // t0 + map_register(i, x6); r6_opr = LIR_OprFact::single_cpu(i); i++; // t1 + + t0_opr = r5_opr; + t1_opr = r6_opr; + t0_long_opr = LIR_OprFact::double_cpu(r5_opr->cpu_regnr(), r5_opr->cpu_regnr()); + t1_long_opr = LIR_OprFact::double_cpu(r6_opr->cpu_regnr(), r6_opr->cpu_regnr()); + + long10_opr = LIR_OprFact::double_cpu(r10_opr->cpu_regnr(), r10_opr->cpu_regnr()); + long11_opr = LIR_OprFact::double_cpu(r11_opr->cpu_regnr(), r11_opr->cpu_regnr()); + + fpu10_float_opr = LIR_OprFact::single_fpu(10); + fpu10_double_opr = LIR_OprFact::double_fpu(10); + + i = 0; + _caller_save_cpu_regs[i++] = r7_opr; + _caller_save_cpu_regs[i++] = r10_opr; + _caller_save_cpu_regs[i++] = r11_opr; + _caller_save_cpu_regs[i++] = r12_opr; + _caller_save_cpu_regs[i++] = r13_opr; + _caller_save_cpu_regs[i++] = r14_opr; + _caller_save_cpu_regs[i++] = r15_opr; + _caller_save_cpu_regs[i++] = r16_opr; + _caller_save_cpu_regs[i++] = r17_opr; + _caller_save_cpu_regs[i++] = r28_opr; + _caller_save_cpu_regs[i++] = r29_opr; + _caller_save_cpu_regs[i++] = r30_opr; + _caller_save_cpu_regs[i++] = r31_opr; + + _init_done = true; + + zr_oop_opr = as_oop_opr(x0); + r1_oop_opr = as_oop_opr(x1); + r2_oop_opr = as_oop_opr(x2); + r3_oop_opr = as_oop_opr(x3); + r4_oop_opr = as_oop_opr(x4); + r5_oop_opr = as_oop_opr(x5); + r6_oop_opr = as_oop_opr(x6); + r7_oop_opr = as_oop_opr(x7); + r8_oop_opr = as_oop_opr(x8); + r9_oop_opr = as_oop_opr(x9); + r10_oop_opr = as_oop_opr(x10); + r11_oop_opr = as_oop_opr(x11); + r12_oop_opr = as_oop_opr(x12); + r13_oop_opr = as_oop_opr(x13); + r14_oop_opr = as_oop_opr(x14); + r15_oop_opr = as_oop_opr(x15); + r16_oop_opr = as_oop_opr(x16); + r17_oop_opr = as_oop_opr(x17); + r18_oop_opr = as_oop_opr(x18); + r19_oop_opr = as_oop_opr(x19); + r20_oop_opr = as_oop_opr(x20); + r21_oop_opr = as_oop_opr(x21); + r22_oop_opr = as_oop_opr(x22); + r23_oop_opr = as_oop_opr(x23); + r24_oop_opr = as_oop_opr(x24); + r25_oop_opr = as_oop_opr(x25); + r26_oop_opr = as_oop_opr(x26); + r27_oop_opr = as_oop_opr(x27); + r28_oop_opr = as_oop_opr(x28); + r29_oop_opr = as_oop_opr(x29); + r30_oop_opr = as_oop_opr(x30); + r31_oop_opr = as_oop_opr(x31); + + r10_metadata_opr = as_metadata_opr(x10); + r11_metadata_opr = as_metadata_opr(x11); + r12_metadata_opr = as_metadata_opr(x12); + r13_metadata_opr = as_metadata_opr(x13); + r14_metadata_opr = as_metadata_opr(x14); + r15_metadata_opr = as_metadata_opr(x15); + + sp_opr = as_pointer_opr(sp); + fp_opr = as_pointer_opr(fp); + + VMRegPair regs; + BasicType sig_bt = T_OBJECT; + SharedRuntime::java_calling_convention(&sig_bt, ®s, 1); + receiver_opr = as_oop_opr(regs.first()->as_Register()); + + for (i = 0; i < nof_caller_save_fpu_regs; i++) { + _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); + } +} + + +Address FrameMap::make_new_address(ByteSize sp_offset) const { + return Address(sp, in_bytes(sp_offset)); +} + + +// ----------------mapping----------------------- +// all mapping is based on fp addressing, except for simple leaf methods where we access +// the locals sp based 
(and no frame is built) + + +// Frame for simple leaf methods (quick entries) +// +// +----------+ +// | ret addr | <- TOS +// +----------+ +// | args | +// | ...... | + +// Frame for standard methods +// +// | .........| <- TOS +// | locals | +// +----------+ +// | old fp, | +// +----------+ +// | ret addr | +// +----------+ +// | args | <- FP +// | .........| + + +// For OopMaps, map a local variable or spill index to an VMRegImpl name. +// This is the offset from sp() in the frame of the slot for the index, +// skewed by VMRegImpl::stack0 to indicate a stack location (vs.a register.) +// +// framesize + +// stack0 stack0 0 <- VMReg +// | | | +// ...........|..............|.............| +// 0 1 2 3 x x 4 5 6 ... | <- local indices +// ^ ^ sp() ( x x indicate link +// | | and return addr) +// arguments non-argument locals + + +VMReg FrameMap::fpu_regname (int n) { + // Return the OptoReg name for the fpu stack slot "n" + // A spilled fpu stack slot comprises to two single-word OptoReg's. + return as_FloatRegister(n)->as_VMReg(); +} + +LIR_Opr FrameMap::stack_pointer() { + return FrameMap::sp_opr; +} + +// JSR 292 +LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { + return LIR_OprFact::illegalOpr; // Not needed on riscv +} + +bool FrameMap::validate_frame() { + return true; +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp @@ -0,0 +1,148 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_C1_FRAMEMAP_RISCV_HPP +#define CPU_RISCV_C1_FRAMEMAP_RISCV_HPP + +// On RISCV the frame looks as follows: +// +// +-----------------------------+---------+----------------------------------------+----------------+----------- +// | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling . 
+// +-----------------------------+---------+----------------------------------------+----------------+----------- + + public: + static const int pd_c_runtime_reserved_arg_size; + + enum { + first_available_sp_in_frame = 0, + frame_pad_in_bytes = 16, + nof_reg_args = 8 + }; + + public: + static LIR_Opr receiver_opr; + + static LIR_Opr zr_opr; + static LIR_Opr r1_opr; + static LIR_Opr r2_opr; + static LIR_Opr r3_opr; + static LIR_Opr r4_opr; + static LIR_Opr r5_opr; + static LIR_Opr r6_opr; + static LIR_Opr r7_opr; + static LIR_Opr r8_opr; + static LIR_Opr r9_opr; + static LIR_Opr r10_opr; + static LIR_Opr r11_opr; + static LIR_Opr r12_opr; + static LIR_Opr r13_opr; + static LIR_Opr r14_opr; + static LIR_Opr r15_opr; + static LIR_Opr r16_opr; + static LIR_Opr r17_opr; + static LIR_Opr r18_opr; + static LIR_Opr r19_opr; + static LIR_Opr r20_opr; + static LIR_Opr r21_opr; + static LIR_Opr r22_opr; + static LIR_Opr r23_opr; + static LIR_Opr r24_opr; + static LIR_Opr r25_opr; + static LIR_Opr r26_opr; + static LIR_Opr r27_opr; + static LIR_Opr r28_opr; + static LIR_Opr r29_opr; + static LIR_Opr r30_opr; + static LIR_Opr r31_opr; + static LIR_Opr fp_opr; + static LIR_Opr sp_opr; + + static LIR_Opr zr_oop_opr; + static LIR_Opr r1_oop_opr; + static LIR_Opr r2_oop_opr; + static LIR_Opr r3_oop_opr; + static LIR_Opr r4_oop_opr; + static LIR_Opr r5_oop_opr; + static LIR_Opr r6_oop_opr; + static LIR_Opr r7_oop_opr; + static LIR_Opr r8_oop_opr; + static LIR_Opr r9_oop_opr; + static LIR_Opr r10_oop_opr; + static LIR_Opr r11_oop_opr; + static LIR_Opr r12_oop_opr; + static LIR_Opr r13_oop_opr; + static LIR_Opr r14_oop_opr; + static LIR_Opr r15_oop_opr; + static LIR_Opr r16_oop_opr; + static LIR_Opr r17_oop_opr; + static LIR_Opr r18_oop_opr; + static LIR_Opr r19_oop_opr; + static LIR_Opr r20_oop_opr; + static LIR_Opr r21_oop_opr; + static LIR_Opr r22_oop_opr; + static LIR_Opr r23_oop_opr; + static LIR_Opr r24_oop_opr; + static LIR_Opr r25_oop_opr; + static LIR_Opr r26_oop_opr; + static LIR_Opr r27_oop_opr; + static LIR_Opr r28_oop_opr; + static LIR_Opr r29_oop_opr; + static LIR_Opr r30_oop_opr; + static LIR_Opr r31_oop_opr; + + static LIR_Opr t0_opr; + static LIR_Opr t1_opr; + static LIR_Opr t0_long_opr; + static LIR_Opr t1_long_opr; + + static LIR_Opr r10_metadata_opr; + static LIR_Opr r11_metadata_opr; + static LIR_Opr r12_metadata_opr; + static LIR_Opr r13_metadata_opr; + static LIR_Opr r14_metadata_opr; + static LIR_Opr r15_metadata_opr; + + static LIR_Opr long10_opr; + static LIR_Opr long11_opr; + static LIR_Opr fpu10_float_opr; + static LIR_Opr fpu10_double_opr; + + static LIR_Opr as_long_opr(Register r) { + return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); + } + static LIR_Opr as_pointer_opr(Register r) { + return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); + } + + // VMReg name for spilled physical FPU stack slot n + static VMReg fpu_regname(int n); + + static bool is_caller_save_register(LIR_Opr opr) { return true; } + static bool is_caller_save_register(Register r) { return true; } + + static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; } + static int last_cpu_reg() { return pd_last_cpu_reg; } + +#endif // CPU_RISCV_C1_FRAMEMAP_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2000, 2019, Oracle 
and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" + +#ifndef PRODUCT +#define COMMENT(x) do { __ block_comment(x); } while (0) +#else +#define COMMENT(x) +#endif + +#define __ _masm-> + +void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr illegal, + LIR_Opr result, CodeEmitInfo* info) { + // opcode check + assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem"); + bool is_irem = (code == lir_irem); + // opreand check + assert(left->is_single_cpu(), "left must be a register"); + assert(right->is_single_cpu() || right->is_constant(), "right must be a register or constant"); + assert(result->is_single_cpu(), "result must be a register"); + Register lreg = left->as_register(); + Register dreg = result->as_register(); + + // power-of-2 constant check and codegen + if (right->is_constant()) { + int c = right->as_constant_ptr()->as_jint(); + assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); + if (is_irem) { + if (c == 1) { + // move 0 to dreg if divisor is 1 + __ mv(dreg, zr); + } else { + unsigned int shift = exact_log2(c); + __ sraiw(t0, lreg, 0x1f); + __ srliw(t0, t0, BitsPerInt - shift); + __ addw(t1, lreg, t0); + if (is_imm_in_range(c - 1, 12, 0)) { + __ andi(t1, t1, c - 1); + } else { + __ zero_extend(t1, t1, shift); + } + __ subw(dreg, t1, t0); + } + } else { + if (c == 1) { + // move lreg to dreg if divisor is 1 + __ mv(dreg, lreg); + } else { + unsigned int shift = exact_log2(c); + __ sraiw(t0, lreg, 0x1f); + if (is_imm_in_range(c - 1, 12, 0)) { + __ andi(t0, t0, c - 1); + } else { + __ zero_extend(t0, t0, shift); + } + __ addw(dreg, t0, lreg); + __ sraiw(dreg, dreg, shift); + } + } + } else { + Register rreg = right->as_register(); + __ corrected_idivl(dreg, lreg, rreg, is_irem); + } +} + +void LIR_Assembler::arith_op_single_cpu_right_constant(LIR_Code code, LIR_Opr left, LIR_Opr right, + Register lreg, Register dreg) { + // cpu register - constant + jlong c; + + switch (right->type()) { + case T_LONG: + c = right->as_constant_ptr()->as_jlong(); break; + case T_INT: // fall through + case T_ADDRESS: + c = right->as_constant_ptr()->as_jint(); break; + default: + ShouldNotReachHere(); + c = 0; // unreachable + } + + assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); + if (c == 0 && dreg == lreg) { + 
COMMENT("effective nop elided"); + return; + } + switch (left->type()) { + case T_INT: + switch (code) { + case lir_add: __ addw(dreg, lreg, c); break; + case lir_sub: __ subw(dreg, lreg, c); break; + default: ShouldNotReachHere(); + } + break; + case T_OBJECT: // fall through + case T_ADDRESS: + switch (code) { + case lir_add: __ add(dreg, lreg, c); break; + case lir_sub: __ sub(dreg, lreg, c); break; + default: ShouldNotReachHere(); + } + break; + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::arith_op_single_cpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) { + Register lreg = left->as_register(); + Register dreg = as_reg(dest); + + if (right->is_single_cpu()) { + // cpu register - cpu register + assert(left->type() == T_INT && right->type() == T_INT && dest->type() == T_INT, "should be"); + Register rreg = right->as_register(); + switch (code) { + case lir_add: __ addw(dest->as_register(), lreg, rreg); break; + case lir_sub: __ subw(dest->as_register(), lreg, rreg); break; + case lir_mul: __ mulw(dest->as_register(), lreg, rreg); break; + default: ShouldNotReachHere(); + } + } else if (right->is_double_cpu()) { + Register rreg = right->as_register_lo(); + // sigle_cpu + double_cpu; can happen with obj_long + assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); + switch (code) { + case lir_add: __ add(dreg, lreg, rreg); break; + case lir_sub: __ sub(dreg, lreg, rreg); break; + default: ShouldNotReachHere(); + } + } else if (right->is_constant()) { + arith_op_single_cpu_right_constant(code, left, right, lreg, dreg); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) { + Register lreg_lo = left->as_register_lo(); + + if (right->is_double_cpu()) { + // cpu register - cpu register + Register rreg_lo = right->as_register_lo(); + switch (code) { + case lir_add: __ add(dest->as_register_lo(), lreg_lo, rreg_lo); break; + case lir_sub: __ sub(dest->as_register_lo(), lreg_lo, rreg_lo); break; + case lir_mul: __ mul(dest->as_register_lo(), lreg_lo, rreg_lo); break; + case lir_div: __ corrected_idivq(dest->as_register_lo(), lreg_lo, rreg_lo, false); break; + case lir_rem: __ corrected_idivq(dest->as_register_lo(), lreg_lo, rreg_lo, true); break; + default: + ShouldNotReachHere(); + } + } else if (right->is_constant()) { + jlong c = right->as_constant_ptr()->as_jlong(); + Register dreg = as_reg(dest); + switch (code) { + case lir_add: // fall through + case lir_sub: + if (c == 0 && dreg == lreg_lo) { + COMMENT("effective nop elided"); + return; + } + code == lir_add ? 
__ add(dreg, lreg_lo, c) : __ sub(dreg, lreg_lo, c); + break; + case lir_div: + assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move lreg_lo to dreg if divisor is 1 + __ mv(dreg, lreg_lo); + } else { + unsigned int shift = exact_log2_long(c); + // use t0 as intermediate result register + __ srai(t0, lreg_lo, 0x3f); + if (is_imm_in_range(c - 1, 12, 0)) { + __ andi(t0, t0, c - 1); + } else { + __ zero_extend(t0, t0, shift); + } + __ add(dreg, t0, lreg_lo); + __ srai(dreg, dreg, shift); + } + break; + case lir_rem: + assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move 0 to dreg if divisor is 1 + __ mv(dreg, zr); + } else { + unsigned int shift = exact_log2_long(c); + __ srai(t0, lreg_lo, 0x3f); + __ srli(t0, t0, BitsPerLong - shift); + __ add(t1, lreg_lo, t0); + if (is_imm_in_range(c - 1, 12, 0)) { + __ andi(t1, t1, c - 1); + } else { + __ zero_extend(t1, t1, shift); + } + __ sub(dreg, t1, t0); + } + break; + default: + ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::arith_op_single_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) { + assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register"); + switch (code) { + case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) { + if (right->is_double_fpu()) { + // fpu register - fpu register + switch (code) { + case lir_add: __ fadd_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_div: __ fdiv_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + default: + ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, + CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); + + if (left->is_single_cpu()) { + arith_op_single_cpu(code, left, right, dest); + } else if (left->is_double_cpu()) { + arith_op_double_cpu(code, left, right, dest); + } else if (left->is_single_fpu()) { + arith_op_single_fpu(code, left, right, dest); + } else if (left->is_double_fpu()) { + arith_op_double_fpu(code, left, right, dest); + } else { + ShouldNotReachHere(); + } +} + +#undef __ Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP +#define CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP + + // arith_op sub functions + void arith_op_single_cpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); + void arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); + void arith_op_single_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); + void arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); + void arith_op_single_cpu_right_constant(LIR_Code code, LIR_Opr left, LIR_Opr right, Register lreg, Register dreg); + void arithmetic_idiv(LIR_Op3* op, bool is_irem); + +#endif // CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp @@ -0,0 +1,388 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "ci/ciArrayKlass.hpp" +#include "oops/objArrayKlass.hpp" +#include "runtime/stubRoutines.hpp" + +#define __ _masm-> + + +void LIR_Assembler::generic_arraycopy(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, CodeStub *stub) { + assert(src == x11 && src_pos == x12, "mismatch in calling convention"); + // Save the arguments in case the generic arraycopy fails and we + // have to fall back to the JNI stub + arraycopy_store_args(src, src_pos, length, dst, dst_pos); + + address copyfunc_addr = StubRoutines::generic_arraycopy(); + assert(copyfunc_addr != NULL, "generic arraycopy stub required"); + + // The arguments are in java calling convention so we shift them + // to C convention + assert_different_registers(c_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4); + __ mv(c_rarg0, j_rarg0); + assert_different_registers(c_rarg1, j_rarg2, j_rarg3, j_rarg4); + __ mv(c_rarg1, j_rarg1); + assert_different_registers(c_rarg2, j_rarg3, j_rarg4); + __ mv(c_rarg2, j_rarg2); + assert_different_registers(c_rarg3, j_rarg4); + __ mv(c_rarg3, j_rarg3); + __ mv(c_rarg4, j_rarg4); +#ifndef PRODUCT + if (PrintC1Statistics) { + __ add_memory_int32(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt), 1); + } +#endif + __ far_call(RuntimeAddress(copyfunc_addr)); + __ beqz(x10, *stub->continuation()); + // Reload values from the stack so they are where the stub + // expects them. + arraycopy_load_args(src, src_pos, length, dst, dst_pos); + + // x10 is -1^K where K == partial copied count + __ xori(t0, x10, -1); + // adjust length down and src/end pos up by partial copied count + __ subw(length, length, t0); + __ addw(src_pos, src_pos, t0); + __ addw(dst_pos, dst_pos, t0); + __ j(*stub->entry()); + + __ bind(*stub->continuation()); +} + +void LIR_Assembler::arraycopy_simple_check(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, Register tmp, + CodeStub *stub, int flags) { + // test for NULL + if (flags & LIR_OpArrayCopy::src_null_check) { + __ beqz(src, *stub->entry(), /* is_far */ true); + } + if (flags & LIR_OpArrayCopy::dst_null_check) { + __ beqz(dst, *stub->entry(), /* is_far */ true); + } + + // If the compiler was not able to prove that exact type of the source or the destination + // of the arraycopy is an array type, check at runtime if the source or the destination is + // an instance type. 
+ if (flags & LIR_OpArrayCopy::type_check) { + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset()))); + __ li(t1, Klass::_lh_neutral_value); + __ bge(t0, t1, *stub->entry(), /* is_far */ true); + } + + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset()))); + __ li(t1, Klass::_lh_neutral_value); + __ bge(t0, t1, *stub->entry(), /* is_far */ true); + } + } + + // check if negative + if (flags & LIR_OpArrayCopy::src_pos_positive_check) { + __ bltz(src_pos, *stub->entry(), /* is_far */ true); + } + if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { + __ bltz(dst_pos, *stub->entry(), /* is_far */ true); + } + if (flags & LIR_OpArrayCopy::length_positive_check) { + __ bltz(length, *stub->entry(), /* is_far */ true); + } + + if (flags & LIR_OpArrayCopy::src_range_check) { + __ addw(tmp, src_pos, length); + __ lwu(t0, Address(src, arrayOopDesc::length_offset_in_bytes())); + __ bgtu(tmp, t0, *stub->entry(), /* is_far */ true); + } + if (flags & LIR_OpArrayCopy::dst_range_check) { + __ addw(tmp, dst_pos, length); + __ lwu(t0, Address(dst, arrayOopDesc::length_offset_in_bytes())); + __ bgtu(tmp, t0, *stub->entry(), /* is_far */ true); + } +} + +void LIR_Assembler::arraycopy_checkcast(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, Register tmp, + CodeStub *stub, BasicType basic_type, + address copyfunc_addr, int flags) { + // src is not a sub class of dst so we have to do a + // per-element check. + int mask = LIR_OpArrayCopy::src_objarray | LIR_OpArrayCopy::dst_objarray; + if ((flags & mask) != mask) { + // Check that at least both of them object arrays. + assert(flags & mask, "one of the two should be known to be an object array"); + + if (!(flags & LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + } + int lh_offset = in_bytes(Klass::layout_helper_offset()); + Address klass_lh_addr(tmp, lh_offset); + jint objArray_lh = Klass::array_layout_helper(T_OBJECT); + __ lw(t0, klass_lh_addr); + __ mvw(t1, objArray_lh); + __ bne(t0, t1, *stub->entry(), /* is_far */ true); + } + + // Spill because stubs can use any register they like and it's + // easier to restore just those that we care about. 
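+  // The five arguments are parked in the first five outgoing stack slots (see
+  // arraycopy_store_args below) and are reloaded once the stub returns.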
+ arraycopy_store_args(src, src_pos, length, dst, dst_pos); + arraycopy_checkcast_prepare_params(src, src_pos, length, dst, dst_pos, basic_type); + __ far_call(RuntimeAddress(copyfunc_addr)); + +#ifndef PRODUCT + if (PrintC1Statistics) { + Label failed; + __ bnez(x10, failed); + __ add_memory_int32(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt), 1); + __ bind(failed); + } +#endif + + __ beqz(x10, *stub->continuation()); + +#ifndef PRODUCT + if (PrintC1Statistics) { + __ add_memory_int32(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt), 1); + } +#endif + assert_different_registers(dst, dst_pos, length, src_pos, src, x10, t0); + + // Restore previously spilled arguments + arraycopy_load_args(src, src_pos, length, dst, dst_pos); + + // return value is -1^K where K is partial copied count + __ xori(t0, x10, -1); + // adjust length down and src/end pos up by partial copied count + __ subw(length, length, t0); + __ addw(src_pos, src_pos, t0); + __ addw(dst_pos, dst_pos, t0); +} + +void LIR_Assembler::arraycopy_type_check(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, Register tmp, + CodeStub *stub, BasicType basic_type, int flags) { + // We don't know the array types are compatible + if (basic_type != T_OBJECT) { + // Simple test for basic type arrays + if (UseCompressedClassPointers) { + __ lwu(tmp, Address(src, oopDesc::klass_offset_in_bytes())); + __ lwu(t0, Address(dst, oopDesc::klass_offset_in_bytes())); + } else { + __ ld(tmp, Address(src, oopDesc::klass_offset_in_bytes())); + __ ld(t0, Address(dst, oopDesc::klass_offset_in_bytes())); + } + __ bne(tmp, t0, *stub->entry(), /* is_far */ true); + } else { + // For object arrays, if src is a sub class of dst then we can + // safely do the copy. + Label cont, slow; + +#define PUSH(r1, r2) \ + __ addi(sp, sp, -2 * wordSize); \ + __ sd(r1, Address(sp, 1 * wordSize)); \ + __ sd(r2, Address(sp, 0)); + +#define POP(r1, r2) \ + __ ld(r1, Address(sp, 1 * wordSize)); \ + __ ld(r2, Address(sp, 0)); \ + __ addi(sp, sp, 2 * wordSize); + + PUSH(src, dst); + __ load_klass(src, src); + __ load_klass(dst, dst); + __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL); + + PUSH(src, dst); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); + POP(src, dst); + __ bnez(dst, cont); + + __ bind(slow); + POP(src, dst); + + address copyfunc_addr = StubRoutines::checkcast_arraycopy(); + if (copyfunc_addr != NULL) { // use stub if available + arraycopy_checkcast(src, src_pos, length, dst, dst_pos, tmp, stub, basic_type, copyfunc_addr, flags); + } + + __ j(*stub->entry()); + __ bind(cont); + POP(src, dst); + } +} + +void LIR_Assembler::arraycopy_assert(Register src, Register dst, Register tmp, ciArrayKlass *default_type, int flags) { + assert(default_type != NULL, "NULL default_type!"); + BasicType basic_type = default_type->element_type()->basic_type(); + + if (basic_type == T_ARRAY) { basic_type = T_OBJECT; } + if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { + // Sanity check the known type with the incoming class. For the + // primitive case the types must match exactly with src.klass and + // dst.klass each exactly matching the default type. For the + // object array case, if no type check is needed then either the + // dst type is exactly the expected type and the src type is a + // subtype which we can't check or src is the same array as dst + // but not necessarily exactly of type default_type. 
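+    // The checks below compare the klass word of dst (and, for primitive arrays, src) against the
+    // compile-time expected klass; with compressed class pointers the expected klass is encoded
+    // with encode_klass_not_null so a plain register compare against the narrow klass word works.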
+ Label known_ok, halt; + __ mov_metadata(tmp, default_type->constant_encoding()); + if (UseCompressedClassPointers) { + __ encode_klass_not_null(tmp); + } + + if (basic_type != T_OBJECT) { + if (UseCompressedClassPointers) { + __ lwu(t0, Address(dst, oopDesc::klass_offset_in_bytes())); + } else { + __ ld(t0, Address(dst, oopDesc::klass_offset_in_bytes())); + } + __ bne(tmp, t0, halt); + if (UseCompressedClassPointers) { + __ lwu(t0, Address(src, oopDesc::klass_offset_in_bytes())); + } else { + __ ld(t0, Address(src, oopDesc::klass_offset_in_bytes())); + } + __ beq(tmp, t0, known_ok); + } else { + if (UseCompressedClassPointers) { + __ lwu(t0, Address(dst, oopDesc::klass_offset_in_bytes())); + } else { + __ ld(t0, Address(dst, oopDesc::klass_offset_in_bytes())); + } + __ beq(tmp, t0, known_ok); + __ beq(src, dst, known_ok); + } + __ bind(halt); + __ stop("incorrect type information in arraycopy"); + __ bind(known_ok); + } +} + +void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { + ciArrayKlass *default_type = op->expected_type(); + Register src = op->src()->as_register(); + Register dst = op->dst()->as_register(); + Register src_pos = op->src_pos()->as_register(); + Register dst_pos = op->dst_pos()->as_register(); + Register length = op->length()->as_register(); + Register tmp = op->tmp()->as_register(); + + CodeStub* stub = op->stub(); + int flags = op->flags(); + BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL; + if (is_reference_type(basic_type)) { basic_type = T_OBJECT; } + + // if we don't know anything, just go through the generic arraycopy + if (default_type == NULL) { + generic_arraycopy(src, src_pos, length, dst, dst_pos, stub); + return; + } + + assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), + "must be true at this point"); + + arraycopy_simple_check(src, src_pos, length, dst, dst_pos, tmp, stub, flags); + + if (flags & LIR_OpArrayCopy::type_check) { + arraycopy_type_check(src, src_pos, length, dst, dst_pos, tmp, stub, basic_type, flags); + } + +#ifdef ASSERT + arraycopy_assert(src, dst, tmp, default_type, flags); +#endif + +#ifndef PRODUCT + if (PrintC1Statistics) { + __ add_memory_int32(ExternalAddress(Runtime1::arraycopy_count_address(basic_type)), 1); + } +#endif + arraycopy_prepare_params(src, src_pos, length, dst, dst_pos, basic_type); + + bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; + bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; + const char *name = NULL; + address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); + + CodeBlob *cb = CodeCache::find_blob(entry); + if (cb != NULL) { + __ far_call(RuntimeAddress(entry)); + } else { + const int args_num = 3; + __ call_VM_leaf(entry, args_num); + } + + __ bind(*stub->continuation()); +} + + +void LIR_Assembler::arraycopy_prepare_params(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, BasicType basic_type) { + int scale = array_element_size(basic_type); + __ shadd(c_rarg0, src_pos, src, t0, scale); + __ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(c_rarg0, dst, dst_pos, length); + __ shadd(c_rarg1, dst_pos, dst, t0, scale); + __ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(c_rarg1, dst, length); + __ mv(c_rarg2, length); + assert_different_registers(c_rarg2, dst); +} + +void 
LIR_Assembler::arraycopy_checkcast_prepare_params(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, BasicType basic_type) { + arraycopy_prepare_params(src, src_pos, length, dst, dst_pos, basic_type); + __ load_klass(c_rarg4, dst); + __ ld(c_rarg4, Address(c_rarg4, ObjArrayKlass::element_klass_offset())); + __ lwu(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset())); +} + +void LIR_Assembler::arraycopy_store_args(Register src, Register src_pos, Register length, + Register dst, Register dst_pos) { + __ sd(dst_pos, Address(sp, 0)); // 0: dst_pos sp offset + __ sd(dst, Address(sp, 1 * BytesPerWord)); // 1: dst sp offset + __ sd(length, Address(sp, 2 * BytesPerWord)); // 2: length sp offset + __ sd(src_pos, Address(sp, 3 * BytesPerWord)); // 3: src_pos sp offset + __ sd(src, Address(sp, 4 * BytesPerWord)); // 4: src sp offset +} + +void LIR_Assembler::arraycopy_load_args(Register src, Register src_pos, Register length, + Register dst, Register dst_pos) { + __ ld(dst_pos, Address(sp, 0)); // 0: dst_pos sp offset + __ ld(dst, Address(sp, 1 * BytesPerWord)); // 1: dst sp offset + __ ld(length, Address(sp, 2 * BytesPerWord)); // 2: length sp offset + __ ld(src_pos, Address(sp, 3 * BytesPerWord)); // 3: src_pos sp offset + __ ld(src, Address(sp, 4 * BytesPerWord)); // 4: src sp offset +} + +#undef __ Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP +#define CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP + + // arraycopy sub functions + void generic_arraycopy(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, CodeStub *stub); + void arraycopy_simple_check(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, Register tmp, + CodeStub *stub, int flags); + void arraycopy_checkcast(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, Register tmp, + CodeStub *stub, BasicType basic_type, + address copyfunc_addr, int flags); + void arraycopy_type_check(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, Register tmp, + CodeStub *stub, BasicType basic_type, int flags); + void arraycopy_assert(Register src, Register dst, Register tmp, ciArrayKlass *default_type, int flags); + void arraycopy_prepare_params(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, BasicType basic_type); + void arraycopy_checkcast_prepare_params(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, BasicType basic_type); + void arraycopy_store_args(Register src, Register src_pos, Register length, + Register dst, Register dst_pos); + void arraycopy_load_args(Register src, Register src_pos, Register length, + Register dst, Register dst_pos); + +#endif // CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp @@ -0,0 +1,2264 @@ +/* + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArrayKlass.hpp" +#include "ci/ciInstance.hpp" +#include "code/compiledIC.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/objArrayKlass.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + +#ifndef PRODUCT +#define COMMENT(x) do { __ block_comment(x); } while (0) +#else +#define COMMENT(x) +#endif + +NEEDS_CLEANUP // remove this definitions ? +const Register IC_Klass = t1; // where the IC klass is cached +const Register SYNC_header = x10; // synchronization header +const Register SHIFT_count = x10; // where count for shift operations must be + +#define __ _masm-> + +static void select_different_registers(Register preserve, + Register extra, + Register &tmp1, + Register &tmp2) { + if (tmp1 == preserve) { + assert_different_registers(tmp1, tmp2, extra); + tmp1 = extra; + } else if (tmp2 == preserve) { + assert_different_registers(tmp1, tmp2, extra); + tmp2 = extra; + } + assert_different_registers(preserve, tmp1, tmp2); +} + +static void select_different_registers(Register preserve, + Register extra, + Register &tmp1, + Register &tmp2, + Register &tmp3) { + if (tmp1 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp1 = extra; + } else if (tmp2 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp2 = extra; + } else if (tmp3 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp3 = extra; + } + assert_different_registers(preserve, tmp1, tmp2, tmp3); +} + +bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } + +void LIR_Assembler::clinit_barrier(ciMethod* method) { + assert(VM_Version::supports_fast_class_init_checks(), "sanity"); + assert(!method->holder()->is_not_initialized(), "initialization should have been started"); + + Label L_skip_barrier; + + __ mov_metadata(t1, method->holder()->constant_encoding()); + __ clinit_barrier(t1, t0, &L_skip_barrier /* L_fast_path */); + __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + __ bind(L_skip_barrier); +} + +LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::receiver_opr; +} + +LIR_Opr LIR_Assembler::osrBufferPointer() { + return FrameMap::as_pointer_opr(receiverOpr()->as_register()); +} + +void LIR_Assembler::breakpoint() { Unimplemented(); } + +void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } + +void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } + +static jlong as_long(LIR_Opr data) { + jlong result; + switch (data->type()) { + case T_INT: + result = (data->as_jint()); + break; + case T_LONG: + result = (data->as_jlong()); + break; + default: + ShouldNotReachHere(); + result = 0; // unreachable + } + return result; +} + +Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) { + if (addr->base()->is_illegal()) { + assert(addr->index()->is_illegal(), "must be illegal too"); + __ movptr(tmp, addr->disp()); + return Address(tmp, 0); + } + + Register base = addr->base()->as_pointer_register(); + LIR_Opr index_opr = addr->index(); + + if (index_opr->is_illegal()) { + return Address(base, addr->disp()); + } + + int scale = 
addr->scale(); + if (index_opr->is_cpu_register()) { + Register index; + if (index_opr->is_single_cpu()) { + index = index_opr->as_register(); + } else { + index = index_opr->as_register_lo(); + } + if (scale != 0) { + __ shadd(tmp, index, base, tmp, scale); + } else { + __ add(tmp, base, index); + } + return Address(tmp, addr->disp()); + } else if (index_opr->is_constant()) { + intptr_t addr_offset = (((intptr_t)index_opr->as_constant_ptr()->as_jint()) << scale) + addr->disp(); + return Address(base, addr_offset); + } + + Unimplemented(); + return Address(); +} + +Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { + ShouldNotReachHere(); + return Address(); +} + +Address LIR_Assembler::as_Address(LIR_Address* addr) { + return as_Address(addr, t0); +} + +Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { + return as_Address(addr); +} + +// Ensure a valid Address (base + offset) to a stack-slot. If stack access is +// not encodable as a base + (immediate) offset, generate an explicit address +// calculation to hold the address in t0. +Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) { + precond(size == 4 || size == 8); + Address addr = frame_map()->address_for_slot(index, adjust); + precond(addr.getMode() == Address::base_plus_offset); + precond(addr.base() == sp); + precond(addr.offset() > 0); + uint mask = size - 1; + assert((addr.offset() & mask) == 0, "scaled offsets only"); + + return addr; +} + +void LIR_Assembler::osr_entry() { + offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); + BlockBegin* osr_entry = compilation()->hir()->osr_entry(); + guarantee(osr_entry != NULL, "NULL osr_entry!"); + ValueStack* entry_state = osr_entry->state(); + int number_of_locks = entry_state->locks_size(); + + // we jump here if osr happens with the interpreter + // state set up to continue at the beginning of the + // loop that triggered osr - in particular, we have + // the following registers setup: + // + // x12: osr buffer + // + + //build frame + ciMethod* m = compilation()->method(); + __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); + + // OSR buffer is + // + // locals[nlocals-1..0] + // monitors[0..number_of_locks] + // + // locals is a direct copy of the interpreter frame so in the osr buffer + // so first slot in the local array is the last local from the interpreter + // and last slot is local[0] (receiver) from the interpreter + // + // Similarly with locks. The first lock slot in the osr buffer is the nth lock + // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock + // in the interpreter frame (the method lock if a sync method) + + // Initialize monitors in the compiled activation. + // x12: pointer to osr buffer + // All other registers are dead at this point and the locals will be + // copied into place by code emitted in the IR. + + Register OSR_buf = osrBufferPointer()->as_pointer_register(); + { + assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); + int monitor_offset = BytesPerWord * method()->max_locals() + + (2 * BytesPerWord) * (number_of_locks - 1); + // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in + // the OSR buffer using 2 word entries: first the lock and then + // the oop. 
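+    // For monitor i, slot_offset below addresses its BasicObjectLock in the OSR buffer: the lock
+    // word at offset 0 and the object at offset BytesPerWord, which are copied into the compiled
+    // frame's monitor area via address_for_monitor_lock()/address_for_monitor_object().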
+ for (int i = 0; i < number_of_locks; i++) { + int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); +#ifdef ASSERT + // verify the interpreter's monitor has a non-null object + { + Label L; + __ ld(t0, Address(OSR_buf, slot_offset + 1 * BytesPerWord)); + __ bnez(t0, L); + __ stop("locked object is NULL"); + __ bind(L); + } +#endif // ASSERT + __ ld(x9, Address(OSR_buf, slot_offset + 0)); + __ sd(x9, frame_map()->address_for_monitor_lock(i)); + __ ld(x9, Address(OSR_buf, slot_offset + 1 * BytesPerWord)); + __ sd(x9, frame_map()->address_for_monitor_object(i)); + } + } +} + +// inline cache check; done before the frame is built. +int LIR_Assembler::check_icache() { + Register receiver = FrameMap::receiver_opr->as_register(); + Register ic_klass = IC_Klass; + int start_offset = __ offset(); + Label dont; + __ inline_cache_check(receiver, ic_klass, dont); + + // if icache check fails, then jump to runtime routine + // Note: RECEIVER must still contain the receiver! + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + + // We align the verified entry point unless the method body + // (including its inline cache check) will fit in a single 64-byte + // icache line. + if (!method()->is_accessor() || __ offset() - start_offset > 4 * 4) { + // force alignment after the cache check. + __ align(CodeEntryAlignment); + } + + __ bind(dont); + return start_offset; +} + +void LIR_Assembler::jobject2reg(jobject o, Register reg) { + if (o == NULL) { + __ mv(reg, zr); + } else { + __ movoop(reg, o, /* immediate */ true); + } +} + +void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) { + deoptimize_trap(info); +} + +// This specifies the rsp decrement needed to build the frame +int LIR_Assembler::initial_frame_size_in_bytes() const { + // if rounding, must let FrameMap know! + + return in_bytes(frame_map()->framesize_in_bytes()); +} + +int LIR_Assembler::emit_exception_handler() { + // if the last instruction is a call (typically to do a throw which + // is coming at the end after block reordering) the return address + // must still point into the code area in order to avoid assertion + // failures when searching for the corresponding bci ==> add a nop + // (was bug 5/14/1999 -gri) + __ nop(); + + // generate code for exception handler + address handler_base = __ start_a_stub(exception_handler_size()); + if (handler_base == NULL) { + // not enough space left for the handler + bailout("exception handler overflow"); + return -1; + } + + int offset = code_offset(); + + // the exception oop and pc are in x10, and x13 + // no other registers need to be preserved, so invalidate them + __ invalidate_registers(false, true, true, false, true, true); + + // check that there is really an exception + __ verify_not_null_oop(x10); + + // search an exception handler (x10: exception oop, x13: throwing pc) + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id))); + __ should_not_reach_here(); + guarantee(code_offset() - offset <= exception_handler_size(), "overflow"); + __ end_a_stub(); + + return offset; +} + +// Emit the code to remove the frame from the stack in the exception +// unwind path. 
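+// The handler fetches the pending exception from TLS, unlocks the receiver of a synchronized
+// method, fires the dtrace method-exit probe when requested, and finally tail-jumps to
+// Runtime1::unwind_exception_id with the exception oop in x10.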
+int LIR_Assembler::emit_unwind_handler() {
+#ifndef PRODUCT
+  if (CommentedAssembly) {
+    _masm->block_comment("Unwind handler");
+  }
+#endif // PRODUCT
+
+  int offset = code_offset();
+
+  // Fetch the exception from TLS and clear out exception related thread state
+  __ ld(x10, Address(xthread, JavaThread::exception_oop_offset()));
+  __ sd(zr, Address(xthread, JavaThread::exception_oop_offset()));
+  __ sd(zr, Address(xthread, JavaThread::exception_pc_offset()));
+
+  __ bind(_unwind_handler_entry);
+  __ verify_not_null_oop(x10);
+  if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
+    __ mv(x9, x10);   // Preserve the exception
+  }
+
+  // Perform needed unlocking
+  MonitorExitStub* stub = NULL;
+  if (method()->is_synchronized()) {
+    monitor_address(0, FrameMap::r10_opr);
+    stub = new MonitorExitStub(FrameMap::r10_opr, true, 0);
+    __ unlock_object(x15, x14, x10, *stub->entry());
+    __ bind(*stub->continuation());
+  }
+
+  if (compilation()->env()->dtrace_method_probes()) {
+    __ mv(c_rarg0, xthread);
+    __ mov_metadata(c_rarg1, method()->constant_encoding());
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), c_rarg0, c_rarg1);
+  }
+
+  if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
+    __ mv(x10, x9);   // Restore the exception
+  }
+
+  // remove the activation and dispatch to the unwind handler
+  __ block_comment("remove_frame and dispatch to the unwind handler");
+  __ remove_frame(initial_frame_size_in_bytes());
+  __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id)));
+
+  // Emit the slow path assembly
+  if (stub != NULL) {
+    stub->emit_code(this);
+  }
+
+  return offset;
+}
+
+int LIR_Assembler::emit_deopt_handler() {
+  // if the last instruction is a call (typically to do a throw which
+  // is coming at the end after block reordering) the return address
+  // must still point into the code area in order to avoid assertion
+  // failures when searching for the corresponding bci => add a nop
+  // (was bug 5/14/1999 - gri)
+  __ nop();
+
+  // generate code for exception handler
+  address handler_base = __ start_a_stub(deopt_handler_size());
+  if (handler_base == NULL) {
+    // not enough space left for the handler
+    bailout("deopt handler overflow");
+    return -1;
+  }
+
+  int offset = code_offset();
+
+  __ auipc(ra, 0);
+  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+  guarantee(code_offset() - offset <= deopt_handler_size(), "overflow");
+  __ end_a_stub();
+
+  return offset;
+}
+
+void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) {
+  assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == x10, "word returns are in x10");
+
+  // Pop the stack before the safepoint code
+  __ remove_frame(initial_frame_size_in_bytes());
+
+  if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) {
+    __ reserved_stack_check();
+  }
+
+  code_stub->set_safepoint_offset(__ offset());
+  __ relocate(relocInfo::poll_return_type);
+  __ safepoint_poll(*code_stub->entry(), true /* at_return */, false /* acquire */, true /* in_nmethod */);
+  __ ret();
+}
+
+int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
+  guarantee(info != NULL, "Shouldn't be NULL");
+  __ get_polling_page(t0, relocInfo::poll_type);
+  add_debug_info_for_branch(info);  // This isn't just debug info:
+                                    // it's the oop map
+  __ read_polling_page(t0, 0, relocInfo::poll_type);
+  return __ offset();
+}
+
+void LIR_Assembler::move_regs(Register
from_reg, Register to_reg) { + __ mv(to_reg, from_reg); +} + +void LIR_Assembler::swap_reg(Register a, Register b) { Unimplemented(); } + +void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { + assert(src->is_constant(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + LIR_Const* c = src->as_constant_ptr(); + address const_addr = NULL; + + switch (c->type()) { + case T_INT: + assert(patch_code == lir_patch_none, "no patching handled here"); + __ mvw(dest->as_register(), c->as_jint()); + break; + + case T_ADDRESS: + assert(patch_code == lir_patch_none, "no patching handled here"); + __ mv(dest->as_register(), c->as_jint()); + break; + + case T_LONG: + assert(patch_code == lir_patch_none, "no patching handled here"); + __ mv(dest->as_register_lo(), (intptr_t)c->as_jlong()); + break; + + case T_OBJECT: + case T_ARRAY: + if (patch_code == lir_patch_none) { + jobject2reg(c->as_jobject(), dest->as_register()); + } else { + jobject2reg_with_patching(dest->as_register(), info); + } + break; + + case T_METADATA: + if (patch_code != lir_patch_none) { + klass2reg_with_patching(dest->as_register(), info); + } else { + __ mov_metadata(dest->as_register(), c->as_metadata()); + } + break; + + case T_FLOAT: + const_addr = float_constant(c->as_jfloat()); + assert(const_addr != NULL, "must create float constant in the constant table"); + __ flw(dest->as_float_reg(), InternalAddress(const_addr)); + break; + + case T_DOUBLE: + const_addr = double_constant(c->as_jdouble()); + assert(const_addr != NULL, "must create double constant in the constant table"); + __ fld(dest->as_double_reg(), InternalAddress(const_addr)); + break; + + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { + assert(src->is_constant(), "should not call otherwise"); + assert(dest->is_stack(), "should not call otherwise"); + LIR_Const* c = src->as_constant_ptr(); + switch (c->type()) { + case T_OBJECT: + if (c->as_jobject() == NULL) { + __ sd(zr, frame_map()->address_for_slot(dest->single_stack_ix())); + } else { + const2reg(src, FrameMap::t1_opr, lir_patch_none, NULL); + reg2stack(FrameMap::t1_opr, dest, c->type(), false); + } + break; + case T_ADDRESS: // fall through + const2reg(src, FrameMap::t1_opr, lir_patch_none, NULL); + reg2stack(FrameMap::t1_opr, dest, c->type(), false); + case T_INT: // fall through + case T_FLOAT: + if (c->as_jint_bits() == 0) { + __ sw(zr, frame_map()->address_for_slot(dest->single_stack_ix())); + } else { + __ mvw(t1, c->as_jint_bits()); + __ sw(t1, frame_map()->address_for_slot(dest->single_stack_ix())); + } + break; + case T_LONG: // fall through + case T_DOUBLE: + if (c->as_jlong_bits() == 0) { + __ sd(zr, frame_map()->address_for_slot(dest->double_stack_ix(), + lo_word_offset_in_bytes)); + } else { + __ mv(t1, (intptr_t)c->as_jlong_bits()); + __ sd(t1, frame_map()->address_for_slot(dest->double_stack_ix(), + lo_word_offset_in_bytes)); + } + break; + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) { + assert(src->is_constant(), "should not call otherwise"); + assert(dest->is_address(), "should not call otherwise"); + LIR_Const* c = src->as_constant_ptr(); + LIR_Address* to_addr = dest->as_address_ptr(); + void (Assembler::* insn)(Register Rt, const Address &adr, Register temp); + switch (type) { + case T_ADDRESS: + assert(c->as_jint() == 0, "should be"); + insn = 
&Assembler::sd; break; + case T_LONG: + assert(c->as_jlong() == 0, "should be"); + insn = &Assembler::sd; break; + case T_DOUBLE: + assert(c->as_jdouble() == 0.0, "should be"); + insn = &Assembler::sd; break; + case T_INT: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::sw; break; + case T_FLOAT: + assert(c->as_jfloat() == 0.0f, "should be"); + insn = &Assembler::sw; break; + case T_OBJECT: // fall through + case T_ARRAY: + assert(c->as_jobject() == 0, "should be"); + if (UseCompressedOops && !wide) { + insn = &Assembler::sw; + } else { + insn = &Assembler::sd; + } + break; + case T_CHAR: // fall through + case T_SHORT: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::sh; + break; + case T_BOOLEAN: // fall through + case T_BYTE: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::sb; break; + default: + ShouldNotReachHere(); + insn = &Assembler::sd; // unreachable + } + if (info != NULL) { + add_debug_info_for_null_check_here(info); + } + (_masm->*insn)(zr, as_Address(to_addr), t0); +} + +void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { + assert(src->is_register(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + + // move between cpu-registers + if (dest->is_single_cpu()) { + if (src->type() == T_LONG) { + // Can do LONG -> OBJECT + move_regs(src->as_register_lo(), dest->as_register()); + return; + } + assert(src->is_single_cpu(), "must match"); + if (src->type() == T_OBJECT) { + __ verify_oop(src->as_register()); + } + move_regs(src->as_register(), dest->as_register()); + } else if (dest->is_double_cpu()) { + if (is_reference_type(src->type())) { + __ verify_oop(src->as_register()); + move_regs(src->as_register(), dest->as_register_lo()); + return; + } + assert(src->is_double_cpu(), "must match"); + Register f_lo = src->as_register_lo(); + Register f_hi = src->as_register_hi(); + Register t_lo = dest->as_register_lo(); + Register t_hi = dest->as_register_hi(); + assert(f_hi == f_lo, "must be same"); + assert(t_hi == t_lo, "must be same"); + move_regs(f_lo, t_lo); + } else if (dest->is_single_fpu()) { + assert(src->is_single_fpu(), "expect single fpu"); + __ fmv_s(dest->as_float_reg(), src->as_float_reg()); + } else if (dest->is_double_fpu()) { + assert(src->is_double_fpu(), "expect double fpu"); + __ fmv_d(dest->as_double_reg(), src->as_double_reg()); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { + precond(src->is_register() && dest->is_stack()); + + uint const c_sz32 = sizeof(uint32_t); + uint const c_sz64 = sizeof(uint64_t); + + assert(src->is_register(), "should not call otherwise"); + assert(dest->is_stack(), "should not call otherwise"); + if (src->is_single_cpu()) { + int index = dest->single_stack_ix(); + if (is_reference_type(type)) { + __ sd(src->as_register(), stack_slot_address(index, c_sz64)); + __ verify_oop(src->as_register()); + } else if (type == T_METADATA || type == T_DOUBLE || type == T_ADDRESS) { + __ sd(src->as_register(), stack_slot_address(index, c_sz64)); + } else { + __ sw(src->as_register(), stack_slot_address(index, c_sz32)); + } + } else if (src->is_double_cpu()) { + int index = dest->double_stack_ix(); + Address dest_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); + __ sd(src->as_register_lo(), dest_addr_LO); + } else if (src->is_single_fpu()) { + int index = dest->single_stack_ix(); + __ fsw(src->as_float_reg(), stack_slot_address(index, c_sz32)); + } 
else if (src->is_double_fpu()) { + int index = dest->double_stack_ix(); + __ fsd(src->as_double_reg(), stack_slot_address(index, c_sz64)); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { + LIR_Address* to_addr = dest->as_address_ptr(); + // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src + Register compressed_src = t1; + + if (patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } + + if (is_reference_type(type)) { + __ verify_oop(src->as_register()); + + if (UseCompressedOops && !wide) { + __ encode_heap_oop(compressed_src, src->as_register()); + } else { + compressed_src = src->as_register(); + } + } + + int null_check_here = code_offset(); + + switch (type) { + case T_FLOAT: + __ fsw(src->as_float_reg(), as_Address(to_addr)); + break; + + case T_DOUBLE: + __ fsd(src->as_double_reg(), as_Address(to_addr)); + break; + + case T_ARRAY: // fall through + case T_OBJECT: + if (UseCompressedOops && !wide) { + __ sw(compressed_src, as_Address(to_addr)); + } else { + __ sd(compressed_src, as_Address(to_addr)); + } + break; + case T_METADATA: + // We get here to store a method pointer to the stack to pass to + // a dtrace runtime call. This can't work on 64 bit with + // compressed klass ptrs: T_METADATA can be compressed klass + // ptr or a 64 bit method pointer. + ShouldNotReachHere(); + __ sd(src->as_register(), as_Address(to_addr)); + break; + case T_ADDRESS: + __ sd(src->as_register(), as_Address(to_addr)); + break; + case T_INT: + __ sw(src->as_register(), as_Address(to_addr)); + break; + case T_LONG: + __ sd(src->as_register_lo(), as_Address(to_addr)); + break; + case T_BYTE: // fall through + case T_BOOLEAN: + __ sb(src->as_register(), as_Address(to_addr)); + break; + case T_CHAR: // fall through + case T_SHORT: + __ sh(src->as_register(), as_Address(to_addr)); + break; + default: + ShouldNotReachHere(); + } + + if (info != NULL) { + add_debug_info_for_null_check(null_check_here, info); + } +} + +void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { + precond(src->is_stack() && dest->is_register()); + + uint const c_sz32 = sizeof(uint32_t); + uint const c_sz64 = sizeof(uint64_t); + + if (dest->is_single_cpu()) { + int index = src->single_stack_ix(); + if (type == T_INT) { + __ lw(dest->as_register(), stack_slot_address(index, c_sz32)); + } else if (is_reference_type(type)) { + __ ld(dest->as_register(), stack_slot_address(index, c_sz64)); + __ verify_oop(dest->as_register()); + } else if (type == T_METADATA || type == T_ADDRESS) { + __ ld(dest->as_register(), stack_slot_address(index, c_sz64)); + } else { + __ lwu(dest->as_register(), stack_slot_address(index, c_sz32)); + } + } else if (dest->is_double_cpu()) { + int index = src->double_stack_ix(); + Address src_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); + __ ld(dest->as_register_lo(), src_addr_LO); + } else if (dest->is_single_fpu()) { + int index = src->single_stack_ix(); + __ flw(dest->as_float_reg(), stack_slot_address(index, c_sz32)); + } else if (dest->is_double_fpu()) { + int index = src->double_stack_ix(); + __ fld(dest->as_double_reg(), stack_slot_address(index, c_sz64)); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) { + deoptimize_trap(info); +} + +void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr 
dest, BasicType type) { + LIR_Opr temp; + if (type == T_LONG || type == T_DOUBLE) { + temp = FrameMap::t1_long_opr; + } else { + temp = FrameMap::t1_opr; + } + + stack2reg(src, temp, src->type()); + reg2stack(temp, dest, dest->type(), false); +} + +void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { + assert(src->is_address(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + + LIR_Address* addr = src->as_address_ptr(); + LIR_Address* from_addr = src->as_address_ptr(); + + if (addr->base()->type() == T_OBJECT) { + __ verify_oop(addr->base()->as_pointer_register()); + } + + if (patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } + + if (info != NULL) { + add_debug_info_for_null_check_here(info); + } + + int null_check_here = code_offset(); + switch (type) { + case T_FLOAT: + __ flw(dest->as_float_reg(), as_Address(from_addr)); + break; + case T_DOUBLE: + __ fld(dest->as_double_reg(), as_Address(from_addr)); + break; + case T_ARRAY: // fall through + case T_OBJECT: + if (UseCompressedOops && !wide) { + __ lwu(dest->as_register(), as_Address(from_addr)); + } else { + __ ld(dest->as_register(), as_Address(from_addr)); + } + break; + case T_METADATA: + // We get here to store a method pointer to the stack to pass to + // a dtrace runtime call. This can't work on 64 bit with + // compressed klass ptrs: T_METADATA can be a compressed klass + // ptr or a 64 bit method pointer. + ShouldNotReachHere(); + __ ld(dest->as_register(), as_Address(from_addr)); + break; + case T_ADDRESS: + // FIXME: OMG this is a horrible kludge. Any offset from an + // address that matches klass_offset_in_bytes() will be loaded + // as a word, not a long. 
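+      // With compressed class pointers the klass field is only 32 bits wide, so it is loaded with
+      // lwu here and decoded with decode_klass_not_null() at the end of mem2reg; all other
+      // T_ADDRESS loads remain full 64-bit ld accesses.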
+ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { + __ lwu(dest->as_register(), as_Address(from_addr)); + } else { + __ ld(dest->as_register(), as_Address(from_addr)); + } + break; + case T_INT: + __ lw(dest->as_register(), as_Address(from_addr)); + break; + case T_LONG: + __ ld(dest->as_register_lo(), as_Address_lo(from_addr)); + break; + case T_BYTE: + __ lb(dest->as_register(), as_Address(from_addr)); + break; + case T_BOOLEAN: + __ lbu(dest->as_register(), as_Address(from_addr)); + break; + case T_CHAR: + __ lhu(dest->as_register(), as_Address(from_addr)); + break; + case T_SHORT: + __ lh(dest->as_register(), as_Address(from_addr)); + break; + default: + ShouldNotReachHere(); + } + + if (is_reference_type(type)) { + if (UseCompressedOops && !wide) { + __ decode_heap_oop(dest->as_register()); + } + + if (!UseZGC) { + // Load barrier has not yet been applied, so ZGC can't verify the oop here + __ verify_oop(dest->as_register()); + } + } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { + if (UseCompressedClassPointers) { + __ decode_klass_not_null(dest->as_register()); + } + } +} + +void LIR_Assembler::emit_op3(LIR_Op3* op) { + switch (op->code()) { + case lir_idiv: // fall through + case lir_irem: + arithmetic_idiv(op->code(), + op->in_opr1(), + op->in_opr2(), + op->in_opr3(), + op->result_opr(), + op->info()); + break; + case lir_fmad: + __ fmadd_d(op->result_opr()->as_double_reg(), + op->in_opr1()->as_double_reg(), + op->in_opr2()->as_double_reg(), + op->in_opr3()->as_double_reg()); + break; + case lir_fmaf: + __ fmadd_s(op->result_opr()->as_float_reg(), + op->in_opr1()->as_float_reg(), + op->in_opr2()->as_float_reg(), + op->in_opr3()->as_float_reg()); + break; + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { + Label label; + + emit_branch(condition, cmp_opr1, cmp_opr2, label, /* is_far */ false, + /* is_unordered */ (condition == lir_cond_greaterEqual || condition == lir_cond_greater) ? 
false : true); + + Label done; + move_op(opr2, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack + false, // unaligned + false); // wide + __ j(done); + __ bind(label); + move_op(opr1, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack + false, // unaligned + false); // wide + __ bind(done); +} + +void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + LIR_Condition condition = op->cond(); + if (condition == lir_cond_always) { + if (op->info() != NULL) { + add_debug_info_for_branch(op->info()); + } + } else { + assert(op->in_opr1() != LIR_OprFact::illegalOpr && op->in_opr2() != LIR_OprFact::illegalOpr, "conditional branches must have legal operands"); + } + bool is_unordered = (op->ublock() == op->block()); + emit_branch(condition, op->in_opr1(), op->in_opr2(), *op->label(), /* is_far */ true, is_unordered); +} + +void LIR_Assembler::emit_branch(LIR_Condition cmp_flag, LIR_Opr cmp1, LIR_Opr cmp2, Label& label, + bool is_far, bool is_unordered) { + + if (cmp_flag == lir_cond_always) { + __ j(label); + return; + } + + if (cmp1->is_cpu_register()) { + Register reg1 = as_reg(cmp1); + if (cmp2->is_cpu_register()) { + Register reg2 = as_reg(cmp2); + __ c1_cmp_branch(cmp_flag, reg1, reg2, label, cmp1->type(), is_far); + } else if (cmp2->is_constant()) { + const2reg_helper(cmp2); + __ c1_cmp_branch(cmp_flag, reg1, t0, label, cmp2->type(), is_far); + } else { + ShouldNotReachHere(); + } + } else if (cmp1->is_single_fpu()) { + assert(cmp2->is_single_fpu(), "expect single float register"); + __ c1_float_cmp_branch(cmp_flag, cmp1->as_float_reg(), cmp2->as_float_reg(), label, is_far, is_unordered); + } else if (cmp1->is_double_fpu()) { + assert(cmp2->is_double_fpu(), "expect double float register"); + __ c1_float_cmp_branch(cmp_flag | C1_MacroAssembler::c1_double_branch_mask, + cmp1->as_double_reg(), cmp2->as_double_reg(), label, is_far, is_unordered); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); + LIR_Opr dest = op->result_opr(); + + switch (op->bytecode()) { + case Bytecodes::_i2f: + __ fcvt_s_w(dest->as_float_reg(), src->as_register()); break; + case Bytecodes::_i2d: + __ fcvt_d_w(dest->as_double_reg(), src->as_register()); break; + case Bytecodes::_l2d: + __ fcvt_d_l(dest->as_double_reg(), src->as_register_lo()); break; + case Bytecodes::_l2f: + __ fcvt_s_l(dest->as_float_reg(), src->as_register_lo()); break; + case Bytecodes::_f2d: + __ fcvt_d_s(dest->as_double_reg(), src->as_float_reg()); break; + case Bytecodes::_d2f: + __ fcvt_s_d(dest->as_float_reg(), src->as_double_reg()); break; + case Bytecodes::_i2c: + __ zero_extend(dest->as_register(), src->as_register(), 16); break; + case Bytecodes::_i2l: + __ addw(dest->as_register_lo(), src->as_register(), zr); break; + case Bytecodes::_i2s: + __ sign_extend(dest->as_register(), src->as_register(), 16); break; + case Bytecodes::_i2b: + __ sign_extend(dest->as_register(), src->as_register(), 8); break; + case Bytecodes::_l2i: + _masm->block_comment("FIXME: This coulde be no-op"); + __ addw(dest->as_register(), src->as_register_lo(), zr); break; + case Bytecodes::_d2l: + __ fcvt_l_d_safe(dest->as_register_lo(), src->as_double_reg()); break; + case Bytecodes::_f2i: + __ fcvt_w_s_safe(dest->as_register(), src->as_float_reg()); break; + case Bytecodes::_f2l: + __ fcvt_l_s_safe(dest->as_register_lo(), src->as_float_reg()); break; + case Bytecodes::_d2i: + __ fcvt_w_d_safe(dest->as_register(), src->as_double_reg()); break; + default: + 
ShouldNotReachHere(); + } +} + +void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { + if (op->init_check()) { + __ lbu(t0, Address(op->klass()->as_register(), + InstanceKlass::init_state_offset())); + __ mvw(t1, InstanceKlass::fully_initialized); + add_debug_info_for_null_check_here(op->stub()->info()); + __ bne(t0, t1, *op->stub()->entry(), /* is_far */ true); + } + + __ allocate_object(op->obj()->as_register(), + op->tmp1()->as_register(), + op->tmp2()->as_register(), + op->header_size(), + op->object_size(), + op->klass()->as_register(), + *op->stub()->entry()); + + __ bind(*op->stub()->continuation()); +} + +void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { + Register len = op->len()->as_register(); + + if (UseSlowPath || + (!UseFastNewObjectArray && is_reference_type(op->type())) || + (!UseFastNewTypeArray && !is_reference_type(op->type()))) { + __ j(*op->stub()->entry()); + } else { + Register tmp1 = op->tmp1()->as_register(); + Register tmp2 = op->tmp2()->as_register(); + Register tmp3 = op->tmp3()->as_register(); + if (len == tmp1) { + tmp1 = tmp3; + } else if (len == tmp2) { + tmp2 = tmp3; + } else if (len == tmp3) { + // everything is ok + } else { + __ mv(tmp3, len); + } + __ allocate_array(op->obj()->as_register(), + len, + tmp1, + tmp2, + arrayOopDesc::header_size(op->type()), + array_element_size(op->type()), + op->klass()->as_register(), + *op->stub()->entry()); + } + __ bind(*op->stub()->continuation()); +} + +void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, + Register recv, Label* update_done) { + for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { + Label next_test; + // See if the receiver is receiver[n]. + __ ld(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); + __ bne(recv, t1, next_test); + Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); + __ add_memory_int64(data_addr, DataLayout::counter_increment); + __ j(*update_done); + __ bind(next_test); + } + + // Didn't find receiver; find next empty slot and fill it in + for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { + Label next_test; + Address recv_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i))); + __ ld(t1, recv_addr); + __ bnez(t1, next_test); + __ sd(recv, recv_addr); + __ li(t1, DataLayout::counter_increment); + __ sd(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); + __ j(*update_done); + __ bind(next_test); + } +} + +void LIR_Assembler::data_check(LIR_OpTypeCheck *op, ciMethodData **md, ciProfileData **data) { + ciMethod* method = op->profiled_method(); + assert(method != NULL, "Should have method"); + int bci = op->profiled_bci(); + *md = method->method_data_or_null(); + guarantee(*md != NULL, "Sanity"); + *data = ((*md)->bci_to_data(bci)); + assert(*data != NULL, "need data for type check"); + assert((*data)->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); +} + +void LIR_Assembler::typecheck_helper_slowcheck(ciKlass *k, Register obj, Register Rtmp1, + Register k_RInfo, Register klass_RInfo, + Label *failure_target, Label *success_target) { + // get object class + // not a safepoint as obj null check happens earlier + __ load_klass(klass_RInfo, obj); + if (k->is_loaded()) { + // See if we get an immediate positive hit + __ ld(t0, Address(klass_RInfo, int64_t(k->super_check_offset()))); + if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != 
k->super_check_offset()) {
+      __ bne(k_RInfo, t0, *failure_target, /* is_far */ true);
+      // successful cast, fall through to profile or jump
+    } else {
+      // See if we get an immediate positive hit
+      __ beq(k_RInfo, t0, *success_target);
+      // check for self
+      __ beq(klass_RInfo, k_RInfo, *success_target);
+
+      __ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo
+      __ sd(k_RInfo, Address(sp, 0));             // sub klass
+      __ sd(klass_RInfo, Address(sp, wordSize));  // super klass
+      __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+      // load result to k_RInfo
+      __ ld(k_RInfo, Address(sp, 0));
+      __ addi(sp, sp, 2 * wordSize); // 2: pop out k_RInfo and klass_RInfo
+      // result is a boolean
+      __ beqz(k_RInfo, *failure_target, /* is_far */ true);
+      // successful cast, fall through to profile or jump
+    }
+  } else {
+    // perform the fast part of the checking logic
+    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
+    // call out-of-line instance of __ check_klass_subtype_slow_path(...)
+    __ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo
+    __ sd(klass_RInfo, Address(sp, wordSize));  // sub klass
+    __ sd(k_RInfo, Address(sp, 0));             // super klass
+    __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+    // load result to k_RInfo
+    __ ld(k_RInfo, Address(sp, 0));
+    __ addi(sp, sp, 2 * wordSize); // 2: pop out k_RInfo and klass_RInfo
+    // result is a boolean
+    __ beqz(k_RInfo, *failure_target, /* is_far */ true);
+    // successful cast, fall through to profile or jump
+  }
+}
+
+void LIR_Assembler::profile_object(ciMethodData* md, ciProfileData* data, Register obj,
+                                   Register klass_RInfo, Label* obj_is_null) {
+  Label not_null;
+  __ bnez(obj, not_null);
+  // Object is null, update MDO and exit
+  Register mdo = klass_RInfo;
+  __ mov_metadata(mdo, md->constant_encoding());
+  Address data_addr = __ form_address(t1, mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset()));
+  __ lbu(t0, data_addr);
+  __ ori(t0, t0, BitData::null_seen_byte_constant());
+  __ sb(t0, data_addr);
+  __ j(*obj_is_null);
+  __ bind(not_null);
+}
+
+void LIR_Assembler::typecheck_loaded(LIR_OpTypeCheck *op, ciKlass* k, Register k_RInfo) {
+  if (!k->is_loaded()) {
+    klass2reg_with_patching(k_RInfo, op->info_for_patch());
+  } else {
+    __ mov_metadata(k_RInfo, k->constant_encoding());
+  }
+}
+
+void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) {
+  Register obj = op->object()->as_register();
+  Register k_RInfo = op->tmp1()->as_register();
+  Register klass_RInfo = op->tmp2()->as_register();
+  Register dst = op->result_opr()->as_register();
+  ciKlass* k = op->klass();
+  Register Rtmp1 = noreg;
+
+  // check if it needs to be profiled
+  ciMethodData* md = NULL;
+  ciProfileData* data = NULL;
+
+  const bool should_profile = op->should_profile();
+  if (should_profile) {
+    data_check(op, &md, &data);
+  }
+  Label profile_cast_success, profile_cast_failure;
+  Label *success_target = should_profile ? &profile_cast_success : success;
+  Label *failure_target = should_profile ?
&profile_cast_failure : failure; + + if (obj == k_RInfo) { + k_RInfo = dst; + } else if (obj == klass_RInfo) { + klass_RInfo = dst; + } + if (k->is_loaded() && !UseCompressedClassPointers) { + select_different_registers(obj, dst, k_RInfo, klass_RInfo); + } else { + Rtmp1 = op->tmp3()->as_register(); + select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1); + } + + assert_different_registers(obj, k_RInfo, klass_RInfo); + + if (should_profile) { + profile_object(md, data, obj, klass_RInfo, obj_is_null); + } else { + __ beqz(obj, *obj_is_null); + } + + typecheck_loaded(op, k, k_RInfo); + __ verify_oop(obj); + + if (op->fast_check()) { + // get object class + // not a safepoint as obj null check happens earlier + __ load_klass(t0, obj); + __ bne(t0, k_RInfo, *failure_target, /* is_far */ true); + // successful cast, fall through to profile or jump + } else { + typecheck_helper_slowcheck(k, obj, Rtmp1, k_RInfo, klass_RInfo, failure_target, success_target); + } + if (should_profile) { + type_profile(obj, md, klass_RInfo, k_RInfo, data, success, failure, profile_cast_success, profile_cast_failure); + } + __ j(*success); +} + +void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { + const bool should_profile = op->should_profile(); + + LIR_Code code = op->code(); + if (code == lir_store_check) { + typecheck_lir_store(op, should_profile); + } else if (code == lir_checkcast) { + Register obj = op->object()->as_register(); + Register dst = op->result_opr()->as_register(); + Label success; + emit_typecheck_helper(op, &success, op->stub()->entry(), &success); + __ bind(success); + if (dst != obj) { + __ mv(dst, obj); + } + } else if (code == lir_instanceof) { + Register obj = op->object()->as_register(); + Register dst = op->result_opr()->as_register(); + Label success, failure, done; + emit_typecheck_helper(op, &success, &failure, &failure); + __ bind(failure); + __ mv(dst, zr); + __ j(done); + __ bind(success); + __ mv(dst, 1); + __ bind(done); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { + assert(VM_Version::supports_cx8(), "wrong machine"); + Register addr; + if (op->addr()->is_register()) { + addr = as_reg(op->addr()); + } else { + assert(op->addr()->is_address(), "what else?"); + LIR_Address* addr_ptr = op->addr()->as_address_ptr(); + assert(addr_ptr->disp() == 0, "need 0 disp"); + assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); + addr = as_reg(addr_ptr->base()); + } + Register newval = as_reg(op->new_value()); + Register cmpval = as_reg(op->cmp_value()); + + if (op->code() == lir_cas_obj) { + if (UseCompressedOops) { + Register tmp1 = op->tmp1()->as_register(); + assert(op->tmp1()->is_valid(), "must be"); + __ encode_heap_oop(tmp1, cmpval); + cmpval = tmp1; + __ encode_heap_oop(t1, newval); + newval = t1; + caswu(addr, newval, cmpval); + } else { + casl(addr, newval, cmpval); + } + } else if (op->code() == lir_cas_int) { + casw(addr, newval, cmpval); + } else { + casl(addr, newval, cmpval); + } +} + +void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) { + switch (code) { + case lir_abs: __ fabs_d(dest->as_double_reg(), value->as_double_reg()); break; + case lir_sqrt: __ fsqrt_d(dest->as_double_reg(), value->as_double_reg()); break; + default: ShouldNotReachHere(); + } +} + +void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) { + assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or 
double register"); + Register Rleft = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); + if (dst->is_single_cpu()) { + Register Rdst = dst->as_register(); + if (right->is_constant()) { + int right_const = right->as_jint(); + if (Assembler::operand_valid_for_add_immediate(right_const)) { + logic_op_imm(Rdst, Rleft, right_const, code); + __ addw(Rdst, Rdst, zr); + } else { + __ mv(t0, right_const); + logic_op_reg32(Rdst, Rleft, t0, code); + } + } else { + Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); + logic_op_reg32(Rdst, Rleft, Rright, code); + } + } else { + Register Rdst = dst->as_register_lo(); + if (right->is_constant()) { + long right_const = right->as_jlong(); + if (Assembler::operand_valid_for_add_immediate(right_const)) { + logic_op_imm(Rdst, Rleft, right_const, code); + } else { + __ mv(t0, right_const); + logic_op_reg(Rdst, Rleft, t0, code); + } + } else { + Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); + logic_op_reg(Rdst, Rleft, Rright, code); + } + } +} + +void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr src, LIR_Opr result, LIR_Op2* op) { + ShouldNotCallThis(); +} + +void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op) { + if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) { + bool is_unordered_less = (code == lir_ucmp_fd2i); + if (left->is_single_fpu()) { + __ float_cmp(true, is_unordered_less ? -1 : 1, + left->as_float_reg(), right->as_float_reg(), dst->as_register()); + } else if (left->is_double_fpu()) { + __ float_cmp(false, is_unordered_less ? -1 : 1, + left->as_double_reg(), right->as_double_reg(), dst->as_register()); + } else { + ShouldNotReachHere(); + } + } else if (code == lir_cmp_l2i) { + __ cmp_l2i(dst->as_register(), left->as_register_lo(), right->as_register_lo()); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::align_call(LIR_Code code) { + // With RVC a call instruction may get 2-byte aligned. + // The address of the call instruction needs to be 4-byte aligned to + // ensure that it does not span a cache line so that it can be patched. 
+ __ align(4); +} + +void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { + address call = __ trampoline_call(Address(op->addr(), rtype)); + if (call == NULL) { + bailout("trampoline stub overflow"); + return; + } + add_call_info(code_offset(), op->info()); +} + +void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { + address call = __ ic_call(op->addr()); + if (call == NULL) { + bailout("trampoline stub overflow"); + return; + } + add_call_info(code_offset(), op->info()); +} + +void LIR_Assembler::emit_static_call_stub() { + address call_pc = __ pc(); + assert((__ offset() % 4) == 0, "bad alignment"); + address stub = __ start_a_stub(call_stub_size()); + if (stub == NULL) { + bailout("static call stub overflow"); + return; + } + + int start = __ offset(); + + __ relocate(static_stub_Relocation::spec(call_pc)); + __ emit_static_call_stub(); + + assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() + <= call_stub_size(), "stub too big"); + __ end_a_stub(); +} + +void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { + assert(exceptionOop->as_register() == x10, "must match"); + assert(exceptionPC->as_register() == x13, "must match"); + + // exception object is not added to oop map by LinearScan + // (LinearScan assumes that no oops are in fixed registers) + info->add_register_oop(exceptionOop); + Runtime1::StubID unwind_id; + + // get current pc information + // pc is only needed if the method has an exception handler; the unwind code does not need it. + if (compilation()->debug_info_recorder()->last_pc_offset() == __ offset()) { + // As no instructions have been generated yet for this LIR node it's + // possible that an oop map already exists for the current offset. + // In that case insert a dummy NOP here to ensure all oop map PCs + // are unique. See JDK-8237483. + __ nop(); + } + int pc_for_athrow_offset = __ offset(); + InternalAddress pc_for_athrow(__ pc()); + int32_t off = 0; + __ la_patchable(exceptionPC->as_register(), pc_for_athrow, off); + __ addi(exceptionPC->as_register(), exceptionPC->as_register(), off); + add_call_info(pc_for_athrow_offset, info); // for exception handler + + __ verify_not_null_oop(x10); + // search an exception handler (x10: exception oop, x13: throwing pc) + if (compilation()->has_fpu_code()) { + unwind_id = Runtime1::handle_exception_id; + } else { + unwind_id = Runtime1::handle_exception_nofpu_id; + } + __ far_call(RuntimeAddress(Runtime1::entry_for(unwind_id))); + __ nop(); +} + +void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) { + assert(exceptionOop->as_register() == x10, "must match"); + __ j(_unwind_handler_entry); +} + +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { + Register left_reg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); + Register dest_reg = dest->is_single_cpu() ? 
dest->as_register() : dest->as_register_lo(); + Register count_reg = count->as_register(); + if (dest->is_single_cpu()) { + assert (dest->type() == T_INT, "unexpected result type"); + assert (left->type() == T_INT, "unexpected left type"); + __ andi(t0, count_reg, 31); // should not shift more than 31 bits + switch (code) { + case lir_shl: __ sllw(dest_reg, left_reg, t0); break; + case lir_shr: __ sraw(dest_reg, left_reg, t0); break; + case lir_ushr: __ srlw(dest_reg, left_reg, t0); break; + default: ShouldNotReachHere(); + } + } else if (dest->is_double_cpu()) { + __ andi(t0, count_reg, 63); // should not shift more than 63 bits + switch (code) { + case lir_shl: __ sll(dest_reg, left_reg, t0); break; + case lir_shr: __ sra(dest_reg, left_reg, t0); break; + case lir_ushr: __ srl(dest_reg, left_reg, t0); break; + default: ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { + Register left_reg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); + Register dest_reg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); + if (dest->is_single_cpu()) { + assert (dest->type() == T_INT, "unexpected result type"); + assert (left->type() == T_INT, "unexpected left type"); + count &= 0x1f; + if (count != 0) { + switch (code) { + case lir_shl: __ slliw(dest_reg, left_reg, count); break; + case lir_shr: __ sraiw(dest_reg, left_reg, count); break; + case lir_ushr: __ srliw(dest_reg, left_reg, count); break; + default: ShouldNotReachHere(); + } + } else { + move_regs(left_reg, dest_reg); + } + } else if (dest->is_double_cpu()) { + count &= 0x3f; + if (count != 0) { + switch (code) { + case lir_shl: __ slli(dest_reg, left_reg, count); break; + case lir_shr: __ srai(dest_reg, left_reg, count); break; + case lir_ushr: __ srli(dest_reg, left_reg, count); break; + default: ShouldNotReachHere(); + } + } else { + move_regs(left->as_register_lo(), dest->as_register_lo()); + } + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::emit_lock(LIR_OpLock* op) { + Register obj = op->obj_opr()->as_register(); // may not be an oop + Register hdr = op->hdr_opr()->as_register(); + Register lock = op->lock_opr()->as_register(); + if (!UseFastLocking) { + __ j(*op->stub()->entry()); + } else if (op->code() == lir_lock) { + Register scratch = noreg; + if (UseBiasedLocking) { + scratch = op->scratch_opr()->as_register(); + } + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + // add debug info for NullPointerException only if one is possible + int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); + if (op->info() != NULL) { + add_debug_info_for_null_check(null_check_offset, op->info()); + } + } else if (op->code() == lir_unlock) { + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + __ unlock_object(hdr, obj, lock, *op->stub()->entry()); + } else { + Unimplemented(); + } + __ bind(*op->stub()->continuation()); +} + + +void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { + ciMethod* method = op->profiled_method(); + int bci = op->profiled_bci(); + + // Update counter for all call types + ciMethodData* md = method->method_data_or_null(); + guarantee(md != NULL, "Sanity"); + ciProfileData* data = md->bci_to_data(bci); + assert(data != NULL && data->is_CounterData(), "need CounterData for calls"); + 
assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); + Register mdo = op->mdo()->as_register(); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + // Perform additional virtual call profiling for invokevirtual and + // invokeinterface bytecodes + if (op->should_profile_receiver_type()) { + assert(op->recv()->is_single_cpu(), "recv must be allocated"); + Register recv = op->recv()->as_register(); + assert_different_registers(mdo, recv); + assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); + ciKlass* known_klass = op->known_holder(); + if (C1OptimizeVirtualCallProfiling && known_klass != NULL) { + // We know the type that will be seen at this call site; we can + // statically update the MethodData* rather than needing to do + // dynamic tests on the receiver type + // NOTE: we should probably put a lock around this search to + // avoid collisions by concurrent compilations + ciVirtualCallData* vc_data = (ciVirtualCallData*) data; + uint i; + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (known_klass->equals(receiver)) { + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); + __ add_memory_int64(data_addr, DataLayout::counter_increment); + return; + } + } + + // Receiver type not found in profile data; select an empty slot + // Note that this is less efficient than it should be because it + // always does a write to the receiver part of the + // VirtualCallData rather than just the first time + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (receiver == NULL) { + Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); + __ mov_metadata(t1, known_klass->constant_encoding()); + __ sd(t1, recv_addr); + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); + __ add_memory_int64(data_addr, DataLayout::counter_increment); + return; + } + } + } else { + __ load_klass(recv, recv); + Label update_done; + type_profile_helper(mdo, md, data, recv, &update_done); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. + __ add_memory_int64(counter_addr, DataLayout::counter_increment); + + __ bind(update_done); + } + } else { + // Static call + __ add_memory_int64(counter_addr, DataLayout::counter_increment); + } +} + +void LIR_Assembler::emit_delay(LIR_OpDelay*) { Unimplemented(); } + +void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { + __ la(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no)); +} + +void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { Unimplemented(); } + +void LIR_Assembler::check_conflict(ciKlass* exact_klass, intptr_t current_klass, + Register tmp, Label &next, Label &none, + Address mdo_addr) { + if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { + if (exact_klass != NULL) { + __ mov_metadata(tmp, exact_klass->constant_encoding()); + } else { + __ load_klass(tmp, tmp); + } + + __ ld(t1, mdo_addr); + __ xorr(tmp, tmp, t1); + __ andi(t0, tmp, TypeEntries::type_klass_mask); + // klass seen before, nothing to do. The unknown bit may have been + // set already but no need to check. + __ beqz(t0, next); + + // already unknown. Nothing to do anymore. 
+ __ andi(t0, tmp, TypeEntries::type_unknown); + __ bnez(t0, next); + + if (TypeEntries::is_type_none(current_klass)) { + __ beqz(t1, none); + __ li(t0, (u1)TypeEntries::null_seen); + __ beq(t0, t1, none); + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the + // profiling to this obj's klass + __ membar(MacroAssembler::LoadLoad); + __ ld(t1, mdo_addr); + __ xorr(tmp, tmp, t1); + __ andi(t0, tmp, TypeEntries::type_klass_mask); + __ beqz(t0, next); + } + } else { + assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && + ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only"); + + __ ld(tmp, mdo_addr); + // already unknown. Nothing to do anymore. + __ andi(t0, tmp, TypeEntries::type_unknown); + __ bnez(t0, next); + } + + // different than before. Cannot keep accurate profile. + __ ld(t1, mdo_addr); + __ ori(t1, t1, TypeEntries::type_unknown); + __ sd(t1, mdo_addr); + + if (TypeEntries::is_type_none(current_klass)) { + __ j(next); + + __ bind(none); + // first time here. Set profile type. + __ sd(tmp, mdo_addr); + } +} + +void LIR_Assembler::check_no_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp, + Address mdo_addr, Label &next) { + // There's a single possible klass at this profile point + assert(exact_klass != NULL, "should be"); + if (TypeEntries::is_type_none(current_klass)) { + __ mov_metadata(tmp, exact_klass->constant_encoding()); + __ ld(t1, mdo_addr); + __ xorr(tmp, tmp, t1); + __ andi(t0, tmp, TypeEntries::type_klass_mask); + __ beqz(t0, next); +#ifdef ASSERT + { + Label ok; + __ ld(t0, mdo_addr); + __ beqz(t0, ok); + __ li(t1, (u1)TypeEntries::null_seen); + __ beq(t0, t1, ok); + // may have been set by another thread + __ membar(MacroAssembler::LoadLoad); + __ mov_metadata(t0, exact_klass->constant_encoding()); + __ ld(t1, mdo_addr); + __ xorr(t1, t0, t1); + __ andi(t1, t1, TypeEntries::type_mask); + __ beqz(t1, ok); + + __ stop("unexpected profiling mismatch"); + __ bind(ok); + } +#endif + // first time here. Set profile type. + __ sd(tmp, mdo_addr); + } else { + assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && + ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); + + __ ld(tmp, mdo_addr); + // already unknown. Nothing to do anymore. 
+ __ andi(t0, tmp, TypeEntries::type_unknown); + __ bnez(t0, next); + + __ ori(tmp, tmp, TypeEntries::type_unknown); + __ sd(tmp, mdo_addr); + } +} + +void LIR_Assembler::check_null(Register tmp, Label &update, intptr_t current_klass, + Address mdo_addr, bool do_update, Label &next) { + __ bnez(tmp, update); + if (!TypeEntries::was_null_seen(current_klass)) { + __ ld(t1, mdo_addr); + __ ori(t1, t1, TypeEntries::null_seen); + __ sd(t1, mdo_addr); + } + if (do_update) { + __ j(next); + } +} + +void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { + COMMENT("emit_profile_type {"); + Register obj = op->obj()->as_register(); + Register tmp = op->tmp()->as_pointer_register(); + Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); + ciKlass* exact_klass = op->exact_klass(); + intptr_t current_klass = op->current_klass(); + bool not_null = op->not_null(); + bool no_conflict = op->no_conflict(); + + Label update, next, none; + + bool do_null = !not_null; + bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; + bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; + + assert(do_null || do_update, "why are we here?"); + assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); + assert_different_registers(tmp, t0, t1, mdo_addr.base()); + + __ verify_oop(obj); + + if (tmp != obj) { + __ mv(tmp, obj); + } + if (do_null) { + check_null(tmp, update, current_klass, mdo_addr, do_update, next); +#ifdef ASSERT + } else { + __ bnez(tmp, update); + __ stop("unexpected null obj"); +#endif + } + + __ bind(update); + + if (do_update) { +#ifdef ASSERT + if (exact_klass != NULL) { + check_exact_klass(tmp, exact_klass); + } +#endif + if (!no_conflict) { + check_conflict(exact_klass, current_klass, tmp, next, none, mdo_addr); + } else { + check_no_conflict(exact_klass, current_klass, tmp, mdo_addr, next); + } + + __ bind(next); + } + COMMENT("} emit_profile_type"); +} + +void LIR_Assembler::align_backward_branch_target() { } + +void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { + // tmp must be unused + assert(tmp->is_illegal(), "wasting a register if tmp is allocated"); + + if (left->is_single_cpu()) { + assert(dest->is_single_cpu(), "expect single result reg"); + __ negw(dest->as_register(), left->as_register()); + } else if (left->is_double_cpu()) { + assert(dest->is_double_cpu(), "expect double result reg"); + __ neg(dest->as_register_lo(), left->as_register_lo()); + } else if (left->is_single_fpu()) { + assert(dest->is_single_fpu(), "expect single float result reg"); + __ fneg_s(dest->as_float_reg(), left->as_float_reg()); + } else { + assert(left->is_double_fpu(), "expect double float operand reg"); + assert(dest->is_double_fpu(), "expect double float result reg"); + __ fneg_d(dest->as_double_reg(), left->as_double_reg()); + } +} + + +void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { + if (patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } + + LIR_Address* adr = addr->as_address_ptr(); + Register dst = dest->as_register_lo(); + + assert_different_registers(dst, t0); + if (adr->base()->is_valid() && dst == adr->base()->as_pointer_register() && (!adr->index()->is_cpu_register())) { + int scale = adr->scale(); + intptr_t offset = adr->disp(); + LIR_Opr index_op = adr->index(); + if (index_op->is_constant()) { + offset += ((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale; + } + + if 
(!is_imm_in_range(offset, 12, 0)) { + __ la(t0, as_Address(adr)); + __ mv(dst, t0); + return; + } + } + + __ la(dst, as_Address(adr)); +} + + +void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) { + assert(!tmp->is_valid(), "don't need temporary"); + + CodeBlob *cb = CodeCache::find_blob(dest); + if (cb != NULL) { + __ far_call(RuntimeAddress(dest)); + } else { + int32_t offset = 0; + __ la_patchable(t0, RuntimeAddress(dest), offset); + __ jalr(x1, t0, offset); + } + + if (info != NULL) { + add_call_info_here(info); + } +} + +void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { + if (dest->is_address() || src->is_address()) { + move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /*unaligned*/false, /* wide */ false); + } else { + ShouldNotReachHere(); + } +} + +#ifdef ASSERT +// emit run-time assertion +void LIR_Assembler::emit_assert(LIR_OpAssert* op) { + assert(op->code() == lir_assert, "must be"); + + Label ok; + if (op->in_opr1()->is_valid()) { + assert(op->in_opr2()->is_valid(), "both operands must be valid"); + bool is_unordered = false; + LIR_Condition cond = op->condition(); + emit_branch(cond, op->in_opr1(), op->in_opr2(), ok, /* is_far */ false, + /* is_unordered */(cond == lir_cond_greaterEqual || cond == lir_cond_greater) ? false : true); + } else { + assert(op->in_opr2()->is_illegal(), "both operands must be illegal"); + assert(op->condition() == lir_cond_always, "no other conditions allowed"); + } + + if (op->halt()) { + const char* str = __ code_string(op->msg()); + __ stop(str); + } else { + breakpoint(); + } + __ bind(ok); +} +#endif + +#ifndef PRODUCT +#define COMMENT(x) do { __ block_comment(x); } while (0) +#else +#define COMMENT(x) +#endif + +void LIR_Assembler::membar() { + COMMENT("membar"); + __ membar(MacroAssembler::AnyAny); +} + +void LIR_Assembler::membar_acquire() { + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); +} + +void LIR_Assembler::membar_release() { + __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); +} + +void LIR_Assembler::membar_loadload() { + __ membar(MacroAssembler::LoadLoad); +} + +void LIR_Assembler::membar_storestore() { + __ membar(MacroAssembler::StoreStore); +} + +void LIR_Assembler::membar_loadstore() { __ membar(MacroAssembler::LoadStore); } + +void LIR_Assembler::membar_storeload() { __ membar(MacroAssembler::StoreLoad); } + +void LIR_Assembler::on_spin_wait() { + Unimplemented(); +} + +void LIR_Assembler::get_thread(LIR_Opr result_reg) { + __ mv(result_reg->as_register(), xthread); +} + +void LIR_Assembler::peephole(LIR_List *lir) {} + +void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp_op) { + Address addr = as_Address(src->as_address_ptr()); + BasicType type = src->type(); + bool is_oop = is_reference_type(type); + + get_op(type); + + switch (code) { + case lir_xadd: + { + RegisterOrConstant inc; + Register tmp = as_reg(tmp_op); + Register dst = as_reg(dest); + if (data->is_constant()) { + inc = RegisterOrConstant(as_long(data)); + assert_different_registers(dst, addr.base(), tmp); + assert_different_registers(tmp, t0); + } else { + inc = RegisterOrConstant(as_reg(data)); + assert_different_registers(inc.as_register(), dst, addr.base(), tmp); + } + __ la(tmp, addr); + (_masm->*add)(dst, inc, tmp); + break; + } + case lir_xchg: + { + Register tmp = tmp_op->as_register(); + Register obj = as_reg(data); + Register dst = 
as_reg(dest); + if (is_oop && UseCompressedOops) { + __ encode_heap_oop(t0, obj); + obj = t0; + } + assert_different_registers(obj, addr.base(), tmp, dst); + __ la(tmp, addr); + (_masm->*xchg)(dst, obj, tmp); + if (is_oop && UseCompressedOops) { + __ decode_heap_oop(dst); + } + } + break; + default: + ShouldNotReachHere(); + } + __ membar(MacroAssembler::AnyAny); +} + +int LIR_Assembler::array_element_size(BasicType type) const { + int elem_size = type2aelembytes(type); + return exact_log2(elem_size); +} + +// helper functions which checks for overflow and sets bailout if it +// occurs. Always returns a valid embeddable pointer but in the +// bailout case the pointer won't be to unique storage. +address LIR_Assembler::float_constant(float f) { + address const_addr = __ float_constant(f); + if (const_addr == NULL) { + bailout("const section overflow"); + return __ code()->consts()->start(); + } else { + return const_addr; + } +} + +address LIR_Assembler::double_constant(double d) { + address const_addr = __ double_constant(d); + if (const_addr == NULL) { + bailout("const section overflow"); + return __ code()->consts()->start(); + } else { + return const_addr; + } +} + +address LIR_Assembler::int_constant(jlong n) { + address const_addr = __ long_constant(n); + if (const_addr == NULL) { + bailout("const section overflow"); + return __ code()->consts()->start(); + } else { + return const_addr; + } +} + +void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { + __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */, + Assembler::rl /* release */, t0, true /* result as bool */); + __ seqz(t0, t0); // cmpxchg not equal, set t0 to 1 + __ membar(MacroAssembler::AnyAny); +} + +void LIR_Assembler::caswu(Register addr, Register newval, Register cmpval) { + __ cmpxchg(addr, cmpval, newval, Assembler::uint32, Assembler::aq /* acquire */, + Assembler::rl /* release */, t0, true /* result as bool */); + __ seqz(t0, t0); // cmpxchg not equal, set t0 to 1 + __ membar(MacroAssembler::AnyAny); +} + +void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) { + __ cmpxchg(addr, cmpval, newval, Assembler::int64, Assembler::aq /* acquire */, + Assembler::rl /* release */, t0, true /* result as bool */); + __ seqz(t0, t0); // cmpxchg not equal, set t0 to 1 + __ membar(MacroAssembler::AnyAny); +} + +void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) { + address target = NULL; + + switch (patching_id(info)) { + case PatchingStub::access_field_id: + target = Runtime1::entry_for(Runtime1::access_field_patching_id); + break; + case PatchingStub::load_klass_id: + target = Runtime1::entry_for(Runtime1::load_klass_patching_id); + break; + case PatchingStub::load_mirror_id: + target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); + break; + case PatchingStub::load_appendix_id: + target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); + break; + default: ShouldNotReachHere(); + } + + __ far_call(RuntimeAddress(target)); + add_call_info_here(info); +} + +void LIR_Assembler::check_exact_klass(Register tmp, ciKlass* exact_klass) { + Label ok; + __ load_klass(tmp, tmp); + __ mov_metadata(t0, exact_klass->constant_encoding()); + __ beq(tmp, t0, ok); + __ stop("exact klass and actual klass differ"); + __ bind(ok); +} + +void LIR_Assembler::get_op(BasicType type) { + switch (type) { + case T_INT: + xchg = &MacroAssembler::atomic_xchgalw; + add = &MacroAssembler::atomic_addalw; + break; + case T_LONG: + xchg = &MacroAssembler::atomic_xchgal; + 
add = &MacroAssembler::atomic_addal; + break; + case T_OBJECT: + case T_ARRAY: + if (UseCompressedOops) { + xchg = &MacroAssembler::atomic_xchgalwu; + add = &MacroAssembler::atomic_addalw; + } else { + xchg = &MacroAssembler::atomic_xchgal; + add = &MacroAssembler::atomic_addal; + } + break; + default: + ShouldNotReachHere(); + } +} + +// emit_opTypeCheck sub functions +void LIR_Assembler::typecheck_lir_store(LIR_OpTypeCheck* op, bool should_profile) { + Register value = op->object()->as_register(); + Register array = op->array()->as_register(); + Register k_RInfo = op->tmp1()->as_register(); + Register klass_RInfo = op->tmp2()->as_register(); + Register Rtmp1 = op->tmp3()->as_register(); + + CodeStub* stub = op->stub(); + + // check if it needs to be profiled + ciMethodData* md = NULL; + ciProfileData* data = NULL; + + if (should_profile) { + data_check(op, &md, &data); + } + Label profile_cast_success, profile_cast_failure, done; + Label *success_target = should_profile ? &profile_cast_success : &done; + Label *failure_target = should_profile ? &profile_cast_failure : stub->entry(); + + if (should_profile) { + profile_object(md, data, value, klass_RInfo, &done); + } else { + __ beqz(value, done); + } + + add_debug_info_for_null_check_here(op->info_for_exception()); + __ load_klass(k_RInfo, array); + __ load_klass(klass_RInfo, value); + + lir_store_slowcheck(k_RInfo, klass_RInfo, Rtmp1, success_target, failure_target); + + // fall through to the success case + if (should_profile) { + Register mdo = klass_RInfo; + Register recv = k_RInfo; + __ bind(profile_cast_success); + __ mov_metadata(mdo, md->constant_encoding()); + __ load_klass(recv, value); + type_profile_helper(mdo, md, data, recv, &done); + __ j(done); + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + __ ld(t1, counter_addr); + __ addi(t1, t1, -DataLayout::counter_increment); + __ sd(t1, counter_addr); + __ j(*stub->entry()); + } + + __ bind(done); +} + +void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { + _masm->code_section()->relocate(adr, relocInfo::poll_type); + int pc_offset = code_offset(); + flush_debug_info(pc_offset); + info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); + if (info->exception_handlers() != NULL) { + compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); + } +} + +void LIR_Assembler::type_profile(Register obj, ciMethodData* md, Register klass_RInfo, Register k_RInfo, + ciProfileData* data, Label* success, Label* failure, + Label& profile_cast_success, Label& profile_cast_failure) { + Register mdo = klass_RInfo; + Register recv = k_RInfo; + __ bind(profile_cast_success); + __ mov_metadata(mdo, md->constant_encoding()); + __ load_klass(recv, obj); + Label update_done; + type_profile_helper(mdo, md, data, recv, success); + __ j(*success); + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr = __ form_address(t1, mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + __ ld(t0, counter_addr); + __ addi(t0, t0, -DataLayout::counter_increment); + __ sd(t0, counter_addr); + __ j(*failure); +} + +void LIR_Assembler::lir_store_slowcheck(Register k_RInfo, Register klass_RInfo, Register Rtmp1, + Label* success_target, Label* failure_target) { + // get instance klass (it's already uncompressed) + __ ld(k_RInfo, Address(k_RInfo, 
ObjArrayKlass::element_klass_offset())); + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); + // call out-of-line instance of __ check_klass_subtype_slow_path(...) + __ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo + __ sd(klass_RInfo, Address(sp, wordSize)); // sub klass + __ sd(k_RInfo, Address(sp, 0)); // super klass + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); + // load result to k_RInfo + __ ld(k_RInfo, Address(sp, 0)); + __ addi(sp, sp, 2 * wordSize); // 2: pop out k_RInfo and klass_RInfo + // result is a boolean + __ beqz(k_RInfo, *failure_target, /* is_far */ true); +} + +void LIR_Assembler::const2reg_helper(LIR_Opr src) { + switch (src->as_constant_ptr()->type()) { + case T_INT: + case T_ADDRESS: + case T_OBJECT: + case T_ARRAY: + case T_METADATA: + const2reg(src, FrameMap::t0_opr, lir_patch_none, NULL); + break; + case T_LONG: + const2reg(src, FrameMap::t0_long_opr, lir_patch_none, NULL); + break; + case T_FLOAT: + case T_DOUBLE: + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::logic_op_reg32(Register dst, Register left, Register right, LIR_Code code) { + switch (code) { + case lir_logic_and: __ andrw(dst, left, right); break; + case lir_logic_or: __ orrw (dst, left, right); break; + case lir_logic_xor: __ xorrw(dst, left, right); break; + default: ShouldNotReachHere(); + } +} + +void LIR_Assembler::logic_op_reg(Register dst, Register left, Register right, LIR_Code code) { + switch (code) { + case lir_logic_and: __ andr(dst, left, right); break; + case lir_logic_or: __ orr (dst, left, right); break; + case lir_logic_xor: __ xorr(dst, left, right); break; + default: ShouldNotReachHere(); + } +} + +void LIR_Assembler::logic_op_imm(Register dst, Register left, int right, LIR_Code code) { + switch (code) { + case lir_logic_and: __ andi(dst, left, right); break; + case lir_logic_or: __ ori (dst, left, right); break; + case lir_logic_xor: __ xori(dst, left, right); break; + default: ShouldNotReachHere(); + } +} + +void LIR_Assembler::store_parameter(Register r, int offset_from_rsp_in_words) { + assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp"); + int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord; + assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); + __ sd(r, Address(sp, offset_from_rsp_in_bytes)); +} + +void LIR_Assembler::store_parameter(jint c, int offset_from_rsp_in_words) { + assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp"); + int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord; + assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); + __ li(t0, c); + __ sd(t0, Address(sp, offset_from_rsp_in_bytes)); +} + +#undef __ Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP +#define CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP + +// ArrayCopyStub needs access to bailout +friend class ArrayCopyStub; + +private: + +#include "c1_LIRAssembler_arith_riscv.hpp" +#include "c1_LIRAssembler_arraycopy_riscv.hpp" + + int array_element_size(BasicType type) const; + + static Register as_reg(LIR_Opr op) { + return op->is_double_cpu() ? op->as_register_lo() : op->as_register(); + } + + Address as_Address(LIR_Address* addr, Register tmp); + + // helper functions which checks for overflow and sets bailout if it + // occurs. Always returns a valid embeddable pointer but in the + // bailout case the pointer won't be to unique storage. + address float_constant(float f); + address double_constant(double d); + address int_constant(jlong n); + + // Ensure we have a valid Address (base + offset) to a stack-slot. 
+ Address stack_slot_address(int index, uint shift, int adjust = 0); + + // Record the type of the receiver in ReceiverTypeData + void type_profile_helper(Register mdo, + ciMethodData *md, ciProfileData *data, + Register recv, Label* update_done); + + void add_debug_info_for_branch(address adr, CodeEmitInfo* info); + + void casw(Register addr, Register newval, Register cmpval); + void caswu(Register addr, Register newval, Register cmpval); + void casl(Register addr, Register newval, Register cmpval); + + void poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info = NULL); + + void deoptimize_trap(CodeEmitInfo *info); + + enum { + // See emit_static_call_stub for detail + // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address) + _call_stub_size = 14 * NativeInstruction::instruction_size + + (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size), + // See emit_exception_handler for detail + // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY) + _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller + // See emit_deopt_handler for detail + // auipc (1) + far_jump (6 or 2) + _deopt_handler_size = 1 * NativeInstruction::instruction_size + + 6 * NativeInstruction::instruction_size // or smaller + }; + + void check_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp, + Label &next, Label &none, Address mdo_addr); + void check_no_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp, Address mdo_addr, Label &next); + + void check_exact_klass(Register tmp, ciKlass* exact_klass); + + void check_null(Register tmp, Label &update, intptr_t current_klass, Address mdo_addr, bool do_update, Label &next); + + void (MacroAssembler::*add)(Register prev, RegisterOrConstant incr, Register addr); + void (MacroAssembler::*xchg)(Register prev, Register newv, Register addr); + + void get_op(BasicType type); + + // emit_typecheck_helper sub functions + void data_check(LIR_OpTypeCheck *op, ciMethodData **md, ciProfileData **data); + void typecheck_helper_slowcheck(ciKlass* k, Register obj, Register Rtmp1, + Register k_RInfo, Register klass_RInfo, + Label* failure_target, Label* success_target); + void profile_object(ciMethodData* md, ciProfileData* data, Register obj, + Register klass_RInfo, Label* obj_is_null); + void typecheck_loaded(LIR_OpTypeCheck* op, ciKlass* k, Register k_RInfo); + + // emit_opTypeCheck sub functions + void typecheck_lir_store(LIR_OpTypeCheck* op, bool should_profile); + + void type_profile(Register obj, ciMethodData* md, Register klass_RInfo, Register k_RInfo, + ciProfileData* data, Label* success, Label* failure, + Label& profile_cast_success, Label& profile_cast_failure); + + void lir_store_slowcheck(Register k_RInfo, Register klass_RInfo, Register Rtmp1, + Label* success_target, Label* failure_target); + + void const2reg_helper(LIR_Opr src); + + void emit_branch(LIR_Condition cmp_flag, LIR_Opr cmp1, LIR_Opr cmp2, Label& label, bool is_far, bool is_unordered); + + void logic_op_reg32(Register dst, Register left, Register right, LIR_Code code); + void logic_op_reg(Register dst, Register left, Register right, LIR_Code code); + void logic_op_imm(Register dst, Register left, int right, LIR_Code code); + +public: + + void emit_cmove(LIR_Op4* op); + + void store_parameter(Register r, int offset_from_rsp_in_words); + void store_parameter(jint c, int offset_from_rsp_in_words); + +#endif // 
CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp @@ -0,0 +1,1079 @@ +/* + * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_Instruction.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_LIRGenerator.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArray.hpp" +#include "ci/ciObjArrayKlass.hpp" +#include "ci/ciTypeArrayKlass.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + +#ifdef ASSERT +#define __ gen()->lir(__FILE__, __LINE__)-> +#else +#define __ gen()->lir()-> +#endif + +// Item will be loaded into a byte register; Intel only +void LIRItem::load_byte_item() { + load_item(); +} + + +void LIRItem::load_nonconstant() { + LIR_Opr r = value()->operand(); + if (r->is_constant()) { + _result = r; + } else { + load_item(); + } +} + +//-------------------------------------------------------------- +// LIRGenerator +//-------------------------------------------------------------- + + +LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::r10_oop_opr; } +LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::r13_opr; } +LIR_Opr LIRGenerator::divInOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::divOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::remOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::shiftCountOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::syncLockOpr() { return new_register(T_INT); } +LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::r10_opr; } +LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } + + +LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { + LIR_Opr opr; + switch (type->tag()) { + case intTag: opr = FrameMap::r10_opr; break; + case objectTag: opr = FrameMap::r10_oop_opr; break; + case longTag: opr = FrameMap::long10_opr; break; 
+ case floatTag: opr = FrameMap::fpu10_float_opr; break; + case doubleTag: opr = FrameMap::fpu10_double_opr; break; + + case addressTag: // fall through + default: + ShouldNotReachHere(); + return LIR_OprFact::illegalOpr; + } + + assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch"); + return opr; +} + + +LIR_Opr LIRGenerator::rlock_byte(BasicType type) { + LIR_Opr reg = new_register(T_INT); + set_vreg_flag(reg, LIRGenerator::byte_reg); + return reg; +} + +//--------- loading items into registers -------------------------------- + + +bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const { + if (v->type()->as_IntConstant() != NULL) { + return v->type()->as_IntConstant()->value() == 0; + } else if (v->type()->as_LongConstant() != NULL) { + return v->type()->as_LongConstant()->value() == 0; + } else if (v->type()->as_ObjectConstant() != NULL) { + return v->type()->as_ObjectConstant()->value()->is_null_object(); + } else if (v->type()->as_FloatConstant() != NULL) { + return jint_cast(v->type()->as_FloatConstant()->value()) == 0.0f; + } else if (v->type()->as_DoubleConstant() != NULL) { + return jlong_cast(v->type()->as_DoubleConstant()->value()) == 0.0; + } + return false; +} + +bool LIRGenerator::can_inline_as_constant(Value v) const { + if (v->type()->as_IntConstant() != NULL) { + int value = v->type()->as_IntConstant()->value(); + // "-value" must be defined for value may be used for sub + return Assembler::operand_valid_for_add_immediate(value) && + Assembler::operand_valid_for_add_immediate(- value); + } else if (v->type()->as_ObjectConstant() != NULL) { + return v->type()->as_ObjectConstant()->value()->is_null_object(); + } else if (v->type()->as_LongConstant() != NULL) { + long value = v->type()->as_LongConstant()->value(); + // "-value" must be defined for value may be used for sub + return Assembler::operand_valid_for_add_immediate(value) && + Assembler::operand_valid_for_add_immediate(- value); + } else if (v->type()->as_FloatConstant() != NULL) { + return v->type()->as_FloatConstant()->value() == 0.0f; + } else if (v->type()->as_DoubleConstant() != NULL) { + return v->type()->as_DoubleConstant()->value() == 0.0; + } + return false; +} + +bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { + if (c->as_constant() != NULL) { + long constant = 0; + switch (c->type()) { + case T_INT: constant = c->as_jint(); break; + case T_LONG: constant = c->as_jlong(); break; + default: return false; + } + // "-constant" must be defined for c may be used for sub + return Assembler::operand_valid_for_add_immediate(constant) && + Assembler::operand_valid_for_add_immediate(- constant); + } + return false; +} + +LIR_Opr LIRGenerator::safepoint_poll_register() { + return LIR_OprFact::illegalOpr; +} + +LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, + int shift, int disp, BasicType type) { + assert(base->is_register(), "must be"); + + if (index->is_constant()) { + LIR_Const *constant = index->as_constant_ptr(); + jlong c; + if (constant->type() == T_INT) { + c = (jlong(index->as_jint()) << shift) + disp; + } else { + assert(constant->type() == T_LONG, "should be"); + c = (index->as_jlong() << shift) + disp; + } + if ((jlong)((jint)c) == c) { + return new LIR_Address(base, (jint)c, type); + } else { + LIR_Opr tmp = new_register(T_LONG); + __ move(index, tmp); + return new LIR_Address(base, tmp, type); + } + } + + return new LIR_Address(base, index, (LIR_Address::Scale)shift, disp, type); +} + +LIR_Address* 
LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, + BasicType type) { + int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); + int elem_size = type2aelembytes(type); + int shift = exact_log2(elem_size); + return generate_address(array_opr, index_opr, shift, offset_in_bytes, type); +} + +LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { + LIR_Opr r; + switch (type) { + case T_LONG: + r = LIR_OprFact::longConst(x); + break; + case T_INT: + r = LIR_OprFact::intConst(x); + break; + default: + ShouldNotReachHere(); + r = NULL; // unreachable + } + return r; +} + +void LIRGenerator::increment_counter(address counter, BasicType type, int step) { + LIR_Opr pointer = new_pointer_register(); + __ move(LIR_OprFact::intptrConst(counter), pointer); + LIR_Address* addr = new LIR_Address(pointer, type); + increment_counter(addr, step); +} + +void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + LIR_Opr reg = new_register(addr->type()); + __ load(addr, reg); + __ add(reg, load_immediate(step, addr->type()), reg); + __ store(reg, addr); +} + +void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { + LIR_Opr reg = new_register(T_INT); + __ load(generate_address(base, disp, T_INT), reg, info); + __ cmp(condition, reg, LIR_OprFact::intConst(c)); +} + +void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { + LIR_Opr reg1 = new_register(T_INT); + __ load(generate_address(base, disp, type), reg1, info); + __ cmp(condition, reg, reg1); +} + +bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + if (tmp->is_valid() && c > 0 && c < max_jint) { + if (is_power_of_2(c - 1)) { + __ shift_left(left, exact_log2(c - 1), tmp); + __ add(tmp, left, result); + return true; + } else if (is_power_of_2(c + 1)) { + __ shift_left(left, exact_log2(c + 1), tmp); + __ sub(tmp, left, result); + return true; + } + } + return false; +} + +void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) { + BasicType type = item->type(); + __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type)); +} + +void LIRGenerator::array_store_check(LIR_Opr value, LIR_Opr array, CodeEmitInfo* store_check_info, + ciMethod* profiled_method, int profiled_bci) { + LIR_Opr tmp1 = new_register(objectType); + LIR_Opr tmp2 = new_register(objectType); + LIR_Opr tmp3 = new_register(objectType); + __ store_check(value, array, tmp1, tmp2, tmp3, store_check_info, profiled_method, profiled_bci); +} + +//---------------------------------------------------------------------- +// visitor functions +//---------------------------------------------------------------------- + +void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { + assert(x->is_pinned(), ""); + LIRItem obj(x->obj(), this); + obj.load_item(); + + set_no_result(x); + + // "lock" stores the address of the monitor stack slot, so this is not an oop + LIR_Opr lock = new_register(T_INT); + // Need a scratch register for biased locking + LIR_Opr scratch = LIR_OprFact::illegalOpr; + if (UseBiasedLocking) { + scratch = new_register(T_INT); + } + + CodeEmitInfo* info_for_exception = NULL; + if (x->needs_null_check()) { + info_for_exception = state_for(x); + } + // this CodeEmitInfo must not have the xhandlers because here the + // object is already locked (xhandlers expect object to be unlocked) + CodeEmitInfo* info = state_for(x, x->state(), 
true); + monitor_enter(obj.result(), lock, syncTempOpr(), scratch, + x->monitor_no(), info_for_exception, info); +} + +void LIRGenerator::do_MonitorExit(MonitorExit* x) { + assert(x->is_pinned(), ""); + + LIRItem obj(x->obj(), this); + obj.dont_load_item(); + + LIR_Opr lock = new_register(T_INT); + LIR_Opr obj_temp = new_register(T_INT); + set_no_result(x); + monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no()); +} + +// neg +void LIRGenerator::do_NegateOp(NegateOp* x) { + LIRItem from(x->x(), this); + from.load_item(); + LIR_Opr result = rlock_result(x); + __ negate(from.result(), result); +} + +// for _fadd, _fmul, _fsub, _fdiv, _frem +// _dadd, _dmul, _dsub, _ddiv, _drem +void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) { + + // float remainder is implemented as a direct call into the runtime + BasicTypeList signature(2); + if (x->op() == Bytecodes::_frem) { + signature.append(T_FLOAT); + signature.append(T_FLOAT); + } else { + signature.append(T_DOUBLE); + signature.append(T_DOUBLE); + } + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + + const LIR_Opr result_reg = result_register_for(x->type()); + + left.load_item(); + __ move(left.result(), cc->at(0)); + right.load_item_force(cc->at(1)); + + address entry; + if (x->op() == Bytecodes::_frem) { + entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem); + } else { + entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem); + } + + LIR_Opr result = rlock_result(x); + __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + + return; + } + + if (!left.is_register()) { + left.load_item(); + } + // Always load right hand side. 
+ right.load_item(); + + LIR_Opr reg = rlock(x); + arithmetic_op_fpu(x->op(), reg, left.result(), right.result()); + + set_result(x, round_item(reg)); +} + +// for _ladd, _lmul, _lsub, _ldiv, _lrem +void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + + // missing test if instr is commutative and if we should swap + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { + + left.load_item(); + + bool need_zero_check = true; + if (right.is_constant()) { + jlong c = right.get_jlong_constant(); + // no need to do div-by-zero check if the divisor is a non-zero constant + if (c != 0) { need_zero_check = false; } + // do not load right if the divisor is a power-of-2 constant + if (c > 0 && is_power_of_2(c)) { + right.dont_load_item(); + } else { + right.load_item(); + } + } else { + right.load_item(); + } + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); + __ branch(lir_cond_equal, new DivByZeroStub(info)); + } + + rlock_result(x); + switch (x->op()) { + case Bytecodes::_lrem: + __ rem(left.result(), right.result(), x->operand()); + break; + case Bytecodes::_ldiv: + __ div(left.result(), right.result(), x->operand()); + break; + default: + ShouldNotReachHere(); + } + } else { + assert(x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, + "expect lmul, ladd or lsub"); + // add, sub, mul + left.load_item(); + if (!right.is_register()) { + if (x->op() == Bytecodes::_lmul || + !right.is_constant() || + (x->op() == Bytecodes::_ladd && + !Assembler::operand_valid_for_add_immediate(right.get_jlong_constant())) || + (x->op() == Bytecodes::_lsub && + !Assembler::operand_valid_for_add_immediate(-right.get_jlong_constant()))) { + right.load_item(); + } else { // add, sub + assert(x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expected ladd or lsub"); + // don't load constants to save register + right.load_nonconstant(); + } + } + rlock_result(x); + arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); + } +} + +// for: _iadd, _imul, _isub, _idiv, _irem +void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { + + // Test if instr is commutative and if we should swap + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + LIRItem* left_arg = &left; + LIRItem* right_arg = &right; + if (x->is_commutative() && left.is_stack() && right.is_register()) { + // swap them if left is real stack (or cached) and right is real register(not cached) + left_arg = &right; + right_arg = &left; + } + left_arg->load_item(); + // do not need to load right, as we can handle stack and constants + if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) { + + rlock_result(x); + + bool need_zero_check = true; + if (right.is_constant()) { + jint c = right.get_jint_constant(); + // no need to do div-by-zero check if the divisor is a non-zero constant + if (c != 0) { need_zero_check = false; } + // do not load right if the divisor is a power-of-2 constant + if (c > 0 && is_power_of_2(c)) { + right_arg->dont_load_item(); + } else { + right_arg->load_item(); + } + } else { + right_arg->load_item(); + } + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0)); + __ branch(lir_cond_equal, new DivByZeroStub(info)); + } + + LIR_Opr ill = LIR_OprFact::illegalOpr; + if (x->op() == Bytecodes::_irem) { + __ 
irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); + } else if (x->op() == Bytecodes::_idiv) { + __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); + } + + } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { + if (right.is_constant() && + ((x->op() == Bytecodes::_iadd && !Assembler::operand_valid_for_add_immediate(right.get_jint_constant())) || + (x->op() == Bytecodes::_isub && !Assembler::operand_valid_for_add_immediate(-right.get_jint_constant())))) { + right.load_nonconstant(); + } else { + right.load_item(); + } + rlock_result(x); + arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr); + } else { + assert (x->op() == Bytecodes::_imul, "expect imul"); + if (right.is_constant()) { + jint c = right.get_jint_constant(); + if (c > 0 && c < max_jint && (is_power_of_2(c) || is_power_of_2(c - 1) || is_power_of_2(c + 1))) { + right_arg->dont_load_item(); + } else { + // Cannot use constant op. + right_arg->load_item(); + } + } else { + right.load_item(); + } + rlock_result(x); + arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT)); + } +} + +void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { + // when an operand with use count 1 is the left operand, then it is + // likely that no move for 2-operand-LIR-form is necessary + if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) { + x->swap_operands(); + } + + ValueTag tag = x->type()->tag(); + assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters"); + switch (tag) { + case floatTag: + case doubleTag: do_ArithmeticOp_FPU(x); return; + case longTag: do_ArithmeticOp_Long(x); return; + case intTag: do_ArithmeticOp_Int(x); return; + default: ShouldNotReachHere(); return; + } +} + +// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr +void LIRGenerator::do_ShiftOp(ShiftOp* x) { + LIRItem value(x->x(), this); + LIRItem count(x->y(), this); + + value.load_item(); + if (count.is_constant()) { + assert(count.type()->as_IntConstant() != NULL || count.type()->as_LongConstant() != NULL , "should be"); + count.dont_load_item(); + } else { + count.load_item(); + } + + LIR_Opr res = rlock_result(x); + shift_op(x->op(), res, value.result(), count.result(), LIR_OprFact::illegalOpr); +} + + +// _iand, _land, _ior, _lor, _ixor, _lxor +void LIRGenerator::do_LogicOp(LogicOp* x) { + + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + left.load_item(); + rlock_result(x); + ValueTag tag = right.type()->tag(); + if (right.is_constant() && + ((tag == longTag && Assembler::operand_valid_for_add_immediate(right.get_jlong_constant())) || + (tag == intTag && Assembler::operand_valid_for_add_immediate(right.get_jint_constant())))) { + right.dont_load_item(); + } else { + right.load_item(); + } + + switch (x->op()) { + case Bytecodes::_iand: // fall through + case Bytecodes::_land: + __ logical_and(left.result(), right.result(), x->operand()); break; + case Bytecodes::_ior: // fall through + case Bytecodes::_lor: + __ logical_or(left.result(), right.result(), x->operand()); break; + case Bytecodes::_ixor: // fall through + case Bytecodes::_lxor: + __ logical_xor(left.result(), right.result(), x->operand()); break; + default: Unimplemented(); + } +} + +// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg +void LIRGenerator::do_CompareOp(CompareOp* x) { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + ValueTag tag = 
x->x()->type()->tag(); + if (tag == longTag) { + left.set_destroys_register(); + } + left.load_item(); + right.load_item(); + LIR_Opr reg = rlock_result(x); + + if (x->x()->type()->is_float_kind()) { + Bytecodes::Code code = x->op(); + __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); + } else if (x->x()->type()->tag() == longTag) { + __ lcmp2int(left.result(), right.result(), reg); + } else { + Unimplemented(); + } +} + +LIR_Opr LIRGenerator::atomic_cmpxchg(BasicType type, LIR_Opr addr, LIRItem& cmp_value, LIRItem& new_value) { + LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience + new_value.load_item(); + cmp_value.load_item(); + LIR_Opr result = new_register(T_INT); + if (is_reference_type(type)) { + __ cas_obj(addr, cmp_value.result(), new_value.result(), new_register(T_INT), new_register(T_INT), result); + } else if (type == T_INT) { + __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), ill, ill); + } else if (type == T_LONG) { + __ cas_long(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), ill, ill); + } else { + ShouldNotReachHere(); + } + __ logical_xor(FrameMap::r5_opr, LIR_OprFact::intConst(1), result); + return result; +} + +LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) { + bool is_oop = is_reference_type(type); + LIR_Opr result = new_register(type); + value.load_item(); + assert(type == T_INT || is_oop LP64_ONLY( || type == T_LONG ), "unexpected type"); + LIR_Opr tmp = new_register(T_INT); + __ xchg(addr, value.result(), result, tmp); + return result; +} + +LIR_Opr LIRGenerator::atomic_add(BasicType type, LIR_Opr addr, LIRItem& value) { + LIR_Opr result = new_register(type); + value.load_item(); + assert(type == T_INT LP64_ONLY( || type == T_LONG ), "unexpected type"); + LIR_Opr tmp = new_register(T_INT); + __ xadd(addr, value.result(), result, tmp); + return result; +} + +void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { + assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), + "wrong type"); + + switch (x->id()) { + case vmIntrinsics::_dexp: // fall through + case vmIntrinsics::_dlog: // fall through + case vmIntrinsics::_dpow: // fall through + case vmIntrinsics::_dcos: // fall through + case vmIntrinsics::_dsin: // fall through + case vmIntrinsics::_dtan: // fall through + case vmIntrinsics::_dlog10: + do_LibmIntrinsic(x); + break; + case vmIntrinsics::_dabs: // fall through + case vmIntrinsics::_dsqrt: { + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + LIR_Opr dst = rlock_result(x); + + switch (x->id()) { + case vmIntrinsics::_dsqrt: { + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } + case vmIntrinsics::_dabs: { + __ abs(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } + default: + ShouldNotReachHere(); + } + break; + } + default: + ShouldNotReachHere(); + } +} + +void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { + LIRItem value(x->argument_at(0), this); + value.set_destroys_register(); + LIR_Opr calc_result = rlock_result(x); + LIR_Opr result_reg = result_register_for(x->type()); + CallingConvention* cc = NULL; + BasicTypeList signature(1); + signature.append(T_DOUBLE); + if (x->id() == vmIntrinsics::_dpow) { signature.append(T_DOUBLE); } + cc = frame_map()->c_calling_convention(&signature); + value.load_item_force(cc->at(0)); + if (x->id() == vmIntrinsics::_dpow) { 
+ LIRItem value1(x->argument_at(1), this); + value1.set_destroys_register(); + value1.load_item_force(cc->at(1)); + } + switch (x->id()) { + case vmIntrinsics::_dexp: + if (StubRoutines::dexp() != NULL) { __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); } + else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args()); } + break; + case vmIntrinsics::_dlog: + if (StubRoutines::dlog() != NULL) { __ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args()); } + else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args()); } + break; + case vmIntrinsics::_dlog10: + if (StubRoutines::dlog10() != NULL) { __ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args()); } + else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args()); } + break; + case vmIntrinsics::_dsin: + if (StubRoutines::dsin() != NULL) { __ call_runtime_leaf(StubRoutines::dsin(), getThreadTemp(), result_reg, cc->args()); } + else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), getThreadTemp(), result_reg, cc->args()); } + break; + case vmIntrinsics::_dcos: + if (StubRoutines::dcos() != NULL) { __ call_runtime_leaf(StubRoutines::dcos(), getThreadTemp(), result_reg, cc->args()); } + else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); } + break; + case vmIntrinsics::_dtan: + if (StubRoutines::dtan() != NULL) { __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args()); } + else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); } + break; + case vmIntrinsics::_dpow: + if (StubRoutines::dpow() != NULL) { __ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args()); } + else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); } + break; + default: ShouldNotReachHere(); + } + __ move(result_reg, calc_result); +} + + +void LIRGenerator::do_ArrayCopy(Intrinsic* x) { + assert(x->number_of_arguments() == 5, "wrong type"); + + // Make all state_for calls early since they can emit code + CodeEmitInfo* info = state_for(x, x->state()); + + LIRItem src(x->argument_at(0), this); + LIRItem src_pos(x->argument_at(1), this); + LIRItem dst(x->argument_at(2), this); + LIRItem dst_pos(x->argument_at(3), this); + LIRItem length(x->argument_at(4), this); + + // operands for arraycopy must use fixed registers, otherwise + // LinearScan will fail allocation (because arraycopy always needs a + // call) + + // The java calling convention will give us enough registers + // so that on the stub side the args will be perfect already. + // On the other slow/special case side we call C and the arg + // positions are not similar enough to pick one as the best. 
+ // Also because the java calling convention is a "shifted" version + // of the C convention we can process the java args trivially into C + // args without worry of overwriting during the xfer + + src.load_item_force (FrameMap::as_oop_opr(j_rarg0)); + src_pos.load_item_force (FrameMap::as_opr(j_rarg1)); + dst.load_item_force (FrameMap::as_oop_opr(j_rarg2)); + dst_pos.load_item_force (FrameMap::as_opr(j_rarg3)); + length.load_item_force (FrameMap::as_opr(j_rarg4)); + + LIR_Opr tmp = FrameMap::as_opr(j_rarg5); + + set_no_result(x); + + int flags; + ciArrayKlass* expected_type = NULL; + arraycopy_helper(x, &flags, &expected_type); + + __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), length.result(), tmp, + expected_type, flags, info); // does add_safepoint +} + +void LIRGenerator::do_update_CRC32(Intrinsic* x) { + ShouldNotReachHere(); +} + +void LIRGenerator::do_update_CRC32C(Intrinsic* x) { + ShouldNotReachHere(); +} + +void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) { + assert(x->number_of_arguments() == 3, "wrong type"); + assert(UseFMA, "Needs FMA instructions support."); + LIRItem value(x->argument_at(0), this); + LIRItem value1(x->argument_at(1), this); + LIRItem value2(x->argument_at(2), this); + + value.load_item(); + value1.load_item(); + value2.load_item(); + + LIR_Opr calc_input = value.result(); + LIR_Opr calc_input1 = value1.result(); + LIR_Opr calc_input2 = value2.result(); + LIR_Opr calc_result = rlock_result(x); + + switch (x->id()) { + case vmIntrinsics::_fmaD: __ fmad(calc_input, calc_input1, calc_input2, calc_result); break; + case vmIntrinsics::_fmaF: __ fmaf(calc_input, calc_input1, calc_input2, calc_result); break; + default: ShouldNotReachHere(); + } +} + +void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { + fatal("vectorizedMismatch intrinsic is not implemented on this platform"); +} + +// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f +// _i2b, _i2c, _i2s +void LIRGenerator::do_Convert(Convert* x) { + LIRItem value(x->value(), this); + value.load_item(); + LIR_Opr input = value.result(); + LIR_Opr result = rlock(x); + + // arguments of lir_convert + LIR_Opr conv_input = input; + LIR_Opr conv_result = result; + + __ convert(x->op(), conv_input, conv_result); + + assert(result->is_virtual(), "result must be virtual register"); + set_result(x, result); +} + +void LIRGenerator::do_NewInstance(NewInstance* x) { +#ifndef PRODUCT + if (PrintNotLoaded && !x->klass()->is_loaded()) { + tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci()); + } +#endif + CodeEmitInfo* info = state_for(x, x->state()); + LIR_Opr reg = result_register_for(x->type()); + new_instance(reg, x->klass(), x->is_unresolved(), + FrameMap::r12_oop_opr, + FrameMap::r15_oop_opr, + FrameMap::r14_oop_opr, + LIR_OprFact::illegalOpr, + FrameMap::r13_metadata_opr, + info); + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { + CodeEmitInfo* info = state_for(x, x->state()); + + LIRItem length(x->length(), this); + length.load_item_force(FrameMap::r9_opr); + + LIR_Opr reg = result_register_for(x->type()); + LIR_Opr tmp1 = FrameMap::r12_oop_opr; + LIR_Opr tmp2 = FrameMap::r14_oop_opr; + LIR_Opr tmp3 = FrameMap::r15_oop_opr; + LIR_Opr tmp4 = reg; + LIR_Opr klass_reg = FrameMap::r13_metadata_opr; + LIR_Opr len = length.result(); + BasicType elem_type = x->elt_type(); + + __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); + + CodeStub* 
slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { + LIRItem length(x->length(), this); + // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction + // and therefore provide the state before the parameters have been consumed + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + } + + CodeEmitInfo* info = state_for(x, x->state()); + + LIR_Opr reg = result_register_for(x->type()); + LIR_Opr tmp1 = FrameMap::r12_oop_opr; + LIR_Opr tmp2 = FrameMap::r14_oop_opr; + LIR_Opr tmp3 = FrameMap::r15_oop_opr; + LIR_Opr tmp4 = reg; + LIR_Opr klass_reg = FrameMap::r13_metadata_opr; + + length.load_item_force(FrameMap::r9_opr); + LIR_Opr len = length.result(); + + CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); + ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); + if (obj == ciEnv::unloaded_ciobjarrayklass()) { + BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); + } + klass2reg_with_patching(klass_reg, obj, patching_info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + + +void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { + Values* dims = x->dims(); + int i = dims->length(); + LIRItemList* items = new LIRItemList(i, i, NULL); + while (i-- > 0) { + LIRItem* size = new LIRItem(dims->at(i), this); + items->at_put(i, size); + } + + // Evaluate state_for early since it may emit code. + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + + // Cannot re-use same xhandlers for multiple CodeEmitInfos, so + // clone all handlers (NOTE: Usually this is handled transparently + // by the CodeEmitInfo cloning logic in CodeStub constructors but + // is done explicitly here because a stub isn't being used). 
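The clone in the statement that follows exists to avoid aliasing: if several CodeEmitInfos shared one XHandlers list, a later change made through one of them would silently show up in the others. A minimal stand-alone illustration of that hazard, using a std::vector as a stand-in for the handler list (the types and values here are illustrative only, not HotSpot's):

    #include <cassert>
    #include <memory>
    #include <vector>

    // Stand-ins for XHandlers / CodeEmitInfo, for illustration only.
    using HandlerList = std::vector<int>;                 // handler entry bcis
    struct InfoSnapshot { std::shared_ptr<HandlerList> handlers; };

    int main() {
      auto original = std::make_shared<HandlerList>(HandlerList{10, 42});

      InfoSnapshot aliased{original};                                  // shares the list
      InfoSnapshot cloned{std::make_shared<HandlerList>(*original)};   // private copy

      original->push_back(99);                 // a later pass mutates the shared list

      assert(aliased.handlers->size() == 3);   // the aliased snapshot changed behind its back
      assert(cloned.handlers->size() == 2);    // the cloned snapshot did not
      return 0;
    }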
+ x->set_exception_handlers(new XHandlers(x->exception_handlers())); + } + CodeEmitInfo* info = state_for(x, x->state()); + + i = dims->length(); + while (i-- > 0) { + LIRItem* size = items->at(i); + size->load_item(); + + store_stack_parameter(size->result(), in_ByteSize(i * BytesPerInt)); + } + + LIR_Opr klass_reg = FrameMap::r10_metadata_opr; + klass2reg_with_patching(klass_reg, x->klass(), patching_info); + + LIR_Opr rank = FrameMap::r9_opr; + __ move(LIR_OprFact::intConst(x->rank()), rank); + LIR_Opr varargs = FrameMap::r12_opr; + __ move(FrameMap::sp_opr, varargs); + LIR_OprList* args = new LIR_OprList(3); + args->append(klass_reg); + args->append(rank); + args->append(varargs); + LIR_Opr reg = result_register_for(x->type()); + __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id), + LIR_OprFact::illegalOpr, + reg, args, info); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_BlockBegin(BlockBegin* x) { + // nothing to do for now +} + +void LIRGenerator::do_CheckCast(CheckCast* x) { + LIRItem obj(x->obj(), this); + + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || + (PatchALot && !x->is_incompatible_class_change_check() && !x->is_invokespecial_receiver_check())) { + // must do this before locking the destination register as an oop register, + // and before the obj is loaded (the latter is for deoptimization) + patching_info = state_for(x, x->state_before()); + } + obj.load_item(); + + // info for exceptions + CodeEmitInfo* info_for_exception = + (x->needs_exception_state() ? state_for(x) : + state_for(x, x->state_before(), true /*ignore_xhandler*/ )); + + CodeStub* stub = NULL; + if (x->is_incompatible_class_change_check()) { + assert(patching_info == NULL, "can't patch this"); + stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, + info_for_exception); + } else if (x->is_invokespecial_receiver_check()) { + assert(patching_info == NULL, "can't patch this"); + stub = new DeoptimizeStub(info_for_exception, + Deoptimization::Reason_class_check, + Deoptimization::Action_none); + } else { + stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception); + } + LIR_Opr reg = rlock_result(x); + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + if (!x->klass()->is_loaded() || UseCompressedClassPointers) { + tmp3 = new_register(objectType); + } + __ checkcast(reg, obj.result(), x->klass(), + new_register(objectType), new_register(objectType), tmp3, + x->direct_compare(), info_for_exception, patching_info, stub, + x->profiled_method(), x->profiled_bci()); +} + +void LIRGenerator::do_InstanceOf(InstanceOf* x) { + LIRItem obj(x->obj(), this); + + // result and test object may not be in same register + LIR_Opr reg = rlock_result(x); + CodeEmitInfo* patching_info = NULL; + if ((!x->klass()->is_loaded() || PatchALot)) { + // must do this before locking the destination register as an oop register + patching_info = state_for(x, x->state_before()); + } + obj.load_item(); + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + if (!x->klass()->is_loaded() || UseCompressedClassPointers) { + tmp3 = new_register(objectType); + } + __ instanceof(reg, obj.result(), x->klass(), + new_register(objectType), new_register(objectType), tmp3, + x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); +} + +void LIRGenerator::do_If(If* x) { + // If should have two successors + assert(x->number_of_sux() == 2, "inconsistency"); + ValueTag tag = 
x->x()->type()->tag(); + bool is_safepoint = x->is_safepoint(); + + If::Condition cond = x->cond(); + + LIRItem xitem(x->x(), this); + LIRItem yitem(x->y(), this); + LIRItem* xin = &xitem; + LIRItem* yin = &yitem; + + if (tag == longTag) { + // for longs, only conditions "eql", "neq", "lss", "geq" are valid; + // mirror for other conditions + if (cond == If::gtr || cond == If::leq) { + cond = Instruction::mirror(cond); + xin = &yitem; + yin = &xitem; + } + xin->set_destroys_register(); + } + xin->load_item(); + yin->load_item(); + + set_no_result(x); + + LIR_Opr left = xin->result(); + LIR_Opr right = yin->result(); + + // add safepoint before generating condition code so it can be recomputed + if (x->is_safepoint()) { + // increment backedge counter if needed + increment_backedge_counter_conditionally(lir_cond(cond), left, right, state_for(x, x->state_before()), + x->tsux()->bci(), x->fsux()->bci(), x->profiled_bci()); + __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); + } + + // Generate branch profiling. Profiling code doesn't kill flags. + __ cmp(lir_cond(cond), left, right); + profile_branch(x, cond); + move_to_phi(x->state()); + if (x->x()->type()->is_float_kind()) { + __ branch(lir_cond(cond), x->tsux(), x->usux()); + } else { + __ branch(lir_cond(cond), x->tsux()); + } + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); +} + +LIR_Opr LIRGenerator::getThreadPointer() { + return FrameMap::as_pointer_opr(xthread); +} + +void LIRGenerator::trace_block_entry(BlockBegin* block) { Unimplemented(); } + +void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, + CodeEmitInfo* info) { + __ volatile_store_mem_reg(value, address, info); +} + +void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, + CodeEmitInfo* info) { + __ volatile_load_mem_reg(address, result, info); +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/register.hpp" +#include "c1/c1_LIR.hpp" + +FloatRegister LIR_OprDesc::as_float_reg() const { + return as_FloatRegister(fpu_regnr()); +} + +FloatRegister LIR_OprDesc::as_double_reg() const { + return as_FloatRegister(fpu_regnrLo()); +} + +// Reg2 unused. +LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { + assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); + return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | + (reg1 << LIR_OprDesc::reg2_shift) | + LIR_OprDesc::double_type | + LIR_OprDesc::fpu_register | + LIR_OprDesc::double_size); +} + +#ifndef PRODUCT +void LIR_Address::verify() const { + assert(base()->is_cpu_register(), "wrong base operand"); + assert(index()->is_illegal() || index()->is_double_cpu() || index()->is_single_cpu(), "wrong index operand"); + assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || base()->type() == T_LONG || + base()->type() == T_METADATA, "wrong type for addresses"); +} +#endif // PRODUCT Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_Instruction.hpp" +#include "c1/c1_LinearScan.hpp" +#include "utilities/bitMap.inline.hpp" + +void LinearScan::allocate_fpu_stack() { + // No FPU stack on RISCV +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_C1_LINEARSCAN_RISCV_HPP +#define CPU_RISCV_C1_LINEARSCAN_RISCV_HPP + +inline bool LinearScan::is_processed_reg_num(int reg_num) +{ + return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map; +} + +inline int LinearScan::num_physical_regs(BasicType type) { + return 1; +} + +inline bool LinearScan::requires_adjacent_regs(BasicType type) { + return false; +} + +inline bool LinearScan::is_caller_save(int assigned_reg) { + assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers"); + if (assigned_reg < pd_first_callee_saved_reg) { + return true; + } + if (assigned_reg > pd_last_callee_saved_reg && assigned_reg < pd_first_callee_saved_fpu_reg_1) { + return true; + } + if (assigned_reg > pd_last_callee_saved_fpu_reg_1 && assigned_reg < pd_first_callee_saved_fpu_reg_2) { + return true; + } + if (assigned_reg > pd_last_callee_saved_fpu_reg_2 && assigned_reg < pd_last_fpu_reg) { + return true; + } + return false; +} + +inline void LinearScan::pd_add_temps(LIR_Op* op) { + // No special case behaviours yet +} + + +// Implementation of LinearScanWalker + +inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) +{ + if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) { + assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only"); + _first_reg = pd_first_callee_saved_reg; + _last_reg = pd_last_callee_saved_reg; + return true; + } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || + cur->type() == T_ADDRESS || cur->type() == T_METADATA) { + _first_reg = pd_first_cpu_reg; + _last_reg = pd_last_allocatable_cpu_reg; + return true; + } + return false; +} + +#endif // CPU_RISCV_C1_LINEARSCAN_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp @@ -0,0 +1,455 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_LIR.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "classfile/systemDictionary.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markWord.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" + +void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result, + FloatRegister freg0, FloatRegister freg1, + Register result) +{ + if (is_float) { + float_compare(result, freg0, freg1, unordered_result); + } else { + double_compare(result, freg0, freg1, unordered_result); + } +} + +int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { + const int aligned_mask = BytesPerWord - 1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done; + int null_check_offset = -1; + + verify_oop(obj); + + // save object being locked into the BasicObjectLock + sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + + null_check_offset = offset(); + + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(hdr, obj); + lwu(hdr, Address(hdr, Klass::access_flags_offset())); + andi(t0, hdr, JVM_ACC_IS_VALUE_BASED_CLASS); + bnez(t0, slow_case, true /* is_far */); + } + + if (UseBiasedLocking) { + assert(scratch != noreg, "should have scratch register at this point"); + biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); + } + + // Load object header + ld(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked + ori(hdr, hdr, markWord::unlocked_value); + // save unlocked object header into the displaced header location on the stack + sd(hdr, Address(disp_hdr, 0)); + // test if object header is still the same (i.e. 
unlocked), and if so, store the + // displaced header address in the object header - if it is not the same, get the + // object header instead + la(t1, Address(obj, hdr_offset)); + cmpxchgptr(hdr, disp_hdr, t1, t0, done, /*fallthough*/NULL); + // if the object header was the same, we're done + // if the object header was not the same, it is now in the hdr register + // => test if it is a stack pointer into the same stack (recursive locking), i.e.: + // + // 1) (hdr & aligned_mask) == 0 + // 2) sp <= hdr + // 3) hdr <= sp + page_size + // + // these 3 tests can be done by evaluating the following expression: + // + // (hdr -sp) & (aligned_mask - page_size) + // + // assuming both the stack pointer and page_size have their least + // significant 2 bits cleared and page_size is a power of 2 + sub(hdr, hdr, sp); + li(t0, aligned_mask - os::vm_page_size()); + andr(hdr, hdr, t0); + // for recursive locking, the result is zero => save it in the displaced header + // location (NULL in the displaced hdr location indicates recursive locking) + sd(hdr, Address(disp_hdr, 0)); + // otherwise we don't care about the result and handle locking via runtime call + bnez(hdr, slow_case, /* is_far */ true); + bind(done); + if (PrintBiasedLockingStatistics) { + la(t1, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); + add_memory_int32(Address(t1, 0), 1); + } + return null_check_offset; +} + +void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { + const int aligned_mask = BytesPerWord - 1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done; + + if (UseBiasedLocking) { + // load object + ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + biased_locking_exit(obj, hdr, done); + } + + // load displaced header + ld(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is NULL we had recursive locking + // if we had recursive locking, we are done + beqz(hdr, done); + if (!UseBiasedLocking) { + // load object + ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + } + verify_oop(obj); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to + // the displaced header, get the object header instead + // if the object header was not pointing to the displaced header, + // we do unlocking via runtime call + if (hdr_offset) { + la(t0, Address(obj, hdr_offset)); + cmpxchgptr(disp_hdr, hdr, t0, t1, done, &slow_case); + } else { + cmpxchgptr(disp_hdr, hdr, obj, t1, done, &slow_case); + } + bind(done); +} + +// Defines obj, preserves var_size_in_bytes +void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, Register tmp1, Register tmp2, Label& slow_case) { + if (UseTLAB) { + tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, /* is_far */ true); + } else { + eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, tmp1, slow_case, /* is_far */ true); + } +} + +void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { + assert_different_registers(obj, klass, len); + if (UseBiasedLocking && !len->is_valid()) { + assert_different_registers(obj, klass, len, tmp1, tmp2); + ld(tmp1, Address(klass, Klass::prototype_header_offset())); + } else { + // This assumes that all prototype bits 
fitr in an int32_t + mv(tmp1, (int32_t)(intptr_t)markWord::prototype().value()); + } + sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); + + if (UseCompressedClassPointers) { // Take care not to kill klass + encode_klass_not_null(tmp1, klass); + sw(tmp1, Address(obj, oopDesc::klass_offset_in_bytes())); + } else { + sd(klass, Address(obj, oopDesc::klass_offset_in_bytes())); + } + + if (len->is_valid()) { + sw(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); + } else if (UseCompressedClassPointers) { + store_klass_gap(obj, zr); + } +} + +// preserves obj, destroys len_in_bytes +void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp) { + assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); + Label done; + + // len_in_bytes is positive and ptr sized + sub(len_in_bytes, len_in_bytes, hdr_size_in_bytes); + beqz(len_in_bytes, done); + + // Preserve obj + if (hdr_size_in_bytes) { + add(obj, obj, hdr_size_in_bytes); + } + zero_memory(obj, len_in_bytes, tmp); + if (hdr_size_in_bytes) { + sub(obj, obj, hdr_size_in_bytes); + } + + bind(done); +} + +void C1_MacroAssembler::allocate_object(Register obj, Register tmp1, Register tmp2, int header_size, int object_size, Register klass, Label& slow_case) { + assert_different_registers(obj, tmp1, tmp2); + assert(header_size >= 0 && object_size >= header_size, "illegal sizes"); + + try_allocate(obj, noreg, object_size * BytesPerWord, tmp1, tmp2, slow_case); + + initialize_object(obj, klass, noreg, object_size * HeapWordSize, tmp1, tmp2, UseTLAB); +} + +void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register tmp1, Register tmp2, bool is_tlab_allocated) { + assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, + "con_size_in_bytes is not multiple of alignment"); + const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; + + initialize_header(obj, klass, noreg, tmp1, tmp2); + + if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { + // clear rest of allocated space + const Register index = tmp2; + // 16: multipler for threshold + const int threshold = 16 * BytesPerWord; // approximate break even point for code size (see comments below) + if (var_size_in_bytes != noreg) { + mv(index, var_size_in_bytes); + initialize_body(obj, index, hdr_size_in_bytes, tmp1); + } else if (con_size_in_bytes <= threshold) { + // use explicit null stores + int i = hdr_size_in_bytes; + if (i < con_size_in_bytes && (con_size_in_bytes % (2 * BytesPerWord))) { // 2: multipler for BytesPerWord + sd(zr, Address(obj, i)); + i += BytesPerWord; + } + for (; i < con_size_in_bytes; i += BytesPerWord) { + sd(zr, Address(obj, i)); + } + } else if (con_size_in_bytes > hdr_size_in_bytes) { + block_comment("zero memory"); + // use loop to null out the fields + int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord; + mv(index, words / 8); // 8: byte size + + const int unroll = 8; // Number of sd(zr) instructions we'll unroll + int remainder = words % unroll; + la(t0, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord)); + + Label entry_point, loop; + j(entry_point); + + bind(loop); + sub(index, index, 1); + for (int i = -unroll; i < 0; i++) { + if (-i == remainder) { + bind(entry_point); + } + sd(zr, Address(t0, i * wordSize)); + } + if (remainder == 0) { + bind(entry_point); + } + add(t0, t0, unroll * wordSize); + bnez(index, loop); + } + } + + membar(MacroAssembler::StoreStore); + + if 
(CURRENT_ENV->dtrace_alloc_probes()) { + assert(obj == x10, "must be"); + far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id))); + } + + verify_oop(obj); +} + +void C1_MacroAssembler::allocate_array(Register obj, Register len, Register tmp1, Register tmp2, int header_size, int f, Register klass, Label& slow_case) { + assert_different_registers(obj, len, tmp1, tmp2, klass); + + // determine alignment mask + assert(!(BytesPerWord & 1), "must be multiple of 2 for masking code to work"); + + // check for negative or excessive length + mv(t0, (int32_t)max_array_allocation_length); + bgeu(len, t0, slow_case, /* is_far */ true); + + const Register arr_size = tmp2; // okay to be the same + // align object end + mv(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); + shadd(arr_size, len, arr_size, t0, f); + andi(arr_size, arr_size, ~(uint)MinObjAlignmentInBytesMask); + + try_allocate(obj, arr_size, 0, tmp1, tmp2, slow_case); + + initialize_header(obj, klass, len, tmp1, tmp2); + + // clear rest of allocated space + const Register len_zero = len; + initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero); + + membar(MacroAssembler::StoreStore); + + if (CURRENT_ENV->dtrace_alloc_probes()) { + assert(obj == x10, "must be"); + far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id))); + } + + verify_oop(obj); +} + +void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache, Label &L) { + verify_oop(receiver); + // explicit NULL check not needed since load from [klass_offset] causes a trap + // check against inline cache + assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), "must add explicit null check"); + cmp_klass(receiver, iCache, t0, L); +} + +void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { + assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); + // Make sure there is enough stack space for this method's activation. + // Note that we do this before creating a frame. + generate_stack_overflow_check(bang_size_in_bytes); + MacroAssembler::build_frame(framesize); + + // Insert nmethod entry barrier into frame. + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->nmethod_entry_barrier(this); +} + +void C1_MacroAssembler::remove_frame(int framesize) { + MacroAssembler::remove_frame(framesize); +} + + +void C1_MacroAssembler::verified_entry(bool breakAtEntry) { + // If we have to make this method not-entrant we'll overwrite its + // first instruction with a jump. For this action to be legal we + // must ensure that this first instruction is a J, JAL or NOP. + // Make it a NOP. + + nop(); +} + +void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { + // fp + -2: link + // + -1: return address + // + 0: argument with offset 0 + // + 1: argument with offset 1 + // + 2: ... 
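Given the layout sketched above, load_parameter only has to scale the word index by the word size; incoming arguments sit at non-negative offsets from fp while the saved link and return address sit just below it. A small stand-alone sketch of that arithmetic, assuming the 8-byte words of RV64 (constants and names here are illustrative):

    #include <cstdio>

    // Mirrors only the offset arithmetic of load_parameter; fp is not a real frame here.
    constexpr int kBytesPerWord = 8;   // RV64 assumption

    constexpr int frame_offset(int offset_in_words) {
      return offset_in_words * kBytesPerWord;
    }

    int main() {
      std::printf("link           : fp%+d\n", frame_offset(-2));   // fp-16
      std::printf("return address : fp%+d\n", frame_offset(-1));   // fp-8
      for (int k = 0; k <= 2; ++k) {                                // incoming arguments
        std::printf("argument %d     : fp+%d\n", k, frame_offset(k));
      }
      return 0;
    }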
+ ld(reg, Address(fp, offset_in_words * BytesPerWord)); +} + +#ifndef PRODUCT + +void C1_MacroAssembler::verify_stack_oop(int stack_offset) { + if (!VerifyOops) { + return; + } + verify_oop_addr(Address(sp, stack_offset), "oop"); +} + +void C1_MacroAssembler::verify_not_null_oop(Register r) { + if (!VerifyOops) return; + Label not_null; + bnez(r, not_null); + stop("non-null oop required"); + bind(not_null); + verify_oop(r); +} + +void C1_MacroAssembler::invalidate_registers(bool inv_x10, bool inv_x9, bool inv_x12, bool inv_x13, bool inv_x14, bool inv_x15) { +#ifdef ASSERT + static int nn; + if (inv_x10) { mv(x10, 0xDEAD); } + if (inv_x9) { mv(x9, 0xDEAD); } + if (inv_x12) { mv(x12, nn++); } + if (inv_x13) { mv(x13, 0xDEAD); } + if (inv_x14) { mv(x14, 0xDEAD); } + if (inv_x15) { mv(x15, 0xDEAD); } +#endif // ASSERT +} +#endif // ifndef PRODUCT + +typedef void (C1_MacroAssembler::*c1_cond_branch_insn)(Register op1, Register op2, Label& label, bool is_far); +typedef void (C1_MacroAssembler::*c1_float_cond_branch_insn)(FloatRegister op1, FloatRegister op2, + Label& label, bool is_far, bool is_unordered); + +static c1_cond_branch_insn c1_cond_branch[] = +{ + /* SHORT branches */ + (c1_cond_branch_insn)&Assembler::beq, + (c1_cond_branch_insn)&Assembler::bne, + (c1_cond_branch_insn)&Assembler::blt, + (c1_cond_branch_insn)&Assembler::ble, + (c1_cond_branch_insn)&Assembler::bge, + (c1_cond_branch_insn)&Assembler::bgt, + (c1_cond_branch_insn)&Assembler::bleu, // lir_cond_belowEqual + (c1_cond_branch_insn)&Assembler::bgeu // lir_cond_aboveEqual +}; + +static c1_float_cond_branch_insn c1_float_cond_branch[] = +{ + /* FLOAT branches */ + (c1_float_cond_branch_insn)&MacroAssembler::float_beq, + (c1_float_cond_branch_insn)&MacroAssembler::float_bne, + (c1_float_cond_branch_insn)&MacroAssembler::float_blt, + (c1_float_cond_branch_insn)&MacroAssembler::float_ble, + (c1_float_cond_branch_insn)&MacroAssembler::float_bge, + (c1_float_cond_branch_insn)&MacroAssembler::float_bgt, + NULL, // lir_cond_belowEqual + NULL, // lir_cond_aboveEqual + + /* DOUBLE branches */ + (c1_float_cond_branch_insn)&MacroAssembler::double_beq, + (c1_float_cond_branch_insn)&MacroAssembler::double_bne, + (c1_float_cond_branch_insn)&MacroAssembler::double_blt, + (c1_float_cond_branch_insn)&MacroAssembler::double_ble, + (c1_float_cond_branch_insn)&MacroAssembler::double_bge, + (c1_float_cond_branch_insn)&MacroAssembler::double_bgt, + NULL, // lir_cond_belowEqual + NULL // lir_cond_aboveEqual +}; + +void C1_MacroAssembler::c1_cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, + BasicType type, bool is_far) { + if (type == T_OBJECT || type == T_ARRAY) { + assert(cmpFlag == lir_cond_equal || cmpFlag == lir_cond_notEqual, "Should be equal or notEqual"); + if (cmpFlag == lir_cond_equal) { + beq(op1, op2, label, is_far); + } else { + bne(op1, op2, label, is_far); + } + } else { + assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(c1_cond_branch) / sizeof(c1_cond_branch[0])), + "invalid c1 conditional branch index"); + (this->*c1_cond_branch[cmpFlag])(op1, op2, label, is_far); + } +} + +void C1_MacroAssembler::c1_float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, + bool is_far, bool is_unordered) { + assert(cmpFlag >= 0 && + cmpFlag < (int)(sizeof(c1_float_cond_branch) / sizeof(c1_float_cond_branch[0])), + "invalid c1 float conditional branch index"); + (this->*c1_float_cond_branch[cmpFlag])(op1, op2, label, is_far, is_unordered); +} Index: 
jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp @@ -0,0 +1,121 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP +#define CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP + +using MacroAssembler::build_frame; +using MacroAssembler::null_check; + +// C1_MacroAssembler contains high-level macros for C1 + + private: + int _rsp_offset; // track rsp changes + // initialization + void pd_init() { _rsp_offset = 0; } + + + public: + void try_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register tmp1, // temp register + Register tmp2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + + void initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2); + void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp); + + void float_cmp(bool is_float, int unordered_result, + FloatRegister f0, FloatRegister f1, + Register result); + + // locking + // hdr : must be x10, contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must point to the displaced header location, contents preserved + // scratch : scratch register, contents destroyed + // returns code offset at which to add null check debug information + int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); + + // unlocking + // hdr : contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must be x10 & must point to the displaced header location, contents destroyed + void unlock_object(Register swap, Register obj, Register lock, Label& slow_case); + + void initialize_object( + Register obj, // result: pointer to object after successful allocation + Register klass, // object klass + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at 
compile time + Register tmp1, // temp register + Register tmp2, // temp register + bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB + ); + + // allocation of fixed-size objects + // (can also be used to allocate fixed-size arrays, by setting + // hdr_size correctly and storing the array length afterwards) + // obj : will contain pointer to allocated object + // t1, t2 : temp registers - contents destroyed + // header_size: size of object header in words + // object_size: total size of object in words + // slow_case : exit to slow case implementation if fast allocation fails + void allocate_object(Register obj, Register tmp1, Register tmp2, int header_size, int object_size, Register klass, Label& slow_case); + + enum { + max_array_allocation_length = 0x00FFFFFF + }; + + // allocation of arrays + // obj : will contain pointer to allocated object + // len : array length in number of elements + // t : temp register - contents destroyed + // header_size: size of object header in words + // f : element scale factor + // slow_case : exit to slow case implementation if fast allocation fails + void allocate_array(Register obj, Register len, Register tmp1, Register tmp2, int header_size, int f, Register klass, Label& slow_case); + + int rsp_offset() const { return _rsp_offset; } + + void invalidate_registers(bool inv_r0, bool inv_r19, bool inv_r2, bool inv_r3, bool inv_r4, bool inv_r5) PRODUCT_RETURN; + + // This platform only uses signal-based null checks. The Label is not needed. + void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); } + + void load_parameter(int offset_in_words, Register reg); + + void inline_cache_check(Register receiver, Register iCache, Label &L); + + static const int c1_double_branch_mask = 1 << 3; // depend on c1_float_cond_branch + void c1_cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, BasicType type, bool is_far); + void c1_float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, + bool is_far, bool is_unordered = false); + +#endif // CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp @@ -0,0 +1,1172 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_Defs.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "compiler/disassembler.hpp" +#include "compiler/oopMap.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/universe.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "register_riscv.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vframe.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + + +// Implementation of StubAssembler + +int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, int args_size) { + // setup registers + assert(!(oop_result->is_valid() || metadata_result->is_valid()) || oop_result != metadata_result, + "registers must be different"); + assert(oop_result != xthread && metadata_result != xthread, "registers must be different"); + assert(args_size >= 0, "illegal args_size"); + bool align_stack = false; + + mv(c_rarg0, xthread); + set_num_rt_args(0); // Nothing on stack + + Label retaddr; + set_last_Java_frame(sp, fp, retaddr, t0); + + // do the call + int32_t off = 0; + la_patchable(t0, RuntimeAddress(entry), off); + jalr(x1, t0, off); + bind(retaddr); + int call_offset = offset(); + // verify callee-saved register +#ifdef ASSERT + push_reg(x10, sp); + { Label L; + get_thread(x10); + beq(xthread, x10, L); + stop("StubAssembler::call_RT: xthread not callee saved?"); + bind(L); + } + pop_reg(x10, sp); +#endif + reset_last_Java_frame(true); + + // check for pending exceptions + { Label L; + // check for pending exceptions (java_thread is set upon return) + ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + beqz(t0, L); + // exception pending => remove activation and forward to exception handler + // make sure that the vm_results are cleared + if (oop_result->is_valid()) { + sd(zr, Address(xthread, JavaThread::vm_result_offset())); + } + if (metadata_result->is_valid()) { + sd(zr, Address(xthread, JavaThread::vm_result_2_offset())); + } + if (frame_size() == no_frame_size) { + leave(); + far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + } else if (_stub_id == Runtime1::forward_exception_id) { + should_not_reach_here(); + } else { + far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); + } + bind(L); + } + // get oop results if there are any and reset the values in the thread + if (oop_result->is_valid()) { + get_vm_result(oop_result, xthread); + } + if (metadata_result->is_valid()) { + get_vm_result_2(metadata_result, xthread); + } + return call_offset; +} + +int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, Register arg1) { + mv(c_rarg1, arg1); + return call_RT(oop_result, metadata_result, entry, 1); +} + +int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, Register arg1, Register arg2) { + const int arg_num = 2; + if (c_rarg1 == arg2) { + if (c_rarg2 == arg1) { + xorr(arg1, arg1, arg2); + xorr(arg2, 
arg1, arg2); + xorr(arg1, arg1, arg2); + } else { + mv(c_rarg2, arg2); + mv(c_rarg1, arg1); + } + } else { + mv(c_rarg1, arg1); + mv(c_rarg2, arg2); + } + return call_RT(oop_result, metadata_result, entry, arg_num); +} + +int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) { + const int arg_num = 3; + // if there is any conflict use the stack + if (arg1 == c_rarg2 || arg1 == c_rarg3 || + arg2 == c_rarg1 || arg2 == c_rarg3 || + arg3 == c_rarg1 || arg3 == c_rarg2) { + const int arg1_sp_offset = 0; + const int arg2_sp_offset = 1; + const int arg3_sp_offset = 2; + addi(sp, sp, -(arg_num + 1) * wordSize); + sd(arg1, Address(sp, arg1_sp_offset * wordSize)); + sd(arg2, Address(sp, arg2_sp_offset * wordSize)); + sd(arg3, Address(sp, arg3_sp_offset * wordSize)); + + ld(c_rarg1, Address(sp, arg1_sp_offset * wordSize)); + ld(c_rarg2, Address(sp, arg2_sp_offset * wordSize)); + ld(c_rarg3, Address(sp, arg3_sp_offset * wordSize)); + addi(sp, sp, (arg_num + 1) * wordSize); + } else { + mv(c_rarg1, arg1); + mv(c_rarg2, arg2); + mv(c_rarg3, arg3); + } + return call_RT(oop_result, metadata_result, entry, arg_num); +} + +enum return_state_t { + does_not_return, requires_return +}; + +// Implementation of StubFrame + +class StubFrame: public StackObj { + private: + StubAssembler* _sasm; + bool _return_state; + + public: + StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state=requires_return); + void load_argument(int offset_in_words, Register reg); + + ~StubFrame(); +};; + +void StubAssembler::prologue(const char* name, bool must_gc_arguments) { + set_info(name, must_gc_arguments); + enter(); +} + +void StubAssembler::epilogue() { + leave(); + ret(); +} + +#define __ _sasm-> + +StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state) { + _sasm = sasm; + _return_state = return_state; + __ prologue(name, must_gc_arguments); +} + +// load parameters that were stored with LIR_Assembler::store_parameter +// Note: offsets for store_parameter and load_argument must match +void StubFrame::load_argument(int offset_in_words, Register reg) { + __ load_parameter(offset_in_words, reg); +} + + +StubFrame::~StubFrame() { + if (_return_state == requires_return) { + __ epilogue(); + } else { + __ should_not_reach_here(); + } + _sasm = NULL; +} + +#undef __ + + +// Implementation of Runtime1 + +#define __ sasm-> + +const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2; + +// Stack layout for saving/restoring all the registers needed during a runtime +// call (this includes deoptimization) +// Note: note that users of this frame may well have arguments to some runtime +// while these values are on the stack. These positions neglect those arguments +// but the code in save_live_registers will take the argument count into +// account. +// + +enum reg_save_layout { + reg_save_frame_size = 32 /* float */ + 30 /* integer excluding x3, x4 */ +}; + +// Save off registers which might be killed by calls into the runtime. +// Tries to smart of about FPU registers. In particular we separate +// saving and describing the FPU registers for deoptimization since we +// have to save the FPU registers twice if we describe them. The +// deopt blob is the only thing which needs to describe FPU registers. +// In all other cases it should be sufficient to simply save their +// current value. 
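The two offset tables declared next are filled in by Runtime1::initialize_pd further down: the FPU registers are laid out first, one extra slot keeps the frame 16-byte aligned, and x5 through x31 follow, all counted in half-word slots. A stand-alone sketch of that bookkeeping with plain arrays (the 32+32 register counts are RV64 assumptions of this sketch, not the FrameMap constants):

    #include <cstdio>

    // Mirrors the slot bookkeeping of initialize_pd with plain arrays.
    constexpr int kNofFpuRegs = 32;     // assumed RV64 FPU register count
    constexpr int kNofCpuRegs = 32;     // assumed RV64 integer register count
    constexpr int kStep       = 2;      // offsets counted in half-word slots (half of an 8-byte word)

    int fpu_save_offset[kNofFpuRegs];
    int cpu_save_offset[kNofCpuRegs];

    int main() {
      int sp_offset = 0;
      for (int i = 0; i < kNofFpuRegs; i++) {          // f0..f31 saved first
        fpu_save_offset[i] = sp_offset;
        sp_offset += kStep;
      }
      sp_offset += kStep;                              // slot reserved for 16-byte alignment
      for (int i = 5; i < kNofCpuRegs; i++) {          // x5..x31; x0..x4 are never saved
        cpu_save_offset[i] = sp_offset;
        sp_offset += kStep;
      }
      std::printf("f0 at slot %d, f31 at slot %d\n", fpu_save_offset[0], fpu_save_offset[31]);
      std::printf("x5 at slot %d, x31 at slot %d\n", cpu_save_offset[5], cpu_save_offset[31]);
      std::printf("slots used: %d\n", sp_offset);
      return 0;
    }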
+ +static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs]; +static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs]; + +static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { + int frame_size_in_bytes = reg_save_frame_size * BytesPerWord; + sasm->set_frame_size(frame_size_in_bytes / BytesPerWord); + int frame_size_in_slots = frame_size_in_bytes / sizeof(jint); + OopMap* oop_map = new OopMap(frame_size_in_slots, 0); + assert_cond(oop_map != NULL); + + // caller save registers only, see FrameMap::initialize + // in c1_FrameMap_riscv.cpp for detail. + const static Register caller_save_cpu_regs[FrameMap::max_nof_caller_save_cpu_regs] = { + x7, x10, x11, x12, x13, x14, x15, x16, x17, x28, x29, x30, x31 + }; + + for (int i = 0; i < FrameMap::max_nof_caller_save_cpu_regs; i++) { + Register r = caller_save_cpu_regs[i]; + int sp_offset = cpu_reg_save_offsets[r->encoding()]; + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), + r->as_VMReg()); + } + + // fpu_regs + if (save_fpu_registers) { + for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { + FloatRegister r = as_FloatRegister(i); + int sp_offset = fpu_reg_save_offsets[i]; + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), + r->as_VMReg()); + } + } + return oop_map; +} + +static OopMap* save_live_registers(StubAssembler* sasm, + bool save_fpu_registers = true) { + __ block_comment("save_live_registers"); + + // if the number of pushed regs is odd, one slot will be reserved for alignment + __ push_reg(RegSet::range(x5, x31), sp); // integer registers except ra(x1) & sp(x2) & gp(x3) & tp(x4) + + if (save_fpu_registers) { + // float registers + __ addi(sp, sp, -(FrameMap::nof_fpu_regs * wordSize)); + for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { + __ fsd(as_FloatRegister(i), Address(sp, i * wordSize)); + } + } else { + // we define reg_save_layout = 62 as the fixed frame size, + // we should also sub 32 * wordSize to sp when save_fpu_registers == false + __ addi(sp, sp, -32 * wordSize); + } + + return generate_oop_map(sasm, save_fpu_registers); +} + +static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) { + if (restore_fpu_registers) { + for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { + __ fld(as_FloatRegister(i), Address(sp, i * wordSize)); + } + __ addi(sp, sp, FrameMap::nof_fpu_regs * wordSize); + } else { + // we define reg_save_layout = 64 as the fixed frame size, + // we should also add 32 * wordSize to sp when save_fpu_registers == false + __ addi(sp, sp, 32 * wordSize); + } + + // if the number of popped regs is odd, the reserved slot for alignment will be removed + __ pop_reg(RegSet::range(x5, x31), sp); // integer registers except ra(x1) & sp(x2) & gp(x3) & tp(x4) +} + +static void restore_live_registers_except_r10(StubAssembler* sasm, bool restore_fpu_registers = true) { + if (restore_fpu_registers) { + for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { + __ fld(as_FloatRegister(i), Address(sp, i * wordSize)); + } + __ addi(sp, sp, FrameMap::nof_fpu_regs * wordSize); + } else { + // we define reg_save_layout = 64 as the fixed frame size, + // we should also add 32 * wordSize to sp when save_fpu_registers == false + __ addi(sp, sp, 32 * wordSize); + } + + // pop integer registers except ra(x1) & sp(x2) & gp(x3) & tp(x4) & x10 + // there is one reserved slot for alignment on the stack in save_live_registers(). 
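Splitting the pop into the range below x10 and the range above it is what lets the runtime call's result survive the restore; x10's slot is simply left untouched. A small stand-alone sketch of that idea over a plain array standing in for the save area (register numbering and values are illustrative; this mimics, not uses, RegSet):

    #include <array>
    #include <cstdio>

    // The save area as a plain array: slot 0 holds x5, slot 26 holds x31.  Illustrative only.
    constexpr int kFirstSaved = 5;
    constexpr int kLastSaved  = 31;
    constexpr int kSkipped    = 10;   // x10 carries the call result and must survive

    int main() {
      std::array<long, kLastSaved - kFirstSaved + 1> save_area{};
      long regs[32] = {0};

      for (int r = kFirstSaved; r <= kLastSaved; r++) {   // "save_live_registers"
        save_area[r - kFirstSaved] = 1000 + r;
      }
      regs[kSkipped] = 42;                                // the runtime call leaves its result in x10

      // "restore_live_registers_except_r10": two ranged pops around the skipped slot
      for (int r = kFirstSaved; r < kSkipped; r++)     { regs[r] = save_area[r - kFirstSaved]; }
      for (int r = kSkipped + 1; r <= kLastSaved; r++) { regs[r] = save_area[r - kFirstSaved]; }

      std::printf("x9 = %ld, x10 = %ld, x11 = %ld\n", regs[9], regs[10], regs[11]);  // 1009, 42, 1011
      return 0;
    }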
+ __ pop_reg(RegSet::range(x5, x9), sp); // pop x5 ~ x9 with the reserved slot for alignment + __ pop_reg(RegSet::range(x11, x31), sp); // pop x11 ~ x31; x10 will be automatically skipped here +} + +void Runtime1::initialize_pd() { + int i = 0; + int sp_offset = 0; + const int step = 2; // SP offsets are in halfwords + + // all float registers are saved explicitly + for (i = 0; i < FrameMap::nof_fpu_regs; i++) { + fpu_reg_save_offsets[i] = sp_offset; + sp_offset += step; + } + + // a slot reserved for stack 16-byte alignment, see MacroAssembler::push_reg + sp_offset += step; + // we save x5 ~ x31, except x0 ~ x4: loop starts from x5 + for (i = 5; i < FrameMap::nof_cpu_regs; i++) { + cpu_reg_save_offsets[i] = sp_offset; + sp_offset += step; + } +} + +// target: the entry point of the method that creates and posts the exception oop +// has_argument: true if the exception needs arguments (passed in t0 and t1) + +OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) { + // make a frame and preserve the caller's caller-save registers + OopMap* oop_map = save_live_registers(sasm); + assert_cond(oop_map != NULL); + int call_offset = 0; + if (!has_argument) { + call_offset = __ call_RT(noreg, noreg, target); + } else { + __ mv(c_rarg1, t0); + __ mv(c_rarg2, t1); + call_offset = __ call_RT(noreg, noreg, target); + } + OopMapSet* oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, oop_map); + + return oop_maps; +} + +OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { + __ block_comment("generate_handle_exception"); + + // incoming parameters + const Register exception_oop = x10; + const Register exception_pc = x13; + + OopMapSet* oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + OopMap* oop_map = NULL; + + switch (id) { + case forward_exception_id: + // We're handling an exception in the context of a compiled frame. + // The registers have been saved in the standard places. Perform + // an exception lookup in the caller and dispatch to the handler + // if found. Otherwise unwind and dispatch to the callers + // exception handler. + oop_map = generate_oop_map(sasm, 1 /* thread */); + + // load and clear pending exception oop into x10 + __ ld(exception_oop, Address(xthread, Thread::pending_exception_offset())); + __ sd(zr, Address(xthread, Thread::pending_exception_offset())); + + // load issuing PC (the return address for this stub) into x13 + __ ld(exception_pc, Address(fp, frame::return_addr_offset * BytesPerWord)); + + // make sure that the vm_results are cleared (may be unnecessary) + __ sd(zr, Address(xthread, JavaThread::vm_result_offset())); + __ sd(zr, Address(xthread, JavaThread::vm_result_2_offset())); + break; + case handle_exception_nofpu_id: + case handle_exception_id: + // At this point all registers MAY be live. + oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: { + // At this point all registers except exception oop (x10) and + // exception pc (ra) are dead. 
+ const int frame_size = 2 /* fp, return address */; + oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); + sasm->set_frame_size(frame_size); + break; + } + default: ShouldNotReachHere(); + } + + // verify that only x10 and x13 are valid at this time + __ invalidate_registers(false, true, true, false, true, true); + // verify that x10 contains a valid exception + __ verify_not_null_oop(exception_oop); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are + // empty before writing to them + Label oop_empty; + __ ld(t0, Address(xthread, JavaThread::exception_oop_offset())); + __ beqz(t0, oop_empty); + __ stop("exception oop already set"); + __ bind(oop_empty); + + Label pc_empty; + __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); + __ beqz(t0, pc_empty); + __ stop("exception pc already set"); + __ bind(pc_empty); +#endif + + // save exception oop and issuing pc into JavaThread + // (exception handler will load it from here) + __ sd(exception_oop, Address(xthread, JavaThread::exception_oop_offset())); + __ sd(exception_pc, Address(xthread, JavaThread::exception_pc_offset())); + + // patch throwing pc into return address (has bci & oop map) + __ sd(exception_pc, Address(fp, frame::return_addr_offset * BytesPerWord)); + + // compute the exception handler. + // the exception oop and the throwing pc are read from the fields in JavaThread + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc)); + guarantee(oop_map != NULL, "NULL oop_map!"); + oop_maps->add_gc_map(call_offset, oop_map); + + // x10: handler address + // will be the deopt blob if nmethod was deoptimized while we looked up + // handler regardless of whether handler existed in the nmethod. + + // only x10 is valid at this time, all other registers have been destroyed by the runtime call + __ invalidate_registers(false, true, true, true, true, true); + + // patch the return address, this stub will directly return to the exception handler + __ sd(x10, Address(fp, frame::return_addr_offset * BytesPerWord)); + + switch (id) { + case forward_exception_id: + case handle_exception_nofpu_id: + case handle_exception_id: + // Restore the registers that were saved at the beginning. + restore_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: + break; + default: ShouldNotReachHere(); + } + + return oop_maps; +} + + +void Runtime1::generate_unwind_exception(StubAssembler *sasm) { + // incoming parameters + const Register exception_oop = x10; + // other registers used in this stub + const Register handler_addr = x11; + + // verify that only x10, is valid at this time + __ invalidate_registers(false, true, true, true, true, true); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are empty + Label oop_empty; + __ ld(t0, Address(xthread, JavaThread::exception_oop_offset())); + __ beqz(t0, oop_empty); + __ stop("exception oop must be empty"); + __ bind(oop_empty); + + Label pc_empty; + __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); + __ beqz(t0, pc_empty); + __ stop("exception pc must be empty"); + __ bind(pc_empty); +#endif + + // Save our return address because + // exception_handler_for_return_address will destroy it. 
We also + // save exception_oop + __ addi(sp, sp, -2 * wordSize); + __ sd(exception_oop, Address(sp, wordSize)); + __ sd(ra, Address(sp)); + + // search the exception handler address of the caller (using the return address) + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), xthread, ra); + // x10: exception handler address of the caller + + // Only x10 is valid at this time; all other registers have been + // destroyed by the call. + __ invalidate_registers(false, true, true, true, false, true); + + // move result of call into correct register + __ mv(handler_addr, x10); + + // get throwing pc (= return address). + // ra has been destroyed by the call + __ ld(ra, Address(sp)); + __ ld(exception_oop, Address(sp, wordSize)); + __ addi(sp, sp, 2 * wordSize); + __ mv(x13, ra); + + __ verify_not_null_oop(exception_oop); + + // continue at exception handler (return address removed) + // note: do *not* remove arguments when unwinding the + // activation since the caller assumes having + // all arguments on the stack when entering the + // runtime to determine the exception handler + // (GC happens at call site with arguments!) + // x10: exception oop + // x13: throwing pc + // x11: exception handler + __ jr(handler_addr); +} + +OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { + // use the maximum number of runtime-arguments here because it is difficult to + // distinguish each RT-Call. + // Note: This number affects also the RT-Call in generate_handle_exception because + // the oop-map is shared for all calls. + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + + OopMap* oop_map = save_live_registers(sasm); + assert_cond(oop_map != NULL); + + __ mv(c_rarg0, xthread); + Label retaddr; + __ set_last_Java_frame(sp, fp, retaddr, t0); + // do the call + int32_t off = 0; + __ la_patchable(t0, RuntimeAddress(target), off); + __ jalr(x1, t0, off); + __ bind(retaddr); + OopMapSet* oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(__ offset(), oop_map); + // verify callee-saved register +#ifdef ASSERT + { Label L; + __ get_thread(t0); + __ beq(xthread, t0, L); + __ stop("StubAssembler::call_RT: xthread not callee saved?"); + __ bind(L); + } +#endif + __ reset_last_Java_frame(true); + +#ifdef ASSERT + // Check that fields in JavaThread for exception oop and issuing pc are empty + Label oop_empty; + __ ld(t0, Address(xthread, Thread::pending_exception_offset())); + __ beqz(t0, oop_empty); + __ stop("exception oop must be empty"); + __ bind(oop_empty); + + Label pc_empty; + __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); + __ beqz(t0, pc_empty); + __ stop("exception pc must be empty"); + __ bind(pc_empty); +#endif + + // Runtime will return true if the nmethod has been deoptimized, this is the + // expected scenario and anything else is an error. Note that we maintain a + // check on the result purely as a defensive measure. + Label no_deopt; + __ beqz(x10, no_deopt); // Have we deoptimized? + + // Perform a re-execute. The proper return address is already on the stack, + // we just need to restore registers, pop all of our frames but the return + // address and jump to the deopt blob. 
+ + restore_live_registers(sasm); + __ leave(); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + + __ bind(no_deopt); + __ stop("deopt not performed"); + + return oop_maps; +} + +OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + // for better readability + const bool dont_gc_arguments = false; + + // default value; overwritten for some optimized stubs that are called from methods that do not use the fpu + bool save_fpu_registers = true; + + // stub code & info for the different stubs + OopMapSet* oop_maps = NULL; + switch (id) { + { + case forward_exception_id: + { + oop_maps = generate_handle_exception(id, sasm); + __ leave(); + __ ret(); + } + break; + + case throw_div0_exception_id: + { + StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); + } + break; + + case throw_null_pointer_exception_id: + { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); + } + break; + + case new_instance_id: + case fast_new_instance_id: + case fast_new_instance_init_check_id: + { + Register klass = x13; // Incoming + Register obj = x10; // Result + + if (id == new_instance_id) { + __ set_info("new_instance", dont_gc_arguments); + } else if (id == fast_new_instance_id) { + __ set_info("fast new_instance", dont_gc_arguments); + } else { + assert(id == fast_new_instance_init_check_id, "bad StubID"); + __ set_info("fast new_instance init check", dont_gc_arguments); + } + + // If TLAB is disabled, see if there is support for inlining contiguous + // allocations. + // Otherwise, just go to the slow path. + if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && + !UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { + Label slow_path; + Register obj_size = x12; + Register tmp1 = x9; + Register tmp2 = x14; + assert_different_registers(klass, obj, obj_size, tmp1, tmp2); + + const int sp_offset = 2; + const int x9_offset = 1; + const int zr_offset = 0; + __ addi(sp, sp, -(sp_offset * wordSize)); + __ sd(x9, Address(sp, x9_offset * wordSize)); + __ sd(zr, Address(sp, zr_offset * wordSize)); + + if (id == fast_new_instance_init_check_id) { + // make sure the klass is initialized + __ lbu(t0, Address(klass, InstanceKlass::init_state_offset())); + __ mv(t1, InstanceKlass::fully_initialized); + __ bne(t0, t1, slow_path); + } + +#ifdef ASSERT + // assert object can be fast path allocated + { + Label ok, not_ok; + __ lw(obj_size, Address(klass, Klass::layout_helper_offset())); + // make sure it's an instance. For instances, layout helper is a positive number. 
+ // For arrays, layout helper is a negative number + __ blez(obj_size, not_ok); + __ andi(t0, obj_size, Klass::_lh_instance_slow_path_bit); + __ beqz(t0, ok); + __ bind(not_ok); + __ stop("assert(can be fast path allocated)"); + __ should_not_reach_here(); + __ bind(ok); + } +#endif // ASSERT + + // get the instance size + __ lwu(obj_size, Address(klass, Klass::layout_helper_offset())); + + __ eden_allocate(obj, obj_size, 0, tmp1, slow_path); + + __ initialize_object(obj, klass, obj_size, 0, tmp1, tmp2, /* is_tlab_allocated */ false); + __ verify_oop(obj); + __ ld(x9, Address(sp, x9_offset * wordSize)); + __ ld(zr, Address(sp, zr_offset * wordSize)); + __ addi(sp, sp, sp_offset * wordSize); + __ ret(); + + __ bind(slow_path); + __ ld(x9, Address(sp, x9_offset * wordSize)); + __ ld(zr, Address(sp, zr_offset * wordSize)); + __ addi(sp, sp, sp_offset * wordSize); + } + + __ enter(); + OopMap* map = save_live_registers(sasm); + assert_cond(map != NULL); + int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass); + oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r10(sasm); + __ verify_oop(obj); + __ leave(); + __ ret(); + + // x10: new instance + } + + break; + + case counter_overflow_id: + { + Register bci = x10; + Register method = x11; + __ enter(); + OopMap* map = save_live_registers(sasm); + assert_cond(map != NULL); + + const int bci_off = 0; + const int method_off = 1; + // Retrieve bci + __ lw(bci, Address(fp, bci_off * BytesPerWord)); + // And a pointer to the Method* + __ ld(method, Address(fp, method_off * BytesPerWord)); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method); + oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + __ leave(); + __ ret(); + } + break; + + case new_type_array_id: + case new_object_array_id: + { + Register length = x9; // Incoming + Register klass = x13; // Incoming + Register obj = x10; // Result + + if (id == new_type_array_id) { + __ set_info("new_type_array", dont_gc_arguments); + } else { + __ set_info("new_object_array", dont_gc_arguments); + } + +#ifdef ASSERT + // assert object type is really an array of the proper kind + { + Label ok; + Register tmp = obj; + __ lwu(tmp, Address(klass, Klass::layout_helper_offset())); + __ sraiw(tmp, tmp, Klass::_lh_array_tag_shift); + int tag = ((id == new_type_array_id) ? Klass::_lh_array_tag_type_value : Klass::_lh_array_tag_obj_value); + __ mv(t0, tag); + __ beq(t0, tmp, ok); + __ stop("assert(is an array klass)"); + __ should_not_reach_here(); + __ bind(ok); + } +#endif // ASSERT + + // If TLAB is disabled, see if there is support for inlining contiguous + // allocations. + // Otherwise, just go to the slow path. + if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { + Register arr_size = x14; + Register tmp1 = x12; + Register tmp2 = x15; + Label slow_path; + assert_different_registers(length, klass, obj, arr_size, tmp1, tmp2); + + // check that array length is small enough for fast path. 
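For context on the layout_helper uses in these allocation fast paths (the sign and slow-path-bit check above, and the array-size computation just below): an array Klass::layout_helper packs the header size in bytes and log2(element size) into one word, and the stub recovers both to compute round_up(header + (length << log2_esize), MinObjAlignmentInBytes). A hedged, host-only sketch of that arithmetic (the field positions mirror HotSpot's encoding as I understand it; the 16-byte int[] header assumes compressed class pointers):

  #include <cstdint>
  #include <cstdio>

  static const int kMinObjAlignmentInBytes = 8;

  // Pack header size (bytes) and log2(element size), ignoring the tag and
  // element-type bits that a real layout_helper also carries.
  static uint32_t array_layout_helper(uint32_t header_bytes, uint32_t log2_esize) {
    return (header_bytes << 16) | log2_esize;
  }

  static uint32_t array_size_in_bytes(uint32_t lh, uint32_t length) {
    uint32_t log2_esize = lh & 0x1f;            // andi(t0, tmp1, 0x1f)
    uint32_t header     = (lh >> 16) & 0xff;    // _lh_header_size field
    uint32_t size       = header + (length << log2_esize);
    return (size + kMinObjAlignmentInBytes - 1) & ~(uint32_t)(kMinObjAlignmentInBytes - 1);
  }

  int main() {
    // e.g. an int[] with a 16-byte header and 4-byte elements:
    uint32_t lh = array_layout_helper(16, 2);
    std::printf("%u\n", array_size_in_bytes(lh, 10));   // 56 = align_up(16 + 40, 8)
    return 0;
  }

For length == 10 this gives align_up(16 + 40, 8) == 56 bytes, which is the value the fast path hands to eden_allocate.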
+ __ mv(t0, C1_MacroAssembler::max_array_allocation_length); + __ bgtu(length, t0, slow_path); + + // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) + __ lwu(tmp1, Address(klass, Klass::layout_helper_offset())); + __ andi(t0, tmp1, 0x1f); + __ sll(arr_size, length, t0); + int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1); + int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width; + __ slli(tmp1, tmp1, XLEN - lh_header_size_msb); + __ srli(tmp1, tmp1, XLEN - lh_header_size_width); + __ add(arr_size, arr_size, tmp1); + __ addi(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up + __ andi(arr_size, arr_size, ~(uint)MinObjAlignmentInBytesMask); + + __ eden_allocate(obj, arr_size, 0, tmp1, slow_path); // preserves arr_size + + __ initialize_header(obj, klass, length, tmp1, tmp2); + __ lbu(tmp1, Address(klass, + in_bytes(Klass::layout_helper_offset()) + + (Klass::_lh_header_size_shift / BitsPerByte))); + assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); + assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); + __ andi(tmp1, tmp1, Klass::_lh_header_size_mask); + __ sub(arr_size, arr_size, tmp1); // body length + __ add(tmp1, tmp1, obj); // body start + __ initialize_body(tmp1, arr_size, 0, tmp2); + __ membar(MacroAssembler::StoreStore); + __ verify_oop(obj); + + __ ret(); + + __ bind(slow_path); + } + + __ enter(); + OopMap* map = save_live_registers(sasm); + assert_cond(map != NULL); + int call_offset = 0; + if (id == new_type_array_id) { + call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); + } else { + call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); + } + + oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r10(sasm); + + __ verify_oop(obj); + __ leave(); + __ ret(); + + // x10: new array + } + break; + + case new_multi_array_id: + { + StubFrame f(sasm, "new_multi_array", dont_gc_arguments); + // x10: klass + // x9: rank + // x12: address of 1st dimension + OopMap* map = save_live_registers(sasm); + assert_cond(map != NULL); + __ mv(c_rarg1, x10); + __ mv(c_rarg3, x12); + __ mv(c_rarg2, x9); + int call_offset = __ call_RT(x10, noreg, CAST_FROM_FN_PTR(address, new_multi_array), x11, x12, x13); + + oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r10(sasm); + + // x10: new multi array + __ verify_oop(x10); + } + break; + + case register_finalizer_id: + { + __ set_info("register_finalizer", dont_gc_arguments); + + // This is called via call_runtime so the arguments + // will be place in C abi locations + __ verify_oop(c_rarg0); + + // load the klass and check the has finalizer flag + Label register_finalizer; + Register t = x15; + __ load_klass(t, x10); + __ lwu(t, Address(t, Klass::access_flags_offset())); + __ andi(t0, t, JVM_ACC_HAS_FINALIZER); + __ bnez(t0, register_finalizer); + __ ret(); + + __ bind(register_finalizer); + __ enter(); + OopMap* oop_map = save_live_registers(sasm); + assert_cond(oop_map != NULL); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), x10); + oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, oop_map); + + // Now restore all the live registers + restore_live_registers(sasm); + + __ leave(); + __ ret(); + } 
+ break; + + case throw_class_cast_exception_id: + { + StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); + } + break; + + case throw_incompatible_class_change_error_id: + { + StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, + CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); + } + break; + + case slow_subtype_check_id: + { + // Typical calling sequence: + // push klass_RInfo (object klass or other subclass) + // push sup_k_RInfo (array element klass or other superclass) + // jump to slow_subtype_check + // Note that the subclass is pushed first, and is therefore deepest. + enum layout { + x10_off, x10_off_hi, + x12_off, x12_off_hi, + x14_off, x14_off_hi, + x15_off, x15_off_hi, + sup_k_off, sup_k_off_hi, + klass_off, klass_off_hi, + framesize, + result_off = sup_k_off + }; + + __ set_info("slow_subtype_check", dont_gc_arguments); + __ push_reg(RegSet::of(x10, x12, x14, x15), sp); + + __ ld(x14, Address(sp, (klass_off) * VMRegImpl::stack_slot_size)); // sub klass + __ ld(x10, Address(sp, (sup_k_off) * VMRegImpl::stack_slot_size)); // super klass + + Label miss; + __ check_klass_subtype_slow_path(x14, x10, x12, x15, NULL, &miss); + + // fallthrough on success: + __ li(t0, 1); + __ sd(t0, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result + __ pop_reg(RegSet::of(x10, x12, x14, x15), sp); + __ ret(); + + __ bind(miss); + __ sd(zr, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result + __ pop_reg(RegSet::of(x10, x12, x14, x15), sp); + __ ret(); + } + break; + + case monitorenter_nofpu_id: + save_fpu_registers = false; + // fall through + case monitorenter_id: + { + StubFrame f(sasm, "monitorenter", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, save_fpu_registers); + assert_cond(map != NULL); + + // Called with store_parameter and not C abi + f.load_argument(1, x10); // x10: object + f.load_argument(0, x11); // x11: lock address + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), x10, x11); + + oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm, save_fpu_registers); + } + break; + + case monitorexit_nofpu_id: + save_fpu_registers = false; + // fall through + case monitorexit_id: + { + StubFrame f(sasm, "monitorexit", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, save_fpu_registers); + assert_cond(map != NULL); + + // Called with store_parameter and not C abi + f.load_argument(0, x10); // x10: lock address + + // note: really a leaf routine but must setup last java sp + // => use call_RT for now (speed can be improved by + // doing last java sp setup manually) + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), x10); + + oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm, save_fpu_registers); + } + break; + + case deoptimize_id: + { + StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); + OopMap* oop_map = save_live_registers(sasm); + assert_cond(oop_map != NULL); + f.load_argument(0, c_rarg1); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), c_rarg1); + + oop_maps = new OopMapSet(); + 
assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, oop_map); + restore_live_registers(sasm); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + __ leave(); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + } + break; + + case throw_range_check_failed_id: + { + StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); + } + break; + + case unwind_exception_id: + { + __ set_info("unwind_exception", dont_gc_arguments); + // note: no stubframe since we are about to leave the current + // activation and we are calling a leaf VM function only. + generate_unwind_exception(sasm); + } + break; + + case access_field_patching_id: + { + StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); + } + break; + + case load_klass_patching_id: + { + StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); + } + break; + + case load_mirror_patching_id: + { + StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); + } + break; + + case load_appendix_patching_id: + { + StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); + } + break; + + case handle_exception_nofpu_id: + case handle_exception_id: + { + StubFrame f(sasm, "handle_exception", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case handle_exception_from_callee_id: + { + StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case throw_index_exception_id: + { + StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); + } + break; + + case throw_array_store_exception_id: + { + StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return); + // tos + 0: link + // + 1: return address + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); + } + break; + + case predicate_failed_trap_id: + { + StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); + + OopMap* map = save_live_registers(sasm); + assert_cond(map != NULL); + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); + oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + __ leave(); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + } + break; + + case dtrace_object_alloc_id: + { // c_rarg0: object + StubFrame f(sasm, "dtrace_object_alloc", 
dont_gc_arguments); + save_live_registers(sasm); + + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), c_rarg0); + + restore_live_registers(sasm); + } + break; + + default: + { + StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); + __ li(x10, (int) id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), x10); + __ should_not_reach_here(); + } + break; + } + } + return oop_maps; +} + +#undef __ + +const char *Runtime1::pd_name_for_address(address entry) { Unimplemented(); return 0; } Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_globals_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c1_globals_riscv.hpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_C1_GLOBALS_RISCV_HPP +#define CPU_RISCV_C1_GLOBALS_RISCV_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the client compiler. 
+// (see c1_globals.hpp) + +#ifndef COMPILER2 +define_pd_global(bool, BackgroundCompilation, true ); +define_pd_global(bool, InlineIntrinsics, true ); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, false); +define_pd_global(bool, UseOnStackReplacement, true ); +define_pd_global(bool, TieredCompilation, false); +define_pd_global(intx, CompileThreshold, 1500 ); + +define_pd_global(intx, OnStackReplacePercentage, 933 ); +define_pd_global(intx, NewSizeThreadIncrease, 4*K ); +define_pd_global(intx, InitialCodeCacheSize, 160*K); +define_pd_global(intx, ReservedCodeCacheSize, 32*M ); +define_pd_global(intx, NonProfiledCodeHeapSize, 13*M ); +define_pd_global(intx, ProfiledCodeHeapSize, 14*M ); +define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); +define_pd_global(bool, ProfileInterpreter, false); +define_pd_global(intx, CodeCacheExpansionSize, 32*K ); +define_pd_global(uintx, CodeCacheMinBlockLength, 1); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); +define_pd_global(bool, NeverActAsServerClassMachine, true ); +define_pd_global(uint64_t, MaxRAM, 1ULL*G); +define_pd_global(bool, CICompileOSR, true ); +#endif // !COMPILER2 +define_pd_global(bool, UseTypeProfile, false); + +define_pd_global(bool, OptimizeSinglePrecision, true ); +define_pd_global(bool, CSEArrayLength, false); +define_pd_global(bool, TwoOperandLIRForm, false); + +#endif // CPU_RISCV_C1_GLOBALS_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -0,0 +1,1641 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "opto/c2_MacroAssembler.hpp" +#include "opto/intrinsicnode.hpp" +#include "opto/subnode.hpp" +#include "runtime/stubRoutines.hpp" + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) block_comment(str) +#define STOP(error) block_comment(error); stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// short string +// StringUTF16.indexOfChar +// StringLatin1.indexOfChar +void C2_MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, + Register ch, Register result, + bool isL) +{ + Register ch1 = t0; + Register index = t1; + + BLOCK_COMMENT("string_indexof_char_short {"); + + Label LOOP, LOOP1, LOOP4, LOOP8; + Label MATCH, MATCH1, MATCH2, MATCH3, + MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; + + mv(result, -1); + mv(index, zr); + + bind(LOOP); + addi(t0, index, 8); + ble(t0, cnt1, LOOP8); + addi(t0, index, 4); + ble(t0, cnt1, LOOP4); + j(LOOP1); + + bind(LOOP8); + isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); + beq(ch, ch1, MATCH); + isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); + beq(ch, ch1, MATCH1); + isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); + beq(ch, ch1, MATCH2); + isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); + beq(ch, ch1, MATCH3); + isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); + beq(ch, ch1, MATCH4); + isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); + beq(ch, ch1, MATCH5); + isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); + beq(ch, ch1, MATCH6); + isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); + beq(ch, ch1, MATCH7); + addi(index, index, 8); + addi(str1, str1, isL ? 8 : 16); + blt(index, cnt1, LOOP); + j(NOMATCH); + + bind(LOOP4); + isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); + beq(ch, ch1, MATCH); + isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); + beq(ch, ch1, MATCH1); + isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); + beq(ch, ch1, MATCH2); + isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); + beq(ch, ch1, MATCH3); + addi(index, index, 4); + addi(str1, str1, isL ? 4 : 8); + bge(index, cnt1, NOMATCH); + + bind(LOOP1); + isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); + beq(ch, ch1, MATCH); + addi(index, index, 1); + addi(str1, str1, isL ? 
1 : 2); + blt(index, cnt1, LOOP1); + j(NOMATCH); + + bind(MATCH1); + addi(index, index, 1); + j(MATCH); + + bind(MATCH2); + addi(index, index, 2); + j(MATCH); + + bind(MATCH3); + addi(index, index, 3); + j(MATCH); + + bind(MATCH4); + addi(index, index, 4); + j(MATCH); + + bind(MATCH5); + addi(index, index, 5); + j(MATCH); + + bind(MATCH6); + addi(index, index, 6); + j(MATCH); + + bind(MATCH7); + addi(index, index, 7); + + bind(MATCH); + mv(result, index); + bind(NOMATCH); + BLOCK_COMMENT("} string_indexof_char_short"); +} + +// StringUTF16.indexOfChar +// StringLatin1.indexOfChar +void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, + Register ch, Register result, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + bool isL) +{ + Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; + Register ch1 = t0; + Register orig_cnt = t1; + Register mask1 = tmp3; + Register mask2 = tmp2; + Register match_mask = tmp1; + Register trailing_char = tmp4; + Register unaligned_elems = tmp4; + + BLOCK_COMMENT("string_indexof_char {"); + beqz(cnt1, NOMATCH); + + addi(t0, cnt1, isL ? -32 : -16); + bgtz(t0, DO_LONG); + string_indexof_char_short(str1, cnt1, ch, result, isL); + j(DONE); + + bind(DO_LONG); + mv(orig_cnt, cnt1); + if (AvoidUnalignedAccesses) { + Label ALIGNED; + andi(unaligned_elems, str1, 0x7); + beqz(unaligned_elems, ALIGNED); + sub(unaligned_elems, unaligned_elems, 8); + neg(unaligned_elems, unaligned_elems); + if (!isL) { + srli(unaligned_elems, unaligned_elems, 1); + } + // do unaligned part per element + string_indexof_char_short(str1, unaligned_elems, ch, result, isL); + bgez(result, DONE); + mv(orig_cnt, cnt1); + sub(cnt1, cnt1, unaligned_elems); + bind(ALIGNED); + } + + // duplicate ch + if (isL) { + slli(ch1, ch, 8); + orr(ch, ch1, ch); + } + slli(ch1, ch, 16); + orr(ch, ch1, ch); + slli(ch1, ch, 32); + orr(ch, ch1, ch); + + if (!isL) { + slli(cnt1, cnt1, 1); + } + + uint64_t mask0101 = UCONST64(0x0101010101010101); + uint64_t mask0001 = UCONST64(0x0001000100010001); + mv(mask1, isL ? mask0101 : mask0001); + uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); + uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); + mv(mask2, isL ? 
mask7f7f : mask7fff); + + bind(CH1_LOOP); + ld(ch1, Address(str1)); + addi(str1, str1, 8); + addi(cnt1, cnt1, -8); + compute_match_mask(ch1, ch, match_mask, mask1, mask2); + bnez(match_mask, HIT); + bgtz(cnt1, CH1_LOOP); + j(NOMATCH); + + bind(HIT); + ctzc_bit(trailing_char, match_mask, isL, ch1, result); + srli(trailing_char, trailing_char, 3); + addi(cnt1, cnt1, 8); + ble(cnt1, trailing_char, NOMATCH); + // match case + if (!isL) { + srli(cnt1, cnt1, 1); + srli(trailing_char, trailing_char, 1); + } + + sub(result, orig_cnt, cnt1); + add(result, result, trailing_char); + j(DONE); + + bind(NOMATCH); + mv(result, -1); + + bind(DONE); + BLOCK_COMMENT("} string_indexof_char"); +} + +typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); + +// Search for needle in haystack and return index or -1 +// x10: result +// x11: haystack +// x12: haystack_len +// x13: needle +// x14: needle_len +void C2_MacroAssembler::string_indexof(Register haystack, Register needle, + Register haystack_len, Register needle_len, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + Register tmp5, Register tmp6, + Register result, int ae) +{ + assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + + Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; + + Register ch1 = t0; + Register ch2 = t1; + Register nlen_tmp = tmp1; // needle len tmp + Register hlen_tmp = tmp2; // haystack len tmp + Register result_tmp = tmp4; + + bool isLL = ae == StrIntrinsicNode::LL; + + bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; + bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; + int needle_chr_shift = needle_isL ? 0 : 1; + int haystack_chr_shift = haystack_isL ? 0 : 1; + int needle_chr_size = needle_isL ? 1 : 2; + int haystack_chr_size = haystack_isL ? 1 : 2; + load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : + (load_chr_insn)&MacroAssembler::lhu; + load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : + (load_chr_insn)&MacroAssembler::lhu; + + BLOCK_COMMENT("string_indexof {"); + + // Note, inline_string_indexOf() generates checks: + // if (pattern.count > src.count) return -1; + // if (pattern.count == 0) return 0; + + // We have two strings, a source string in haystack, haystack_len and a pattern string + // in needle, needle_len. Find the first occurence of pattern in source or return -1. + + // For larger pattern and source we use a simplified Boyer Moore algorithm. + // With a small pattern and source we use linear scan. + + // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. + sub(result_tmp, haystack_len, needle_len); + // needle_len < 8, use linear scan + sub(t0, needle_len, 8); + bltz(t0, LINEARSEARCH); + // needle_len >= 256, use linear scan + sub(t0, needle_len, 256); + bgez(t0, LINEARSTUB); + // needle_len >= haystack_len/4, use linear scan + srli(t0, haystack_len, 2); + bge(needle_len, t0, LINEARSTUB); + + // Boyer-Moore-Horspool introduction: + // The Boyer Moore alogorithm is based on the description here:- + // + // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm + // + // This describes and algorithm with 2 shift rules. The 'Bad Character' rule + // and the 'Good Suffix' rule. + // + // These rules are essentially heuristics for how far we can shift the + // pattern along the search string. 
+ // + // The implementation here uses the 'Bad Character' rule only because of the + // complexity of initialisation for the 'Good Suffix' rule. + // + // This is also known as the Boyer-Moore-Horspool algorithm: + // + // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm + // + // #define ASIZE 256 + // + // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { + // int i, j; + // unsigned c; + // unsigned char bc[ASIZE]; + // + // /* Preprocessing */ + // for (i = 0; i < ASIZE; ++i) + // bc[i] = m; + // for (i = 0; i < m - 1; ) { + // c = pattern[i]; + // ++i; + // // c < 256 for Latin1 string, so, no need for branch + // #ifdef PATTERN_STRING_IS_LATIN1 + // bc[c] = m - i; + // #else + // if (c < ASIZE) bc[c] = m - i; + // #endif + // } + // + // /* Searching */ + // j = 0; + // while (j <= n - m) { + // c = src[i+j]; + // if (pattern[m-1] == c) + // int k; + // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); + // if (k < 0) return j; + // // c < 256 for Latin1 string, so, no need for branch + // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 + // // LL case: (c< 256) always true. Remove branch + // j += bc[pattern[j+m-1]]; + // #endif + // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF + // // UU case: need if (c if not. + // if (c < ASIZE) + // j += bc[pattern[j+m-1]]; + // else + // j += m + // #endif + // } + // return -1; + // } + + // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result + Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, + BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; + + Register haystack_end = haystack_len; + Register skipch = tmp2; + + // pattern length is >=8, so, we can read at least 1 register for cases when + // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for + // UL case. We'll re-read last character in inner pre-loop code to have + // single outer pre-loop load + const int firstStep = isLL ? 
7 : 3; + + const int ASIZE = 256; + const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) + + sub(sp, sp, ASIZE); + + // init BC offset table with default value: needle_len + slli(t0, needle_len, 8); + orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] + slli(tmp1, t0, 16); + orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] + slli(tmp1, t0, 32); + orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] + + mv(ch1, sp); // ch1 is t0 + mv(tmp6, ASIZE / STORE_BYTES); // loop iterations + + bind(BM_INIT_LOOP); + // for (i = 0; i < ASIZE; ++i) + // bc[i] = m; + for (int i = 0; i < 4; i++) { + sd(tmp5, Address(ch1, i * wordSize)); + } + add(ch1, ch1, 32); + sub(tmp6, tmp6, 4); + bgtz(tmp6, BM_INIT_LOOP); + + sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern + Register orig_haystack = tmp5; + mv(orig_haystack, haystack); + // result_tmp = tmp4 + shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); + sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 + mv(tmp3, needle); + + // for (i = 0; i < m - 1; ) { + // c = pattern[i]; + // ++i; + // // c < 256 for Latin1 string, so, no need for branch + // #ifdef PATTERN_STRING_IS_LATIN1 + // bc[c] = m - i; + // #else + // if (c < ASIZE) bc[c] = m - i; + // #endif + // } + bind(BCLOOP); + (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); + add(tmp3, tmp3, needle_chr_size); + if (!needle_isL) { + // ae == StrIntrinsicNode::UU + mv(tmp6, ASIZE); + bgeu(ch1, tmp6, BCSKIP); + } + add(tmp4, sp, ch1); + sb(ch2, Address(tmp4)); // store skip offset to BC offset table + + bind(BCSKIP); + sub(ch2, ch2, 1); // for next pattern element, skip distance -1 + bgtz(ch2, BCLOOP); + + // tmp6: pattern end, address after needle + shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); + if (needle_isL == haystack_isL) { + // load last 8 bytes (8LL/4UU symbols) + ld(tmp6, Address(tmp6, -wordSize)); + } else { + // UL: from UTF-16(source) search Latin1(pattern) + lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) + // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d + // We'll have to wait until load completed, but it's still faster than per-character loads+checks + srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a + slli(ch2, tmp6, XLEN - 24); + srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b + slli(ch1, tmp6, XLEN - 16); + srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c + andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d + slli(ch2, ch2, 16); + orr(ch2, ch2, ch1); // 0x00000b0c + slli(result, tmp3, 48); // use result as temp register + orr(tmp6, tmp6, result); // 0x0a00000d + slli(result, ch2, 16); + orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d + } + + // i = m - 1; + // skipch = j + i; + // if (skipch == pattern[m - 1] + // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); + // else + // move j with bad char offset table + bind(BMLOOPSTR2); + // compare pattern to source string backward + shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); + (this->*haystack_load_1chr)(skipch, Address(result), noreg); + sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 + if (needle_isL == haystack_isL) { + // re-init tmp3. It's for free because it's executed in parallel with + // load above. 
Alternative is to initialize it before loop, but it'll + // affect performance on in-order systems with 2 or more ld/st pipelines + srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] + } + if (!isLL) { // UU/UL case + slli(ch2, nlen_tmp, 1); // offsets in bytes + } + bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char + add(result, haystack, isLL ? nlen_tmp : ch2); + ld(ch2, Address(result)); // load 8 bytes from source string + mv(ch1, tmp6); + if (isLL) { + j(BMLOOPSTR1_AFTER_LOAD); + } else { + sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 + j(BMLOOPSTR1_CMP); + } + + bind(BMLOOPSTR1); + shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); + (this->*needle_load_1chr)(ch1, Address(ch1), noreg); + shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); + (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); + + bind(BMLOOPSTR1_AFTER_LOAD); + sub(nlen_tmp, nlen_tmp, 1); + bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); + + bind(BMLOOPSTR1_CMP); + beq(ch1, ch2, BMLOOPSTR1); + + bind(BMSKIP); + if (!isLL) { + // if we've met UTF symbol while searching Latin1 pattern, then we can + // skip needle_len symbols + if (needle_isL != haystack_isL) { + mv(result_tmp, needle_len); + } else { + mv(result_tmp, 1); + } + mv(t0, ASIZE); + bgeu(skipch, t0, BMADV); + } + add(result_tmp, sp, skipch); + lbu(result_tmp, Address(result_tmp)); // load skip offset + + bind(BMADV); + sub(nlen_tmp, needle_len, 1); + // move haystack after bad char skip offset + shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); + ble(haystack, haystack_end, BMLOOPSTR2); + add(sp, sp, ASIZE); + j(NOMATCH); + + bind(BMLOOPSTR1_LASTCMP); + bne(ch1, ch2, BMSKIP); + + bind(BMMATCH); + sub(result, haystack, orig_haystack); + if (!haystack_isL) { + srli(result, result, 1); + } + add(sp, sp, ASIZE); + j(DONE); + + bind(LINEARSTUB); + sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm + bltz(t0, LINEARSEARCH); + mv(result, zr); + RuntimeAddress stub = NULL; + if (isLL) { + stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); + assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); + } else if (needle_isL) { + stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); + assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); + } else { + stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); + assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); + } + trampoline_call(stub); + j(DONE); + + bind(NOMATCH); + mv(result, -1); + j(DONE); + + bind(LINEARSEARCH); + string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); + + bind(DONE); + BLOCK_COMMENT("} string_indexof"); +} + +// string_indexof +// result: x10 +// src: x11 +// src_count: x12 +// pattern: x13 +// pattern_count: x14 or 1/2/3/4 +void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, + Register haystack_len, Register needle_len, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + int needle_con_cnt, Register result, int ae) +{ + // Note: + // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant + // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 + assert(needle_con_cnt <= 4, "Invalid needle constant count"); + assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + + Register ch1 = t0; + 
Register ch2 = t1; + Register hlen_neg = haystack_len, nlen_neg = needle_len; + Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; + + bool isLL = ae == StrIntrinsicNode::LL; + + bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; + bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; + int needle_chr_shift = needle_isL ? 0 : 1; + int haystack_chr_shift = haystack_isL ? 0 : 1; + int needle_chr_size = needle_isL ? 1 : 2; + int haystack_chr_size = haystack_isL ? 1 : 2; + + load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : + (load_chr_insn)&MacroAssembler::lhu; + load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : + (load_chr_insn)&MacroAssembler::lhu; + load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; + load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; + + Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; + + Register first = tmp3; + + if (needle_con_cnt == -1) { + Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; + + sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); + bltz(t0, DOSHORT); + + (this->*needle_load_1chr)(first, Address(needle), noreg); + slli(t0, needle_len, needle_chr_shift); + add(needle, needle, t0); + neg(nlen_neg, t0); + slli(t0, result_tmp, haystack_chr_shift); + add(haystack, haystack, t0); + neg(hlen_neg, t0); + + bind(FIRST_LOOP); + add(t0, haystack, hlen_neg); + (this->*haystack_load_1chr)(ch2, Address(t0), noreg); + beq(first, ch2, STR1_LOOP); + + bind(STR2_NEXT); + add(hlen_neg, hlen_neg, haystack_chr_size); + blez(hlen_neg, FIRST_LOOP); + j(NOMATCH); + + bind(STR1_LOOP); + add(nlen_tmp, nlen_neg, needle_chr_size); + add(hlen_tmp, hlen_neg, haystack_chr_size); + bgez(nlen_tmp, MATCH); + + bind(STR1_NEXT); + add(ch1, needle, nlen_tmp); + (this->*needle_load_1chr)(ch1, Address(ch1), noreg); + add(ch2, haystack, hlen_tmp); + (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); + bne(ch1, ch2, STR2_NEXT); + add(nlen_tmp, nlen_tmp, needle_chr_size); + add(hlen_tmp, hlen_tmp, haystack_chr_size); + bltz(nlen_tmp, STR1_NEXT); + j(MATCH); + + bind(DOSHORT); + if (needle_isL == haystack_isL) { + sub(t0, needle_len, 2); + bltz(t0, DO1); + bgtz(t0, DO3); + } + } + + if (needle_con_cnt == 4) { + Label CH1_LOOP; + (this->*load_4chr)(ch1, Address(needle), noreg); + sub(result_tmp, haystack_len, 4); + slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp + add(haystack, haystack, tmp3); + neg(hlen_neg, tmp3); + + bind(CH1_LOOP); + add(ch2, haystack, hlen_neg); + (this->*load_4chr)(ch2, Address(ch2), noreg); + beq(ch1, ch2, MATCH); + add(hlen_neg, hlen_neg, haystack_chr_size); + blez(hlen_neg, CH1_LOOP); + j(NOMATCH); + } + + if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { + Label CH1_LOOP; + BLOCK_COMMENT("string_indexof DO2 {"); + bind(DO2); + (this->*load_2chr)(ch1, Address(needle), noreg); + if (needle_con_cnt == 2) { + sub(result_tmp, haystack_len, 2); + } + slli(tmp3, result_tmp, haystack_chr_shift); + add(haystack, haystack, tmp3); + neg(hlen_neg, tmp3); + + bind(CH1_LOOP); + add(tmp3, haystack, hlen_neg); + (this->*load_2chr)(ch2, Address(tmp3), noreg); + beq(ch1, ch2, MATCH); + add(hlen_neg, hlen_neg, haystack_chr_size); + blez(hlen_neg, CH1_LOOP); + j(NOMATCH); + BLOCK_COMMENT("} string_indexof DO2"); + } + + if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || 
needle_con_cnt == 3) { + Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; + BLOCK_COMMENT("string_indexof DO3 {"); + + bind(DO3); + (this->*load_2chr)(first, Address(needle), noreg); + (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); + if (needle_con_cnt == 3) { + sub(result_tmp, haystack_len, 3); + } + slli(hlen_tmp, result_tmp, haystack_chr_shift); + add(haystack, haystack, hlen_tmp); + neg(hlen_neg, hlen_tmp); + + bind(FIRST_LOOP); + add(ch2, haystack, hlen_neg); + (this->*load_2chr)(ch2, Address(ch2), noreg); + beq(first, ch2, STR1_LOOP); + + bind(STR2_NEXT); + add(hlen_neg, hlen_neg, haystack_chr_size); + blez(hlen_neg, FIRST_LOOP); + j(NOMATCH); + + bind(STR1_LOOP); + add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); + add(ch2, haystack, hlen_tmp); + (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); + bne(ch1, ch2, STR2_NEXT); + j(MATCH); + BLOCK_COMMENT("} string_indexof DO3"); + } + + if (needle_con_cnt == -1 || needle_con_cnt == 1) { + Label DO1_LOOP; + + BLOCK_COMMENT("string_indexof DO1 {"); + bind(DO1); + (this->*needle_load_1chr)(ch1, Address(needle), noreg); + sub(result_tmp, haystack_len, 1); + mv(tmp3, result_tmp); + if (haystack_chr_shift) { + slli(tmp3, result_tmp, haystack_chr_shift); + } + add(haystack, haystack, tmp3); + neg(hlen_neg, tmp3); + + bind(DO1_LOOP); + add(tmp3, haystack, hlen_neg); + (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); + beq(ch1, ch2, MATCH); + add(hlen_neg, hlen_neg, haystack_chr_size); + blez(hlen_neg, DO1_LOOP); + BLOCK_COMMENT("} string_indexof DO1"); + } + + bind(NOMATCH); + mv(result, -1); + j(DONE); + + bind(MATCH); + srai(t0, hlen_neg, haystack_chr_shift); + add(result, result_tmp, t0); + + bind(DONE); +} + +// Compare strings. +void C2_MacroAssembler::string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, + Register tmp3, int ae) +{ + Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, + DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, + SHORT_LOOP_START, TAIL_CHECK, L; + + const int STUB_THRESHOLD = 64 + 8; + bool isLL = ae == StrIntrinsicNode::LL; + bool isLU = ae == StrIntrinsicNode::LU; + bool isUL = ae == StrIntrinsicNode::UL; + + bool str1_isL = isLL || isLU; + bool str2_isL = isLL || isUL; + + // for L strings, 1 byte for 1 character + // for U strings, 2 bytes for 1 character + int str1_chr_size = str1_isL ? 1 : 2; + int str2_chr_size = str2_isL ? 1 : 2; + int minCharsInWord = isLL ? wordSize : wordSize / 2; + + load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; + load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; + + BLOCK_COMMENT("string_compare {"); + + // Bizzarely, the counts are passed in bytes, regardless of whether they + // are L or U strings, however the result is always in characters. + if (!str1_isL) { + sraiw(cnt1, cnt1, 1); + } + if (!str2_isL) { + sraiw(cnt2, cnt2, 1); + } + + // Compute the minimum of the string lengths and save the difference in result. 
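Since the counts arrive in bytes but the result is in characters, it may help to keep the overall contract in view: the code just below clamps cnt2 to min(cnt1, cnt2) and parks cnt1 - cnt2 in result, so that if the word-by-word loop finds no mismatch the length difference is what gets returned. A hedged scalar reference for that contract (plain C++, UTF-16 flavour only, not the stub's actual word-at-a-time loop):

  #include <cstdint>
  #include <cstdio>

  // Compare up to min(len1, len2) characters; the first mismatching pair
  // decides, otherwise the shorter string compares lower.
  int reference_compare(const uint16_t* s1, int len1,
                        const uint16_t* s2, int len2) {
    int min_len = len1 < len2 ? len1 : len2;
    for (int i = 0; i < min_len; i++) {
      if (s1[i] != s2[i]) {
        return (int)s1[i] - (int)s2[i];   // first difference wins
      }
    }
    return len1 - len2;                   // common prefix: compare lengths
  }

  int main() {
    const uint16_t a[] = { 'j', 'a', 'v', 'a' };
    const uint16_t b[] = { 'j', 'a', 'v', 'a', 'c' };
    std::printf("%d %d\n",
                reference_compare(a, 4, b, 5),   // -1: "java" < "javac"
                reference_compare(b, 5, a, 4));  //  1
    return 0;
  }

The mixed Latin1/UTF-16 cases only change how a character is loaded and widened (inflate_lo32 in the stub); the comparison logic itself is the same.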
+ sub(result, cnt1, cnt2); + bgt(cnt1, cnt2, L); + mv(cnt2, cnt1); + bind(L); + + // A very short string + li(t0, minCharsInWord); + ble(cnt2, t0, SHORT_STRING); + + // Compare longwords + // load first parts of strings and finish initialization while loading + { + if (str1_isL == str2_isL) { // LL or UU + // load 8 bytes once to compare + ld(tmp1, Address(str1)); + beq(str1, str2, DONE); + ld(tmp2, Address(str2)); + li(t0, STUB_THRESHOLD); + bge(cnt2, t0, STUB); + sub(cnt2, cnt2, minCharsInWord); + beqz(cnt2, TAIL_CHECK); + // convert cnt2 from characters to bytes + if (!str1_isL) { + slli(cnt2, cnt2, 1); + } + add(str2, str2, cnt2); + add(str1, str1, cnt2); + sub(cnt2, zr, cnt2); + } else if (isLU) { // LU case + lwu(tmp1, Address(str1)); + ld(tmp2, Address(str2)); + li(t0, STUB_THRESHOLD); + bge(cnt2, t0, STUB); + addi(cnt2, cnt2, -4); + add(str1, str1, cnt2); + sub(cnt1, zr, cnt2); + slli(cnt2, cnt2, 1); + add(str2, str2, cnt2); + inflate_lo32(tmp3, tmp1); + mv(tmp1, tmp3); + sub(cnt2, zr, cnt2); + addi(cnt1, cnt1, 4); + } else { // UL case + ld(tmp1, Address(str1)); + lwu(tmp2, Address(str2)); + li(t0, STUB_THRESHOLD); + bge(cnt2, t0, STUB); + addi(cnt2, cnt2, -4); + slli(t0, cnt2, 1); + sub(cnt1, zr, t0); + add(str1, str1, t0); + add(str2, str2, cnt2); + inflate_lo32(tmp3, tmp2); + mv(tmp2, tmp3); + sub(cnt2, zr, cnt2); + addi(cnt1, cnt1, 8); + } + addi(cnt2, cnt2, isUL ? 4 : 8); + bgez(cnt2, TAIL); + xorr(tmp3, tmp1, tmp2); + bnez(tmp3, DIFFERENCE); + + // main loop + bind(NEXT_WORD); + if (str1_isL == str2_isL) { // LL or UU + add(t0, str1, cnt2); + ld(tmp1, Address(t0)); + add(t0, str2, cnt2); + ld(tmp2, Address(t0)); + addi(cnt2, cnt2, 8); + } else if (isLU) { // LU case + add(t0, str1, cnt1); + lwu(tmp1, Address(t0)); + add(t0, str2, cnt2); + ld(tmp2, Address(t0)); + addi(cnt1, cnt1, 4); + inflate_lo32(tmp3, tmp1); + mv(tmp1, tmp3); + addi(cnt2, cnt2, 8); + } else { // UL case + add(t0, str2, cnt2); + lwu(tmp2, Address(t0)); + add(t0, str1, cnt1); + ld(tmp1, Address(t0)); + inflate_lo32(tmp3, tmp2); + mv(tmp2, tmp3); + addi(cnt1, cnt1, 8); + addi(cnt2, cnt2, 4); + } + bgez(cnt2, TAIL); + + xorr(tmp3, tmp1, tmp2); + beqz(tmp3, NEXT_WORD); + j(DIFFERENCE); + bind(TAIL); + xorr(tmp3, tmp1, tmp2); + bnez(tmp3, DIFFERENCE); + // Last longword. In the case where length == 4 we compare the + // same longword twice, but that's still faster than another + // conditional branch. + if (str1_isL == str2_isL) { // LL or UU + ld(tmp1, Address(str1)); + ld(tmp2, Address(str2)); + } else if (isLU) { // LU case + lwu(tmp1, Address(str1)); + ld(tmp2, Address(str2)); + inflate_lo32(tmp3, tmp1); + mv(tmp1, tmp3); + } else { // UL case + lwu(tmp2, Address(str2)); + ld(tmp1, Address(str1)); + inflate_lo32(tmp3, tmp2); + mv(tmp2, tmp3); + } + bind(TAIL_CHECK); + xorr(tmp3, tmp1, tmp2); + beqz(tmp3, DONE); + + // Find the first different characters in the longwords and + // compute their difference. 
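The DIFFERENCE block that follows relies on the fact that, after a little-endian 8-byte load, the first differing character corresponds to the lowest set bits of tmp1 ^ tmp2, so a count-trailing-zeros (ctzc_bit here), rounded down to a character boundary, gives the shift that lines both characters up at bit 0. A hedged stand-alone illustration using the GCC/Clang __builtin_ctzll in place of ctzc_bit (whether the stub folds the boundary rounding into ctzc_bit itself is not spelled out here, so the sketch does it explicitly):

  #include <cstdint>
  #include <cstring>
  #include <cstdio>

  // XOR the two 8-byte chunks, find the lowest set bit, round down to a
  // character boundary, then shift and mask to recover the two characters.
  static int first_char_difference(const char* a, const char* b, bool latin1) {
    uint64_t wa, wb;
    std::memcpy(&wa, a, 8);
    std::memcpy(&wb, b, 8);
    uint64_t diff = wa ^ wb;
    if (diff == 0) return 0;                    // chunks are equal
    unsigned shift = __builtin_ctzll(diff);
    shift &= latin1 ? ~7u : ~15u;               // align to 8- or 16-bit char
    uint64_t mask = latin1 ? 0xffu : 0xffffu;
    return (int)((wa >> shift) & mask) - (int)((wb >> shift) & mask);
  }

  int main() {
    // "abcdeXgh" vs "abcdeYgh": the first mismatch is 'X' - 'Y' = -1.
    std::printf("%d\n", first_char_difference("abcdeXgh", "abcdeYgh", true));
    return 0;
  }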
+ bind(DIFFERENCE); + ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb + srl(tmp1, tmp1, result); + srl(tmp2, tmp2, result); + if (isLL) { + andi(tmp1, tmp1, 0xFF); + andi(tmp2, tmp2, 0xFF); + } else { + andi(tmp1, tmp1, 0xFFFF); + andi(tmp2, tmp2, 0xFFFF); + } + sub(result, tmp1, tmp2); + j(DONE); + } + + bind(STUB); + RuntimeAddress stub = NULL; + switch (ae) { + case StrIntrinsicNode::LL: + stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); + break; + case StrIntrinsicNode::UU: + stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); + break; + case StrIntrinsicNode::LU: + stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); + break; + case StrIntrinsicNode::UL: + stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); + break; + default: + ShouldNotReachHere(); + } + assert(stub.target() != NULL, "compare_long_string stub has not been generated"); + trampoline_call(stub); + j(DONE); + + bind(SHORT_STRING); + // Is the minimum length zero? + beqz(cnt2, DONE); + // arrange code to do most branches while loading and loading next characters + // while comparing previous + (this->*str1_load_chr)(tmp1, Address(str1), t0); + addi(str1, str1, str1_chr_size); + addi(cnt2, cnt2, -1); + beqz(cnt2, SHORT_LAST_INIT); + (this->*str2_load_chr)(cnt1, Address(str2), t0); + addi(str2, str2, str2_chr_size); + j(SHORT_LOOP_START); + bind(SHORT_LOOP); + addi(cnt2, cnt2, -1); + beqz(cnt2, SHORT_LAST); + bind(SHORT_LOOP_START); + (this->*str1_load_chr)(tmp2, Address(str1), t0); + addi(str1, str1, str1_chr_size); + (this->*str2_load_chr)(t0, Address(str2), t0); + addi(str2, str2, str2_chr_size); + bne(tmp1, cnt1, SHORT_LOOP_TAIL); + addi(cnt2, cnt2, -1); + beqz(cnt2, SHORT_LAST2); + (this->*str1_load_chr)(tmp1, Address(str1), t0); + addi(str1, str1, str1_chr_size); + (this->*str2_load_chr)(cnt1, Address(str2), t0); + addi(str2, str2, str2_chr_size); + beq(tmp2, t0, SHORT_LOOP); + sub(result, tmp2, t0); + j(DONE); + bind(SHORT_LOOP_TAIL); + sub(result, tmp1, cnt1); + j(DONE); + bind(SHORT_LAST2); + beq(tmp2, t0, DONE); + sub(result, tmp2, t0); + + j(DONE); + bind(SHORT_LAST_INIT); + (this->*str2_load_chr)(cnt1, Address(str2), t0); + addi(str2, str2, str2_chr_size); + bind(SHORT_LAST); + beq(tmp1, cnt1, DONE); + sub(result, tmp1, cnt1); + + bind(DONE); + + BLOCK_COMMENT("} string_compare"); +} + +void C2_MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, + Register tmp4, Register tmp5, Register tmp6, Register result, + Register cnt1, int elem_size) { + Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; + Register tmp1 = t0; + Register tmp2 = t1; + Register cnt2 = tmp2; // cnt2 only used in array length compare + Register elem_per_word = tmp6; + int log_elem_size = exact_log2(elem_size); + int length_offset = arrayOopDesc::length_offset_in_bytes(); + int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); + + assert(elem_size == 1 || elem_size == 2, "must be char or byte"); + assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); + li(elem_per_word, wordSize / elem_size); + + BLOCK_COMMENT("arrays_equals {"); + + // if (a1 == a2), return true + beq(a1, a2, SAME); + + mv(result, false); + beqz(a1, DONE); + beqz(a2, DONE); + lwu(cnt1, Address(a1, length_offset)); + lwu(cnt2, Address(a2, length_offset)); + bne(cnt2, cnt1, DONE); + beqz(cnt1, SAME); + + slli(tmp5, cnt1, 3 + log_elem_size); + sub(tmp5, zr, tmp5); + add(a1, a1, base_offset); + add(a2, a2, base_offset); + ld(tmp3, Address(a1, 0)); + ld(tmp4, Address(a2, 0)); + ble(cnt1, elem_per_word, SHORT); // short or same + + // Main 16 byte comparison loop with 2 exits + bind(NEXT_DWORD); { + ld(tmp1, Address(a1, wordSize)); + ld(tmp2, Address(a2, wordSize)); + sub(cnt1, cnt1, 2 * wordSize / elem_size); + blez(cnt1, TAIL); + bne(tmp3, tmp4, DONE); + ld(tmp3, Address(a1, 2 * wordSize)); + ld(tmp4, Address(a2, 2 * wordSize)); + add(a1, a1, 2 * wordSize); + add(a2, a2, 2 * wordSize); + ble(cnt1, elem_per_word, TAIL2); + } beq(tmp1, tmp2, NEXT_DWORD); + j(DONE); + + bind(TAIL); + xorr(tmp4, tmp3, tmp4); + xorr(tmp2, tmp1, tmp2); + sll(tmp2, tmp2, tmp5); + orr(tmp5, tmp4, tmp2); + j(IS_TMP5_ZR); + + bind(TAIL2); + bne(tmp1, tmp2, DONE); + + bind(SHORT); + xorr(tmp4, tmp3, tmp4); + sll(tmp5, tmp4, tmp5); + + bind(IS_TMP5_ZR); + bnez(tmp5, DONE); + + bind(SAME); + mv(result, true); + // That's it. + bind(DONE); + + BLOCK_COMMENT("} array_equals"); +} + +// Compare Strings + +// For Strings we're passed the address of the first characters in a1 +// and a2 and the length in cnt1. +// elem_size is the element size in bytes: either 1 or 2. +// There are two implementations. For arrays >= 8 bytes, all +// comparisons (including the final one, which may overlap) are +// performed 8 bytes at a time. For strings < 8 bytes, we compare a +// halfword, then a short, and then a byte. + +void C2_MacroAssembler::string_equals(Register a1, Register a2, + Register result, Register cnt1, int elem_size) +{ + Label SAME, DONE, SHORT, NEXT_WORD; + Register tmp1 = t0; + Register tmp2 = t1; + + assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); + assert_different_registers(a1, a2, result, cnt1, t0, t1); + + BLOCK_COMMENT("string_equals {"); + + mv(result, false); + + // Check for short strings, i.e. smaller than wordSize. + sub(cnt1, cnt1, wordSize); + bltz(cnt1, SHORT); + + // Main 8 byte comparison loop. + bind(NEXT_WORD); { + ld(tmp1, Address(a1, 0)); + add(a1, a1, wordSize); + ld(tmp2, Address(a2, 0)); + add(a2, a2, wordSize); + sub(cnt1, cnt1, wordSize); + bne(tmp1, tmp2, DONE); + } bgtz(cnt1, NEXT_WORD); + + // Last longword. In the case where length == 4 we compare the + // same longword twice, but that's still faster than another + // conditional branch. + // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when + // length == 4. + add(tmp1, a1, cnt1); + ld(tmp1, Address(tmp1, 0)); + add(tmp2, a2, cnt1); + ld(tmp2, Address(tmp2, 0)); + bne(tmp1, tmp2, DONE); + j(SAME); + + bind(SHORT); + Label TAIL03, TAIL01; + + // 0-7 bytes left. + andi(t0, cnt1, 4); + beqz(t0, TAIL03); + { + lwu(tmp1, Address(a1, 0)); + add(a1, a1, 4); + lwu(tmp2, Address(a2, 0)); + add(a2, a2, 4); + bne(tmp1, tmp2, DONE); + } + + bind(TAIL03); + // 0-3 bytes left. 
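+  // The low bits of cnt1 (length - wordSize at this point) are the same as those
+  // of the original byte length, so andi can test them directly for the
+  // remaining 2-byte and 1-byte tails.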
+ andi(t0, cnt1, 2); + beqz(t0, TAIL01); + { + lhu(tmp1, Address(a1, 0)); + add(a1, a1, 2); + lhu(tmp2, Address(a2, 0)); + add(a2, a2, 2); + bne(tmp1, tmp2, DONE); + } + + bind(TAIL01); + if (elem_size == 1) { // Only needed when comparing 1-byte elements + // 0-1 bytes left. + andi(t0, cnt1, 1); + beqz(t0, SAME); + { + lbu(tmp1, a1, 0); + lbu(tmp2, a2, 0); + bne(tmp1, tmp2, DONE); + } + } + + // Arrays are equal. + bind(SAME); + mv(result, true); + + // That's it. + bind(DONE); + BLOCK_COMMENT("} string_equals"); +} + +typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); +typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, + bool is_far, bool is_unordered); + +static conditional_branch_insn conditional_branches[] = +{ + /* SHORT branches */ + (conditional_branch_insn)&Assembler::beq, + (conditional_branch_insn)&Assembler::bgt, + NULL, // BoolTest::overflow + (conditional_branch_insn)&Assembler::blt, + (conditional_branch_insn)&Assembler::bne, + (conditional_branch_insn)&Assembler::ble, + NULL, // BoolTest::no_overflow + (conditional_branch_insn)&Assembler::bge, + + /* UNSIGNED branches */ + (conditional_branch_insn)&Assembler::beq, + (conditional_branch_insn)&Assembler::bgtu, + NULL, + (conditional_branch_insn)&Assembler::bltu, + (conditional_branch_insn)&Assembler::bne, + (conditional_branch_insn)&Assembler::bleu, + NULL, + (conditional_branch_insn)&Assembler::bgeu +}; + +static float_conditional_branch_insn float_conditional_branches[] = +{ + /* FLOAT SHORT branches */ + (float_conditional_branch_insn)&MacroAssembler::float_beq, + (float_conditional_branch_insn)&MacroAssembler::float_bgt, + NULL, // BoolTest::overflow + (float_conditional_branch_insn)&MacroAssembler::float_blt, + (float_conditional_branch_insn)&MacroAssembler::float_bne, + (float_conditional_branch_insn)&MacroAssembler::float_ble, + NULL, // BoolTest::no_overflow + (float_conditional_branch_insn)&MacroAssembler::float_bge, + + /* DOUBLE SHORT branches */ + (float_conditional_branch_insn)&MacroAssembler::double_beq, + (float_conditional_branch_insn)&MacroAssembler::double_bgt, + NULL, + (float_conditional_branch_insn)&MacroAssembler::double_blt, + (float_conditional_branch_insn)&MacroAssembler::double_bne, + (float_conditional_branch_insn)&MacroAssembler::double_ble, + NULL, + (float_conditional_branch_insn)&MacroAssembler::double_bge +}; + +void C2_MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { + assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), + "invalid conditional branch index"); + (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); +} + +// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use +// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). +void C2_MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { + assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), + "invalid float conditional branch index"); + int booltest_flag = cmpFlag & ~(C2_MacroAssembler::double_branch_mask); + (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, + (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? 
false : true); +} + +void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { + switch (cmpFlag) { + case BoolTest::eq: + case BoolTest::le: + beqz(op1, L, is_far); + break; + case BoolTest::ne: + case BoolTest::gt: + bnez(op1, L, is_far); + break; + default: + ShouldNotReachHere(); + } +} + +void C2_MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { + switch (cmpFlag) { + case BoolTest::eq: + beqz(op1, L, is_far); + break; + case BoolTest::ne: + bnez(op1, L, is_far); + break; + default: + ShouldNotReachHere(); + } +} + +void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { + Label L; + cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); + mv(dst, src); + bind(L); +} + +// Set dst to NaN if any NaN input. +void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, + bool is_double, bool is_min) { + assert_different_registers(dst, src1, src2); + + Label Done; + fsflags(zr); + if (is_double) { + is_min ? fmin_d(dst, src1, src2) + : fmax_d(dst, src1, src2); + // Checking NaNs + flt_d(zr, src1, src2); + } else { + is_min ? fmin_s(dst, src1, src2) + : fmax_s(dst, src1, src2); + // Checking NaNs + flt_s(zr, src1, src2); + } + + frflags(t0); + beqz(t0, Done); + + // In case of NaNs + is_double ? fadd_d(dst, src1, src2) + : fadd_s(dst, src1, src2); + + bind(Done); +} + +void C2_MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, + VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { + Label loop; + Assembler::SEW sew = islatin ? Assembler::e8 : Assembler::e16; + + bind(loop); + vsetvli(tmp1, cnt, sew, Assembler::m2); + vlex_v(vr1, a1, sew); + vlex_v(vr2, a2, sew); + vmsne_vv(vrs, vr1, vr2); + vfirst_m(tmp2, vrs); + bgez(tmp2, DONE); + sub(cnt, cnt, tmp1); + if (!islatin) { + slli(tmp1, tmp1, 1); // get byte counts + } + add(a1, a1, tmp1); + add(a2, a2, tmp1); + bnez(cnt, loop); + + mv(result, true); +} + +void C2_MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { + Label DONE; + Register tmp1 = t0; + Register tmp2 = t1; + + BLOCK_COMMENT("string_equals_v {"); + + mv(result, false); + + if (elem_size == 2) { + srli(cnt, cnt, 1); + } + + element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); + + bind(DONE); + BLOCK_COMMENT("} string_equals_v"); +} + +// used by C2 ClearArray patterns. +// base: Address of a buffer to be zeroed +// cnt: Count in HeapWords +// +// base, cnt, v0, v1 and t0 are clobbered. +void C2_MacroAssembler::clear_array_v(Register base, Register cnt) { + Label loop; + + // making zero words + vsetvli(t0, cnt, Assembler::e64, Assembler::m4); + vxor_vv(v0, v0, v0); + + bind(loop); + vsetvli(t0, cnt, Assembler::e64, Assembler::m4); + vse64_v(v0, base); + sub(cnt, cnt, t0); + shadd(base, t0, base, t0, 3); + bnez(cnt, loop); +} + +void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, + Register cnt1, int elem_size) { + Label DONE; + Register tmp1 = t0; + Register tmp2 = t1; + Register cnt2 = tmp2; + int length_offset = arrayOopDesc::length_offset_in_bytes(); + int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); + + BLOCK_COMMENT("arrays_equals_v {"); + + // if (a1 == a2), return true + mv(result, true); + beq(a1, a2, DONE); + + mv(result, false); + // if a1 == null or a2 == null, return false + beqz(a1, DONE); + beqz(a2, DONE); + // if (a1.length != a2.length), return false + lwu(cnt1, Address(a1, length_offset)); + lwu(cnt2, Address(a2, length_offset)); + bne(cnt1, cnt2, DONE); + + la(a1, Address(a1, base_offset)); + la(a2, Address(a2, base_offset)); + + element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); + + bind(DONE); + + BLOCK_COMMENT("} arrays_equals_v"); +} + +void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, + Register result, Register tmp1, Register tmp2, int encForm) { + Label DIFFERENCE, DONE, L, loop; + bool encLL = encForm == StrIntrinsicNode::LL; + bool encLU = encForm == StrIntrinsicNode::LU; + bool encUL = encForm == StrIntrinsicNode::UL; + + bool str1_isL = encLL || encLU; + bool str2_isL = encLL || encUL; + + int minCharsInWord = encLL ? wordSize : wordSize / 2; + + BLOCK_COMMENT("string_compare {"); + + // for Lating strings, 1 byte for 1 character + // for UTF16 strings, 2 bytes for 1 character + if (!str1_isL) + sraiw(cnt1, cnt1, 1); + if (!str2_isL) + sraiw(cnt2, cnt2, 1); + + // if str1 == str2, return the difference + // save the minimum of the string lengths in cnt2. + sub(result, cnt1, cnt2); + bgt(cnt1, cnt2, L); + mv(cnt2, cnt1); + bind(L); + + if (str1_isL == str2_isL) { // LL or UU + element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); + j(DONE); + } else { // LU or UL + Register strL = encLU ? str1 : str2; + Register strU = encLU ? str2 : str1; + VectorRegister vstr1 = encLU ? v4 : v0; + VectorRegister vstr2 = encLU ? v0 : v4; + + bind(loop); + vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); + vle8_v(vstr1, strL); + vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); + vzext_vf2(vstr2, vstr1); + vle16_v(vstr1, strU); + vmsne_vv(v0, vstr2, vstr1); + vfirst_m(tmp2, v0); + bgez(tmp2, DIFFERENCE); + sub(cnt2, cnt2, tmp1); + add(strL, strL, tmp1); + shadd(strU, tmp1, strU, tmp1, 1); + bnez(cnt2, loop); + j(DONE); + } + bind(DIFFERENCE); + slli(tmp1, tmp2, 1); + add(str1, str1, str1_isL ? tmp2 : tmp1); + add(str2, str2, str2_isL ? tmp2 : tmp1); + str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); + str2_isL ? lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); + sub(result, tmp1, tmp2); + + bind(DONE); +} + +void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { + Label loop; + assert_different_registers(src, dst, len, tmp, t0); + + BLOCK_COMMENT("byte_array_inflate_v {"); + bind(loop); + vsetvli(tmp, len, Assembler::e8, Assembler::m2); + vle8_v(v2, src); + vsetvli(t0, len, Assembler::e16, Assembler::m4); + vzext_vf2(v0, v2); + vse16_v(v0, dst); + sub(len, len, tmp); + add(src, src, tmp); + shadd(dst, tmp, dst, tmp, 1); + bnez(len, loop); + BLOCK_COMMENT("} byte_array_inflate_v"); +} + +// Compress char[] array to byte[]. +// result: the array length if every element in array can be encoded; 0, otherwise. +void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { + Label done; + encode_iso_array_v(src, dst, len, result, tmp); + beqz(len, done); + mv(result, zr); + bind(done); +} + +// result: the number of elements had been encoded. 
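+// Each iteration loads up to vl 16-bit chars, uses vmsgtu_vx/vfirst_m to locate
+// the first char above 0xff, narrows the chars before it to bytes under the
+// vmsbf_m mask, and stops at the first non-Latin-1 char.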
+void C2_MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) { + Label loop, DIFFERENCE, DONE; + + BLOCK_COMMENT("encode_iso_array_v {"); + mv(result, 0); + + bind(loop); + mv(tmp, 0xff); + vsetvli(t0, len, Assembler::e16, Assembler::m2); + vle16_v(v2, src); + // if element > 0xff, stop + vmsgtu_vx(v1, v2, tmp); + vfirst_m(tmp, v1); + vmsbf_m(v0, v1); + // compress char to byte + vsetvli(t0, len, Assembler::e8); + vncvt_x_x_w(v1, v2, Assembler::v0_t); + vse8_v(v1, dst, Assembler::v0_t); + + bgez(tmp, DIFFERENCE); + add(result, result, t0); + add(dst, dst, t0); + sub(len, len, t0); + shadd(src, t0, src, t0, 1); + bnez(len, loop); + j(DONE); + + bind(DIFFERENCE); + add(result, result, tmp); + + bind(DONE); + BLOCK_COMMENT("} encode_iso_array_v"); +} + +void C2_MacroAssembler::has_negatives_v(Register ary, Register len, Register result, Register tmp) { + Label loop, DONE; + + BLOCK_COMMENT("has_negatives_v {"); + mv(result, true); + + bind(loop); + vsetvli(t0, len, Assembler::e8, Assembler::m4); + vle8_v(v0, ary); + // if element highest bit is set, return true + vmslt_vx(v0, v0, zr); + vfirst_m(tmp, v0); + bgez(tmp, DONE); + + sub(len, len, t0); + add(ary, ary, t0); + bnez(len, loop); + mv(result, false); + + bind(DONE); + BLOCK_COMMENT("} has_negatives_v"); +} + +void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1, + Register ch, Register result, + Register tmp1, Register tmp2, + bool isL) { + mv(result, zr); + + Label loop, MATCH, DONE; + Assembler::SEW sew = isL ? Assembler::e8 : Assembler::e16; + bind(loop); + vsetvli(tmp1, cnt1, sew, Assembler::m4); + vlex_v(v0, str1, sew); + vmseq_vx(v0, v0, ch); + vfirst_m(tmp2, v0); + bgez(tmp2, MATCH); // if equal, return index + + add(result, result, tmp1); + sub(cnt1, cnt1, tmp1); + if (!isL) slli(tmp1, tmp1, 1); + add(str1, str1, tmp1); + bnez(cnt1, loop); + + mv(result, -1); + j(DONE); + + bind(MATCH); + add(result, result, tmp2); + + bind(DONE); +} + +// Set dst to NaN if any NaN input. +void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, + bool is_double, bool is_min) { + assert_different_registers(dst, src1, src2); + + vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); + + is_min ? vfmin_vv(dst, src1, src2) + : vfmax_vv(dst, src1, src2); + + vmfne_vv(v0, src1, src1); + vfadd_vv(dst, src1, src1, Assembler::v0_t); + vmfne_vv(v0, src2, src2); + vfadd_vv(dst, src2, src2, Assembler::v0_t); +} + +// Set dst to NaN if any NaN input. +void C2_MacroAssembler::reduce_minmax_FD_v(FloatRegister dst, + FloatRegister src1, VectorRegister src2, + VectorRegister tmp1, VectorRegister tmp2, + bool is_double, bool is_min) { + assert_different_registers(src2, tmp1, tmp2); + + Label L_done, L_NaN; + vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); + vfmv_s_f(tmp2, src1); + + is_min ? vfredmin_vs(tmp1, src2, tmp2) + : vfredmax_vs(tmp1, src2, tmp2); + + fsflags(zr); + // Checking NaNs + vmflt_vf(tmp2, src2, src1); + frflags(t0); + bnez(t0, L_NaN); + j(L_done); + + bind(L_NaN); + vfmv_s_f(tmp2, src1); + vfredsum_vs(tmp1, src2, tmp2); + + bind(L_done); + vfmv_f_s(dst, tmp1); +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP +#define CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP + +// C2_MacroAssembler contains high-level macros for C2 + + private: + void element_compare(Register r1, Register r2, + Register result, Register cnt, + Register tmp1, Register tmp2, + VectorRegister vr1, VectorRegister vr2, + VectorRegister vrs, + bool is_latin, Label& DONE); + public: + + void string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, + Register tmp1, Register tmp2, Register tmp3, + int ae); + + void string_indexof_char_short(Register str1, Register cnt1, + Register ch, Register result, + bool isL); + + void string_indexof_char(Register str1, Register cnt1, + Register ch, Register result, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + bool isL); + + void string_indexof(Register str1, Register str2, + Register cnt1, Register cnt2, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + Register tmp5, Register tmp6, + Register result, int ae); + + void string_indexof_linearscan(Register haystack, Register needle, + Register haystack_len, Register needle_len, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, + int needle_con_cnt, Register result, int ae); + + void arrays_equals(Register r1, Register r2, + Register tmp3, Register tmp4, + Register tmp5, Register tmp6, + Register result, Register cnt1, + int elem_size); + + void string_equals(Register r1, Register r2, + Register result, Register cnt1, + int elem_size); + + // refer to conditional_branches and float_conditional_branches + static const int bool_test_bits = 3; + static const int neg_cond_bits = 2; + static const int unsigned_branch_mask = 1 << bool_test_bits; + static const int double_branch_mask = 1 << bool_test_bits; + + // cmp + void cmp_branch(int cmpFlag, + Register op1, Register op2, + Label& label, bool is_far = false); + + void float_cmp_branch(int cmpFlag, + FloatRegister op1, FloatRegister op2, + Label& label, bool is_far = false); + + void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, + Label& L, bool is_far = false); + + void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, + Label& L, bool is_far = false); + + void enc_cmove(int cmpFlag, + Register op1, Register op2, + Register dst, Register src); + + void spill(Register r, bool is64, int offset) { + is64 ? 
sd(r, Address(sp, offset)) + : sw(r, Address(sp, offset)); + } + + void spill(FloatRegister f, bool is64, int offset) { + is64 ? fsd(f, Address(sp, offset)) + : fsw(f, Address(sp, offset)); + } + + void spill(VectorRegister v, int offset) { + add(t0, sp, offset); + vs1r_v(v, t0); + } + + void unspill(Register r, bool is64, int offset) { + is64 ? ld(r, Address(sp, offset)) + : lw(r, Address(sp, offset)); + } + + void unspillu(Register r, bool is64, int offset) { + is64 ? ld(r, Address(sp, offset)) + : lwu(r, Address(sp, offset)); + } + + void unspill(FloatRegister f, bool is64, int offset) { + is64 ? fld(f, Address(sp, offset)) + : flw(f, Address(sp, offset)); + } + + void unspill(VectorRegister v, int offset) { + add(t0, sp, offset); + vl1r_v(v, t0); + } + + void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, int vec_reg_size_in_bytes) { + assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size"); + unspill(v0, src_offset); + spill(v0, dst_offset); + } + + void minmax_FD(FloatRegister dst, + FloatRegister src1, FloatRegister src2, + bool is_double, bool is_min); + + // intrinsic methods implemented by rvv instructions + void string_equals_v(Register r1, Register r2, + Register result, Register cnt1, + int elem_size); + + void arrays_equals_v(Register r1, Register r2, + Register result, Register cnt1, + int elem_size); + + void string_compare_v(Register str1, Register str2, + Register cnt1, Register cnt2, + Register result, + Register tmp1, Register tmp2, + int encForm); + + void clear_array_v(Register base, Register cnt); + + void byte_array_inflate_v(Register src, Register dst, + Register len, Register tmp); + + void char_array_compress_v(Register src, Register dst, + Register len, Register result, + Register tmp); + + void encode_iso_array_v(Register src, Register dst, + Register len, Register result, + Register tmp); + + void has_negatives_v(Register ary, Register len, + Register result, Register tmp); + + void string_indexof_char_v(Register str1, Register cnt1, + Register ch, Register result, + Register tmp1, Register tmp2, + bool isL); + + void minmax_FD_v(VectorRegister dst, + VectorRegister src1, VectorRegister src2, + bool is_double, bool is_min); + + void reduce_minmax_FD_v(FloatRegister dst, + FloatRegister src1, VectorRegister src2, + VectorRegister tmp1, VectorRegister tmp2, + bool is_double, bool is_min); + +#endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c2_globals_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c2_globals_riscv.hpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_C2_GLOBALS_RISCV_HPP +#define CPU_RISCV_C2_GLOBALS_RISCV_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the server compiler. +// (see c2_globals.hpp). Alpha-sorted. + +define_pd_global(bool, BackgroundCompilation, true); +define_pd_global(bool, CICompileOSR, true); +define_pd_global(bool, InlineIntrinsics, true); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, true); +define_pd_global(bool, UseOnStackReplacement, true); +define_pd_global(bool, ProfileInterpreter, true); +define_pd_global(bool, TieredCompilation, COMPILER1_PRESENT(true) NOT_COMPILER1(false)); +define_pd_global(intx, CompileThreshold, 10000); + +define_pd_global(intx, OnStackReplacePercentage, 140); +define_pd_global(intx, ConditionalMoveLimit, 0); +define_pd_global(intx, FLOATPRESSURE, 32); +define_pd_global(intx, FreqInlineSize, 325); +define_pd_global(intx, MinJumpTableSize, 10); +define_pd_global(intx, INTPRESSURE, 24); +define_pd_global(intx, InteriorEntryAlignment, 16); +define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); +define_pd_global(intx, LoopUnrollLimit, 60); +define_pd_global(intx, LoopPercentProfileLimit, 10); +// InitialCodeCacheSize derived from specjbb2000 run. +define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize +define_pd_global(intx, CodeCacheExpansionSize, 64*K); + +// Ergonomics related flags +define_pd_global(uint64_t,MaxRAM, 128ULL*G); +define_pd_global(intx, RegisterCostAreaRatio, 16000); + +// Peephole and CISC spilling both break the graph, and so makes the +// scheduler sick. +define_pd_global(bool, OptoPeephole, false); +define_pd_global(bool, UseCISCSpill, false); +define_pd_global(bool, OptoScheduling, true); +define_pd_global(bool, OptoBundling, false); +define_pd_global(bool, OptoRegScheduling, false); +define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); +define_pd_global(bool, IdealizeClearArrayNode, true); + +define_pd_global(intx, ReservedCodeCacheSize, 48*M); +define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); +define_pd_global(intx, ProfiledCodeHeapSize, 22*M); +define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); +define_pd_global(uintx, CodeCacheMinBlockLength, 6); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + +// Ergonomics related flags +define_pd_global(bool, NeverActAsServerClassMachine, false); + +define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. + +#endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c2_init_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c2_init_riscv.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "opto/compile.hpp" +#include "opto/node.hpp" + +// processor dependent initialization for riscv + +extern void reg_mask_init(); + +void Compile::pd_compiler2_init() { + guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); + reg_mask_init(); +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "opto/compile.hpp" +#include "opto/node.hpp" +#include "opto/output.hpp" +#include "runtime/sharedRuntime.hpp" + +#define __ masm. 
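+
+// Each poll stub records the pc of its safepoint poll in
+// JavaThread::saved_exception_pc and then far-jumps to the shared
+// polling-page return handler blob.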
+void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { + assert(SharedRuntime::polling_page_return_handler_blob() != NULL, + "polling page return stub not created yet"); + address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); + RuntimeAddress callback_addr(stub); + + __ bind(entry->_stub_label); + InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); + masm.code_section()->relocate(masm.pc(), safepoint_pc.rspec()); + __ la(t0, safepoint_pc.target()); + __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); + __ far_jump(callback_addr); +} +#undef __ Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_CODEBUFFER_RISCV_HPP +#define CPU_RISCV_CODEBUFFER_RISCV_HPP + +private: + void pd_initialize() {} + +public: + void flush_bundle(bool start_new_bundle) {} + +#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/compiledIC_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/compiledIC_riscv.cpp @@ -0,0 +1,149 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/compiledIC.hpp" +#include "code/icBuffer.hpp" +#include "code/nmethod.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/safepoint.hpp" + +// ---------------------------------------------------------------------------- + +#define __ _masm. +address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { + precond(cbuf.stubs()->start() != badAddress); + precond(cbuf.stubs()->end() != badAddress); + // Stub is fixed up when the corresponding call is converted from + // calling compiled code to calling interpreted code. + // mv xmethod, 0 + // jalr -4 # to self + + if (mark == NULL) { + mark = cbuf.insts_mark(); // Get mark within main instrs section. + } + + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a stub. + MacroAssembler _masm(&cbuf); + + address base = __ start_a_stub(to_interp_stub_size()); + int offset = __ offset(); + if (base == NULL) { + return NULL; // CodeBuffer::expand failed + } + // static stub relocation stores the instruction address of the call + __ relocate(static_stub_Relocation::spec(mark)); + + __ emit_static_call_stub(); + + assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big"); + __ end_a_stub(); + return base; +} +#undef __ + +int CompiledStaticCall::to_interp_stub_size() { + // fence_i + fence* + (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr + return NativeFenceI::instruction_size() + 12 * NativeInstruction::instruction_size; +} + +int CompiledStaticCall::to_trampoline_stub_size() { + // Somewhat pessimistically, we count 4 instructions here (although + // there are only 3) because we sometimes emit an alignment nop. + // Trampoline stubs are always word aligned. + return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size; +} + +// Relocation entries for call stub, compiled java to interpreter. +int CompiledStaticCall::reloc_to_interp_stub() { + return 4; // 3 in emit_to_interp_stub + 1 in emit_call +} + +void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { + address stub = find_stub(); + guarantee(stub != NULL, "stub not found"); + + if (TraceICs) { + ResourceMark rm; + tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", + p2i(instruction_address()), + callee->name_and_sig_as_C_string()); + } + + // Creation also verifies the object. + NativeMovConstReg* method_holder + = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); +#ifdef ASSERT + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); + + verify_mt_safe(callee, entry, method_holder, jump); +#endif + // Update stub. + method_holder->set_data((intptr_t)callee()); + NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry); + ICache::invalidate_range(stub, to_interp_stub_size()); + // Update jump to call. 
+ set_destination_mt_safe(stub); +} + +void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { + // Reset stub. + address stub = static_stub->addr(); + assert(stub != NULL, "stub not found"); + assert(CompiledICLocker::is_safe(stub), "mt unsafe call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder + = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); + method_holder->set_data(0); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + jump->set_jump_destination((address)-1); +} + +//----------------------------------------------------------------------------- +// Non-product mode code +#ifndef PRODUCT + +void CompiledDirectStaticCall::verify() { + // Verify call. + _call->verify(); + _call->verify_alignment(); + + // Verify stub. + address stub = find_stub(); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder + = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + + // Verify state. + assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); +} + +#endif // !PRODUCT Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/copy_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/copy_riscv.hpp @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_RISCV_COPY_RISCV_HPP +#define CPU_RISCV_COPY_RISCV_HPP + +#include OS_CPU_HEADER(copy) + +static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { + julong* to = (julong*) tohw; + julong v = ((julong) value << 32) | value; + while (count-- > 0) { + *to++ = v; + } +} + +static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { + pd_fill_to_words(tohw, count, value); +} + +static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { + (void)memset(to, value, count); +} + +static void pd_zero_to_words(HeapWord* tohw, size_t count) { + pd_fill_to_words(tohw, count, 0); +} + +static void pd_zero_to_bytes(void* to, size_t count) { + (void)memset(to, 0, count); +} + +static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + (void)memmove(to, from, count * HeapWordSize); +} + +static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; // fall through + case 7: to[6] = from[6]; // fall through + case 6: to[5] = from[5]; // fall through + case 5: to[4] = from[4]; // fall through + case 4: to[3] = from[3]; // fall through + case 3: to[2] = from[2]; // fall through + case 2: to[1] = from[1]; // fall through + case 1: to[0] = from[0]; // fall through + case 0: break; + default: + memcpy(to, from, count * HeapWordSize); + break; + } +} + +static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: while (count-- > 0) { + *to++ = *from++; + } + break; + } +} + +static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_words(from, to, count); +} + +static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + pd_disjoint_words(from, to, count); +} + +static void pd_conjoint_bytes(const void* from, void* to, size_t count) { + (void)memmove(to, from, count); +} + +static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { + pd_conjoint_bytes(from, to, count); +} + +static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { + _Copy_conjoint_jshorts_atomic(from, to, count); +} + +static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { + _Copy_conjoint_jints_atomic(from, to, count); +} + +static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { + _Copy_conjoint_jlongs_atomic(from, to, count); +} + +static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { + assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); + _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); +} + +static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { + _Copy_arrayof_conjoint_bytes(from, to, count); +} + +static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { + _Copy_arrayof_conjoint_jshorts(from, to, count); +} + +static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { + _Copy_arrayof_conjoint_jints(from, to, count); +} + +static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { + 
_Copy_arrayof_conjoint_jlongs(from, to, count); +} + +static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { + assert(!UseCompressedOops, "foo!"); + assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); + _Copy_arrayof_conjoint_jlongs(from, to, count); +} + +#endif // CPU_RISCV_COPY_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/disassembler_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/disassembler_riscv.hpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP +#define CPU_RISCV_DISASSEMBLER_RISCV_HPP + +static int pd_instruction_alignment() { + return 1; +} + +static const char* pd_cpu_opts() { + return ""; +} + +// Returns address of n-th instruction preceding addr, +// NULL if no preceding instruction can be found. +// On riscv, we assume a constant instruction length. +// It might be beneficial to check "is_readable" as we do on ppc and s390. +static address find_prev_instr(address addr, int n_instr) { + return addr - Assembler::instruction_size * n_instr; +} + +// special-case instruction decoding. +// There may be cases where the binutils disassembler doesn't do +// the perfect job. In those cases, decode_instruction0 may kick in +// and do it right. +// If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" +static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { + return here; +} + +// platform-specific instruction annotations (like value of loaded constants) +static void annotate(address pc, outputStream* st) {} + +#endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "prims/foreign_globals.hpp" +#include "utilities/debug.hpp" + +// Stubbed out, implement later +const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const { + Unimplemented(); + return {}; +} + +const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const { + Unimplemented(); + return {}; +} + +const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const { + ShouldNotCallThis(); + return {}; +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP +#define CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP + +class ABIDescriptor {}; +class BufferLayout {}; + +#endif // CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/frame_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/frame_riscv.cpp @@ -0,0 +1,688 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "compiler/oopMap.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "memory/universe.hpp" +#include "oops/markWord.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/monitorChunk.hpp" +#include "runtime/os.inline.hpp" +#include "runtime/signature.hpp" +#include "runtime/stackWatermarkSet.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_riscv.inline.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#include "runtime/vframeArray.hpp" +#endif + +#ifdef ASSERT +void RegisterMap::check_location_valid() { +} +#endif + + +// Profiling/safepoint support + +bool frame::safe_for_sender(JavaThread *thread) { + address addr_sp = (address)_sp; + address addr_fp = (address)_fp; + address unextended_sp = (address)_unextended_sp; + + // consider stack guards when trying to determine "safe" stack pointers + // sp must be within the usable part of the stack (not in guards) + if (!thread->is_in_usable_stack(addr_sp)) { + return false; + } + + // When we are running interpreted code the machine stack pointer, SP, is + // set low enough so that the Java expression stack can grow and shrink + // without ever exceeding the machine stack bounds. So, ESP >= SP. + + // When we call out of an interpreted method, SP is incremented so that + // the space between SP and ESP is removed. The SP saved in the callee's + // frame is the SP *before* this increment. So, when we walk a stack of + // interpreter frames the sender's SP saved in a frame might be less than + // the SP at the point of call. + + // So unextended sp must be within the stack but we need not to check + // that unextended sp >= sp + + if (!thread->is_in_full_stack_checked(unextended_sp)) { + return false; + } + + // an fp must be within the stack and above (but not equal) sp + // second evaluation on fp+ is added to handle situation where fp is -1 + bool fp_safe = thread->is_in_stack_range_excl(addr_fp, addr_sp) && + thread->is_in_full_stack_checked(addr_fp + (return_addr_offset * sizeof(void*))); + + // We know sp/unextended_sp are safe only fp is questionable here + + // If the current frame is known to the code cache then we can attempt to + // to construct the sender and do some validation of it. 
This goes a long way + // toward eliminating issues when we get in frame construction code + + if (_cb != NULL) { + + // First check if frame is complete and tester is reliable + // Unfortunately we can only check frame complete for runtime stubs and nmethod + // other generic buffer blobs are more problematic so we just assume they are + // ok. adapter blobs never have a frame complete and are never ok. + + if (!_cb->is_frame_complete_at(_pc)) { + if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { + return false; + } + } + + // Could just be some random pointer within the codeBlob + if (!_cb->code_contains(_pc)) { + return false; + } + + // Entry frame checks + if (is_entry_frame()) { + // an entry frame must have a valid fp. + return fp_safe && is_entry_frame_valid(thread); + } + + intptr_t* sender_sp = NULL; + intptr_t* sender_unextended_sp = NULL; + address sender_pc = NULL; + intptr_t* saved_fp = NULL; + + if (is_interpreted_frame()) { + // fp must be safe + if (!fp_safe) { + return false; + } + + sender_pc = (address)this->fp()[return_addr_offset]; + // for interpreted frames, the value below is the sender "raw" sp, + // which can be different from the sender unextended sp (the sp seen + // by the sender) because of current frame local variables + sender_sp = (intptr_t*) addr_at(sender_sp_offset); + sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; + saved_fp = (intptr_t*) this->fp()[link_offset]; + } else { + // must be some sort of compiled/runtime frame + // fp does not have to be safe (although it could be check for c1?) + + // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc + if (_cb->frame_size() <= 0) { + return false; + } + + sender_sp = _unextended_sp + _cb->frame_size(); + // Is sender_sp safe? + if (!thread->is_in_full_stack_checked((address)sender_sp)) { + return false; + } + + sender_unextended_sp = sender_sp; + sender_pc = (address) *(sender_sp - 1); + saved_fp = (intptr_t*) *(sender_sp - 2); + } + + + // If the potential sender is the interpreter then we can do some more checking + if (Interpreter::contains(sender_pc)) { + + // fp is always saved in a recognizable place in any code we generate. However + // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp + // is really a frame pointer. 
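+      // The range check below only establishes that saved_fp lies strictly
+      // between sender_sp and the stack base; the candidate sender frame built
+      // from it is then vetted with is_interpreted_frame_valid() before use.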
+ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { + return false; + } + + // construct the potential sender + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + return sender.is_interpreted_frame_valid(thread); + } + + // We must always be able to find a recognizable pc + CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); + if (sender_pc == NULL || sender_blob == NULL) { + return false; + } + + // Could be a zombie method + if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { + return false; + } + + // Could just be some random pointer within the codeBlob + if (!sender_blob->code_contains(sender_pc)) { + return false; + } + + // We should never be able to see an adapter if the current frame is something from code cache + if (sender_blob->is_adapter_blob()) { + return false; + } + + // Could be the call_stub + if (StubRoutines::returns_to_call_stub(sender_pc)) { + if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { + return false; + } + + // construct the potential sender + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + // Validate the JavaCallWrapper an entry frame must have + address jcw = (address)sender.entry_frame_call_wrapper(); + + bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); + + return jcw_safe; + } + + CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); + if (nm != NULL) { + if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || + nm->method()->is_method_handle_intrinsic()) { + return false; + } + } + + // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size + // because the return address counts against the callee's frame. + if (sender_blob->frame_size() <= 0) { + assert(!sender_blob->is_compiled(), "should count return address at least"); + return false; + } + + // We should never be able to see anything here except an nmethod. If something in the + // code cache (current frame) is called by an entity within the code cache that entity + // should not be anything but the call stub (already covered), the interpreter (already covered) + // or an nmethod. + if (!sender_blob->is_compiled()) { + return false; + } + + // Could put some more validation for the potential non-interpreted sender + // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... + + // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb + + // We've validated the potential sender that would be created + return true; + } + + // Must be native-compiled frame. Since sender will try and use fp to find + // linkages it must be safe + if (!fp_safe) { + return false; + } + + // Will the pc we fetch be non-zero (which we'll find at the oldest frame) + if ((address)this->fp()[return_addr_offset] == NULL) { return false; } + + return true; +} + +void frame::patch_pc(Thread* thread, address pc) { + assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); + address* pc_addr = &(((address*) sp())[-1]); + if (TracePcPatching) { + tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", + p2i(pc_addr), p2i(*pc_addr), p2i(pc)); + } + // Either the return address is the original one or we are going to + // patch in the same address that's already there. 
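+  // After the slot is updated, either an original (pre-deoptimization) pc can
+  // still be recovered for this frame, in which case _pc keeps reporting it and
+  // only the deopt state is recorded, or _pc simply adopts the new value.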
+ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); + *pc_addr = pc; + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + assert(original_pc == _pc, "expected original PC to be stored before patching"); + _deopt_state = is_deoptimized; + // leave _pc as is + } else { + _deopt_state = not_deoptimized; + _pc = pc; + } +} + +bool frame::is_interpreted_frame() const { + return Interpreter::contains(pc()); +} + +int frame::frame_size(RegisterMap* map) const { + frame sender = this->sender(map); + return sender.sp() - sp(); +} + +intptr_t* frame::entry_frame_argument_at(int offset) const { + // convert offset to index to deal with tsi + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + // Entry frame's arguments are always in relation to unextended_sp() + return &unextended_sp()[index]; +} + +// sender_sp +intptr_t* frame::interpreter_frame_sender_sp() const { + assert(is_interpreted_frame(), "interpreted frame expected"); + return (intptr_t*) at(interpreter_frame_sender_sp_offset); +} + +void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { + assert(is_interpreted_frame(), "interpreted frame expected"); + ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); +} + + +// monitor elements + +BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); +} + +BasicObjectLock* frame::interpreter_frame_monitor_end() const { + BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); + // make sure the pointer points inside the frame + assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer"); + assert((intptr_t*) result < fp(), "monitor end should be strictly below the frame pointer"); + return result; +} + +void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { + *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; +} + +// Used by template based interpreter deoptimization +void frame::interpreter_frame_set_last_sp(intptr_t* last_sp) { + *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = last_sp; +} + +frame frame::sender_for_entry_frame(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); + assert(!entry_frame_is_first(), "next Java fp must be non zero"); + assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); + // Since we are walking the stack now this nested anchor is obviously walkable + // even if it wasn't when it was stacked. 
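+  // make_walkable() lazily fills in last_Java_pc from the stack (see
+  // JavaFrameAnchor::make_walkable() at the end of this file), after which the
+  // anchor describes a complete {sp, fp, pc} triple for the sender.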
+ jfa->make_walkable(); + map->clear(); + assert(map->include_argument_oops(), "should be set by clear"); + vmassert(jfa->last_Java_pc() != NULL, "not walkable"); + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); + return fr; +} + +OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { + ShouldNotCallThis(); + return nullptr; +} + +bool frame::optimized_entry_frame_is_first() const { + ShouldNotCallThis(); + return false; +} + +frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { + ShouldNotCallThis(); + return {}; +} + +//------------------------------------------------------------------------------ +// frame::verify_deopt_original_pc +// +// Verifies the calculated original PC of a deoptimization PC for the +// given unextended SP. +#ifdef ASSERT +void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { + frame fr; + + // This is ugly but it's better than to change {get,set}_original_pc + // to take an SP value as argument. And it's only a debugging + // method anyway. + fr._unextended_sp = unextended_sp; + + assert_cond(nm != NULL); + address original_pc = nm->get_original_pc(&fr); + assert(nm->insts_contains_inclusive(original_pc), + "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); +} +#endif + +//------------------------------------------------------------------------------ +// frame::adjust_unextended_sp +void frame::adjust_unextended_sp() { + // On riscv, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. + + if (_cb != NULL) { + CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); + if (sender_cm != NULL) { + // If the sender PC is a deoptimization point, get the original PC. + if (sender_cm->is_deopt_entry(_pc) || + sender_cm->is_deopt_mh_entry(_pc)) { + DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); + } + } + } +} + +//------------------------------------------------------------------------------ +// frame::update_map_with_saved_link +void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { + // The interpreter and compiler(s) always save fp in a known + // location on entry. We must record where that location is + // so that if fp was live on callout from c2 we can find + // the saved copy no matter what it called. + + // Since the interpreter always saves fp if we record where it is then + // we don't have to always save fp on entry and exit to c2 compiled + // code, on entry will be enough. + assert(map != NULL, "map must be set"); + map->set_location(::fp->as_VMReg(), (address) link_addr); + // this is weird "H" ought to be at a higher address however the + // oopMaps seems to have the "H" regs at the same address and the + // vanilla register. + map->set_location(::fp->as_VMReg()->next(), (address) link_addr); +} + + +//------------------------------------------------------------------------------ +// frame::sender_for_interpreter_frame +frame frame::sender_for_interpreter_frame(RegisterMap* map) const { + // SP is the raw SP from the sender after adapter or interpreter + // extension. + intptr_t* sender_sp = this->sender_sp(); + + // This is the sp before any possible extension (adapter/locals). 
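+  // i.e. the sp as seen by the sender, read back from the
+  // interpreter_frame_sender_sp slot of the current frame.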
+ intptr_t* unextended_sp = interpreter_frame_sender_sp(); + +#ifdef COMPILER2 + assert(map != NULL, "map must be set"); + if (map->update_map()) { + update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); + } +#endif // COMPILER2 + + return frame(sender_sp, unextended_sp, link(), sender_pc()); +} + + +//------------------------------------------------------------------------------ +// frame::sender_for_compiled_frame +frame frame::sender_for_compiled_frame(RegisterMap* map) const { + // we cannot rely upon the last fp having been saved to the thread + // in C2 code but it will have been pushed onto the stack. so we + // have to find it relative to the unextended sp + + assert(_cb->frame_size() >= 0, "must have non-zero frame size"); + intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size(); + intptr_t* unextended_sp = l_sender_sp; + + // the return_address is always the word on the stack + address sender_pc = (address) *(l_sender_sp + frame::return_addr_offset); + + intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp + frame::link_offset); + + assert(map != NULL, "map must be set"); + if (map->update_map()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); + if (_cb->oop_maps() != NULL) { + OopMapSet::update_register_map(this, map); + } + + // Since the prolog does the save and restore of FP there is no + // oopmap for it so we must fill in its location as if there was + // an oopmap entry since if our caller was compiled code there + // could be live jvm state in it. + update_map_with_saved_link(map, saved_fp_addr); + } + + return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc); +} + +//------------------------------------------------------------------------------ +// frame::sender_raw +frame frame::sender_raw(RegisterMap* map) const { + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + assert(map != NULL, "map must be set"); + map->set_include_argument_oops(false); + + if (is_entry_frame()) { + return sender_for_entry_frame(map); + } + if (is_interpreted_frame()) { + return sender_for_interpreter_frame(map); + } + assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); + + // This test looks odd: why is it not is_compiled_frame() ? That's + // because stubs also have OOP maps. + if (_cb != NULL) { + return sender_for_compiled_frame(map); + } + + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. + return frame(sender_sp(), link(), sender_pc()); +} + +frame frame::sender(RegisterMap* map) const { + frame result = sender_raw(map); + + if (map->process_frames()) { + StackWatermarkSet::on_iteration(map->thread(), result); + } + + return result; +} + +bool frame::is_interpreted_frame_valid(JavaThread* thread) const { + assert(is_interpreted_frame(), "Not an interpreted frame"); + // These are reasonable sanity checks + if (fp() == NULL || (intptr_t(fp()) & (wordSize-1)) != 0) { + return false; + } + if (sp() == NULL || (intptr_t(sp()) & (wordSize-1)) != 0) { + return false; + } + if (fp() + interpreter_frame_initial_sp_offset < sp()) { + return false; + } + // These are hacks to keep us out of trouble. 
+ // The problem with these is that they mask other problems + if (fp() <= sp()) { // this attempts to deal with unsigned comparison above + return false; + } + + // do some validation of frame elements + + // first the method + Method* m = *interpreter_frame_method_addr(); + // validate the method we'd find in this potential sender + if (!Method::is_valid_method(m)) { + return false; + } + + // stack frames shouldn't be much larger than max_stack elements + // this test requires the use of unextended_sp which is the sp as seen by + // the current frame, and not sp which is the "raw" pc which could point + // further because of local variables of the callee method inserted after + // method arguments + if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { + return false; + } + + // validate bci/bcx + address bcp = interpreter_frame_bcp(); + if (m->validate_bci_from_bcp(bcp) < 0) { + return false; + } + + // validate constantPoolCache* + ConstantPoolCache* cp = *interpreter_frame_cache_addr(); + if (MetaspaceObj::is_valid(cp) == false) { + return false; + } + + // validate locals + address locals = (address) *interpreter_frame_locals_addr(); + if (locals > thread->stack_base() || locals < (address) fp()) { + return false; + } + + // We'd have to be pretty unlucky to be mislead at this point + return true; +} + +BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { + assert(is_interpreted_frame(), "interpreted frame expected"); + Method* method = interpreter_frame_method(); + BasicType type = method->result_type(); + + intptr_t* tos_addr = NULL; + if (method->is_native()) { + tos_addr = (intptr_t*)sp(); + if (type == T_FLOAT || type == T_DOUBLE) { + // This is because we do a push(ltos) after push(dtos) in generate_native_entry. + tos_addr += 2 * Interpreter::stackElementWords; + } + } else { + tos_addr = (intptr_t*)interpreter_frame_tos_address(); + } + + switch (type) { + case T_OBJECT : + case T_ARRAY : { + oop obj; + if (method->is_native()) { + obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); + } else { + oop* obj_p = (oop*)tos_addr; + obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; + } + assert(Universe::is_in_heap_or_null(obj), "sanity check"); + *oop_result = obj; + break; + } + case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; + case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; + case T_CHAR : value_result->c = *(jchar*)tos_addr; break; + case T_SHORT : value_result->s = *(jshort*)tos_addr; break; + case T_INT : value_result->i = *(jint*)tos_addr; break; + case T_LONG : value_result->j = *(jlong*)tos_addr; break; + case T_FLOAT : { + value_result->f = *(jfloat*)tos_addr; + break; + } + case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; + case T_VOID : /* Nothing to do */ break; + default : ShouldNotReachHere(); + } + + return type; +} + + +intptr_t* frame::interpreter_frame_tos_at(jint offset) const { + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + return &interpreter_frame_tos_address()[index]; +} + +#ifndef PRODUCT + +#define DESCRIBE_FP_OFFSET(name) \ + values.describe(frame_no, fp() + frame::name##_offset, #name) + +void frame::describe_pd(FrameValues& values, int frame_no) { + if (is_interpreted_frame()) { + DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_method); + DESCRIBE_FP_OFFSET(interpreter_frame_mdp); + DESCRIBE_FP_OFFSET(interpreter_frame_mirror); + DESCRIBE_FP_OFFSET(interpreter_frame_cache); + DESCRIBE_FP_OFFSET(interpreter_frame_locals); + DESCRIBE_FP_OFFSET(interpreter_frame_bcp); + DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); + } +} +#endif + +intptr_t *frame::initial_deoptimization_info() { + // Not used on riscv, but we must return something. + return NULL; +} + +intptr_t* frame::real_fp() const { + if (_cb != NULL) { + // use the frame size if valid + int size = _cb->frame_size(); + if (size > 0) { + return unextended_sp() + size; + } + } + // else rely on fp() + assert(!is_compiled_frame(), "unknown compiled frame size"); + return fp(); +} + +#undef DESCRIBE_FP_OFFSET + +#ifndef PRODUCT +// This is a generic constructor which is only used by pns() in debug.cpp. +frame::frame(void* ptr_sp, void* ptr_fp, void* pc) { + init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc); +} + +void frame::pd_ps() {} +#endif + +void JavaFrameAnchor::make_walkable() { + // last frame set? + if (last_Java_sp() == NULL) { return; } + // already walkable? + if (walkable()) { return; } + vmassert(last_Java_sp() != NULL, "not called from Java code?"); + vmassert(last_Java_pc() == NULL, "already walkable"); + _last_Java_pc = (address)_last_Java_sp[-1]; + vmassert(walkable(), "something went wrong"); +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/frame_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/frame_riscv.hpp @@ -0,0 +1,202 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_FRAME_RISCV_HPP +#define CPU_RISCV_FRAME_RISCV_HPP + +#include "runtime/synchronizer.hpp" + +// A frame represents a physical stack frame (an activation). Frames can be +// C or Java frames, and the Java frames can be interpreted or compiled. +// In contrast, vframes represent source-level activations, so that one physical frame +// can correspond to multiple source level frames because of inlining. +// A frame is comprised of {pc, fp, sp} +// ------------------------------ Asm interpreter ---------------------------------------- +// Layout of asm interpreter frame: +// [expression stack ] * <- sp + +// [monitors[0] ] \ +// ... | monitor block size = k +// [monitors[k-1] ] / +// [frame initial esp ] ( == &monitors[0], initially here) initial_sp_offset +// [byte code index/pointr] = bcx() bcx_offset + +// [pointer to locals ] = locals() locals_offset +// [constant pool cache ] = cache() cache_offset + +// [klass of method ] = mirror() mirror_offset +// [padding ] + +// [methodData ] = mdp() mdx_offset +// [Method ] = method() method_offset + +// [last esp ] = last_sp() last_sp_offset +// [old stack pointer ] (sender_sp) sender_sp_offset + +// [old frame pointer ] +// [return pc ] + +// [last sp ] <- fp = link() +// [oop temp ] (only for native calls) + +// [padding ] (to preserve machine SP alignment) +// [locals and parameters ] +// <- sender sp +// ------------------------------ Asm interpreter ---------------------------------------- + +// ------------------------------ C Frame ------------------------------------------------ +// Stack: gcc with -fno-omit-frame-pointer +// . +// . +// +-> . +// | +-----------------+ | +// | | return address | | +// | | previous fp ------+ +// | | saved registers | +// | | local variables | +// | | ... | <-+ +// | +-----------------+ | +// | | return address | | +// +------ previous fp | | +// | saved registers | | +// | local variables | | +// +-> | ... | | +// | +-----------------+ | +// | | return address | | +// | | previous fp ------+ +// | | saved registers | +// | | local variables | +// | | ... | <-+ +// | +-----------------+ | +// | | return address | | +// +------ previous fp | | +// | saved registers | | +// | local variables | | +// $fp --> | ... 
| | +// +-----------------+ | +// | return address | | +// | previous fp ------+ +// | saved registers | +// $sp --> | local variables | +// +-----------------+ +// ------------------------------ C Frame ------------------------------------------------ + + public: + enum { + pc_return_offset = 0, + // All frames + link_offset = -2, + return_addr_offset = -1, + sender_sp_offset = 0, + // Interpreter frames + interpreter_frame_oop_temp_offset = 1, // for native calls only + + interpreter_frame_sender_sp_offset = -3, + // outgoing sp before a call to an invoked method + interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, + interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, + interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1, + interpreter_frame_padding_offset = interpreter_frame_mdp_offset - 1, + interpreter_frame_mirror_offset = interpreter_frame_padding_offset - 1, + interpreter_frame_cache_offset = interpreter_frame_mirror_offset - 1, + interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1, + interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1, + interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, + + interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, + interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, + + // Entry frames + // n.b. these values are determined by the layout defined in + // stubGenerator for the Java call stub + entry_frame_after_call_words = 22, + entry_frame_call_wrapper_offset = -10, + + // we don't need a save area + arg_reg_save_area_bytes = 0 + }; + + intptr_t ptr_at(int offset) const { + return *ptr_at_addr(offset); + } + + void ptr_at_put(int offset, intptr_t value) { + *ptr_at_addr(offset) = value; + } + + private: + // an additional field beyond _sp and _pc: + intptr_t* _fp; // frame pointer + // The interpreter and adapters will extend the frame of the caller. + // Since oopMaps are based on the sp of the caller before extension + // we need to know that value. However in order to compute the address + // of the return address we need the real "raw" sp. Since sparc already + // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's + // original sp we use that convention. 
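+  // For compiled senders the two values coincide (see sender_for_compiled_frame()
+  // in frame_riscv.cpp); they only differ when the caller's frame was extended
+  // by the interpreter or an adapter.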
+ + intptr_t* _unextended_sp; + void adjust_unextended_sp(); + + intptr_t* ptr_at_addr(int offset) const { + return (intptr_t*) addr_at(offset); + } + +#ifdef ASSERT + // Used in frame::sender_for_{interpreter,compiled}_frame + static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp); +#endif + + public: + // Constructors + + frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); + + frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc); + + frame(intptr_t* ptr_sp, intptr_t* ptr_fp); + + void init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); + + // accessors for the instance variables + // Note: not necessarily the real 'frame pointer' (see real_fp) + intptr_t* fp() const { return _fp; } + + inline address* sender_pc_addr() const; + + // expression stack tos if we are nested in a java call + intptr_t* interpreter_frame_last_sp() const; + + // helper to update a map with callee-saved RBP + static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); + + // deoptimization support + void interpreter_frame_set_last_sp(intptr_t* last_sp); + + static jint interpreter_frame_expression_stack_direction() { return -1; } + + // returns the sending frame, without applying any barriers + frame sender_raw(RegisterMap* map) const; + +#endif // CPU_RISCV_FRAME_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/frame_riscv.inline.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/frame_riscv.inline.hpp @@ -0,0 +1,245 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_RISCV_FRAME_RISCV_INLINE_HPP +#define CPU_RISCV_FRAME_RISCV_INLINE_HPP + +#include "code/codeCache.hpp" +#include "code/vmreg.inline.hpp" + +// Inline functions for RISCV frames: + +// Constructors: + +inline frame::frame() { + _pc = NULL; + _sp = NULL; + _unextended_sp = NULL; + _fp = NULL; + _cb = NULL; + _deopt_state = unknown; +} + +static int spin; + +inline void frame::init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { + intptr_t a = intptr_t(ptr_sp); + intptr_t b = intptr_t(ptr_fp); + _sp = ptr_sp; + _unextended_sp = ptr_sp; + _fp = ptr_fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { + init(ptr_sp, ptr_fp, pc); +} + +inline frame::frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc) { + intptr_t a = intptr_t(ptr_sp); + intptr_t b = intptr_t(ptr_fp); + _sp = ptr_sp; + _unextended_sp = unextended_sp; + _fp = ptr_fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + assert(_cb->as_compiled_method()->insts_contains_inclusive(_pc), + "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp) { + intptr_t a = intptr_t(ptr_sp); + intptr_t b = intptr_t(ptr_fp); + _sp = ptr_sp; + _unextended_sp = ptr_sp; + _fp = ptr_fp; + _pc = (address)(ptr_sp[-1]); + + // Here's a sticky one. This constructor can be called via AsyncGetCallTrace + // when last_Java_sp is non-null but the pc fetched is junk. If we are truly + // unlucky the junk value could be to a zombied method and we'll die on the + // find_blob call. This is also why we can have no asserts on the validity + // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler + // -> pd_last_frame should use a specialized version of pd_last_frame which could + // call a specilaized frame constructor instead of this one. + // Then we could use the assert below. However this assert is of somewhat dubious + // value. + + _cb = CodeCache::find_blob(_pc); + adjust_unextended_sp(); + + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +// Accessors + +inline bool frame::equal(frame other) const { + bool ret = sp() == other.sp() && + unextended_sp() == other.unextended_sp() && + fp() == other.fp() && + pc() == other.pc(); + assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); + return ret; +} + +// Return unique id for this frame. The id must have a value where we can distinguish +// identity and younger/older relationship. NULL represents an invalid (incomparable) +// frame. 
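+// Using unextended_sp() as the id makes ids grow towards older frames on the
+// downward-growing stack, which is what is_older() below relies on.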
+inline intptr_t* frame::id(void) const { return unextended_sp(); } + +// Return true if the frame is older (less recent activation) than the frame represented by id +inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() > id ; } + +inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } + +inline intptr_t* frame::link_or_null() const { + intptr_t** ptr = (intptr_t **)addr_at(link_offset); + return os::is_readable_pointer(ptr) ? *ptr : NULL; +} + +inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } + +// Return address +inline address* frame::sender_pc_addr() const { return (address*) addr_at(return_addr_offset); } +inline address frame::sender_pc() const { return *sender_pc_addr(); } +inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); } + +inline intptr_t** frame::interpreter_frame_locals_addr() const { + return (intptr_t**)addr_at(interpreter_frame_locals_offset); +} + +inline intptr_t* frame::interpreter_frame_last_sp() const { + return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); +} + +inline intptr_t* frame::interpreter_frame_bcp_addr() const { + return (intptr_t*)addr_at(interpreter_frame_bcp_offset); +} + +inline intptr_t* frame::interpreter_frame_mdp_addr() const { + return (intptr_t*)addr_at(interpreter_frame_mdp_offset); +} + + +// Constant pool cache + +inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { + return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); +} + +// Method + +inline Method** frame::interpreter_frame_method_addr() const { + return (Method**)addr_at(interpreter_frame_method_offset); +} + +// Mirror + +inline oop* frame::interpreter_frame_mirror_addr() const { + return (oop*)addr_at(interpreter_frame_mirror_offset); +} + +// top of expression stack +inline intptr_t* frame::interpreter_frame_tos_address() const { + intptr_t* last_sp = interpreter_frame_last_sp(); + if (last_sp == NULL) { + return sp(); + } else { + // sp() may have been extended or shrunk by an adapter. At least + // check that we don't fall behind the legal region. + // For top deoptimized frame last_sp == interpreter_frame_monitor_end. 
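+    // The expression stack lives at addresses below the monitor block, so a
+    // valid last_sp can never point above interpreter_frame_monitor_end().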
+ assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos"); + return last_sp; + } +} + +inline oop* frame::interpreter_frame_temp_oop_addr() const { + return (oop *)(fp() + interpreter_frame_oop_temp_offset); +} + +inline int frame::interpreter_frame_monitor_size() { + return BasicObjectLock::size(); +} + + +// expression stack +// (the max_stack arguments are used by the GC; see class FrameClosure) + +inline intptr_t* frame::interpreter_frame_expression_stack() const { + intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); + return monitor_end-1; +} + + +// Entry frames + +inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { + return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); +} + + +// Compiled frames +inline oop frame::saved_oop_result(RegisterMap* map) const { + oop* result_adr = (oop *)map->location(x10->as_VMReg()); + guarantee(result_adr != NULL, "bad register save location"); + return (*result_adr); +} + +inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + oop* result_adr = (oop *)map->location(x10->as_VMReg()); + guarantee(result_adr != NULL, "bad register save location"); + *result_adr = obj; +} + +#endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp @@ -0,0 +1,484 @@ +/* + * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/g1/g1BarrierSet.hpp" +#include "gc/g1/g1BarrierSetAssembler.hpp" +#include "gc/g1/g1BarrierSetRuntime.hpp" +#include "gc/g1/g1CardTable.hpp" +#include "gc/g1/g1ThreadLocalData.hpp" +#include "gc/g1/heapRegion.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interp_masm.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.hpp" +#ifdef COMPILER1 +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "gc/g1/c1/g1BarrierSetC1.hpp" +#endif + +#define __ masm-> + +void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count, RegSet saved_regs) { + assert_cond(masm != NULL); + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; + if (!dest_uninitialized) { + Label done; + Address in_progress(xthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + + // Is marking active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ lwu(t0, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ lbu(t0, in_progress); + } + __ beqz(t0, done); + + __ push_reg(saved_regs, sp); + if (count == c_rarg0) { + if (addr == c_rarg1) { + // exactly backwards!! + __ mv(t0, c_rarg0); + __ mv(c_rarg0, c_rarg1); + __ mv(c_rarg1, t0); + } else { + __ mv(c_rarg1, count); + __ mv(c_rarg0, addr); + } + } else { + __ mv(c_rarg0, addr); + __ mv(c_rarg1, count); + } + if (UseCompressedOops) { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); + } + __ pop_reg(saved_regs, sp); + + __ bind(done); + } +} + +void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp, RegSet saved_regs) { + assert_cond(masm != NULL); + __ push_reg(saved_regs, sp); + assert_different_registers(start, count, tmp); + assert_different_registers(c_rarg0, count); + __ mv(c_rarg0, start); + __ mv(c_rarg1, count); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); + __ pop_reg(saved_regs, sp); +} + +void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call) { + // If expand_call is true then we expand the call_VM_leaf macro + // directly to skip generating the check by + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. + + assert_cond(masm != NULL); + assert(thread == xthread, "must be"); + + Label done; + Label runtime; + + assert_different_registers(obj, pre_val, tmp, t0); + assert(pre_val != noreg && tmp != noreg, "expecting a register"); + + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); + + // Is marking active? 
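+  // The flag is loaded at its declared width so that the beqz below sees a
+  // clean zero/non-zero value whichever way the SATB queue sizes it.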
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width + __ lwu(tmp, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ lbu(tmp, in_progress); + } + __ beqz(tmp, done); + + // Do we need to load the previous value? + if (obj != noreg) { + __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); + } + + // Is the previous value null? + __ beqz(pre_val, done); + + // Can we store original value in the thread's buffer? + // Is index == 0? + // (The index field is typed as size_t.) + + __ ld(tmp, index); // tmp := *index_adr + __ beqz(tmp, runtime); // tmp == 0? + // If yes, goto runtime + + __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize + __ sd(tmp, index); // *index_adr := tmp + __ ld(t0, buffer); + __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr + + // Record the previous value + __ sd(pre_val, Address(tmp, 0)); + __ j(done); + + __ bind(runtime); + // save the live input values + RegSet saved = RegSet::of(pre_val); + if (tosca_live) { saved += RegSet::of(x10); } + if (obj != noreg) { saved += RegSet::of(obj); } + + __ push_reg(saved, sp); + + if (expand_call) { + assert(pre_val != c_rarg1, "smashed arg"); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } + + __ pop_reg(saved, sp); + + __ bind(done); + +} + +void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2) { + assert_cond(masm != NULL); + assert(thread == xthread, "must be"); + assert_different_registers(store_addr, new_val, thread, tmp, tmp2, + t0); + assert(store_addr != noreg && new_val != noreg && tmp != noreg && + tmp2 != noreg, "expecting a register"); + + Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); + + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + + Label done; + Label runtime; + + // Does store cross heap regions? + + __ xorr(tmp, store_addr, new_val); + __ srli(tmp, tmp, HeapRegion::LogOfHRGrainBytes); + __ beqz(tmp, done); + + // crosses regions, storing NULL? + + __ beqz(new_val, done); + + // storing region crossing non-NULL, is card already dirty? + + ExternalAddress cardtable((address) ct->byte_map_base()); + const Register card_addr = tmp; + + __ srli(card_addr, store_addr, CardTable::card_shift); + + // get the address of the card + __ load_byte_map_base(tmp2); + __ add(card_addr, card_addr, tmp2); + __ lbu(tmp2, Address(card_addr)); + __ mv(t0, (int)G1CardTable::g1_young_card_val()); + __ beq(tmp2, t0, done); + + assert((int)CardTable::dirty_card_val() == 0, "must be 0"); + + __ membar(MacroAssembler::StoreLoad); + + __ lbu(tmp2, Address(card_addr)); + __ beqz(tmp2, done); + + // storing a region crossing, non-NULL oop, card is clean. + // dirty card and log. 
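+  // Fast path sketch (C-like, mirroring the assembly below):
+  //   *card_addr = dirty_card_val();           // 0, see assert above
+  //   if (queue_index == 0) goto runtime;
+  //   queue_index -= wordSize;
+  //   *(buffer + queue_index) = card_addr;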
+ + __ sb(zr, Address(card_addr)); + + __ ld(t0, queue_index); + __ beqz(t0, runtime); + __ sub(t0, t0, wordSize); + __ sd(t0, queue_index); + + __ ld(tmp2, buffer); + __ add(t0, tmp2, t0); + __ sd(card_addr, Address(t0, 0)); + __ j(done); + + __ bind(runtime); + // save the live input values + RegSet saved = RegSet::of(store_addr); + __ push_reg(saved, sp); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); + __ pop_reg(saved, sp); + + __ bind(done); +} + +void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread) { + assert_cond(masm != NULL); + bool on_oop = is_reference_type(type); + bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; + bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; + bool on_reference = on_weak || on_phantom; + ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + if (on_oop && on_reference) { + // RA is live. It must be saved around calls. + __ enter(); // barrier may call runtime + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. + g1_write_barrier_pre(masm /* masm */, + noreg /* obj */, + dst /* pre_val */, + xthread /* thread */, + tmp1 /* tmp */, + true /* tosca_live */, + true /* expand_call */); + __ leave(); + } +} + +void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + assert_cond(masm != NULL); + // flatten object address if needed + if (dst.offset() == 0) { + if (dst.base() != x13) { + __ mv(x13, dst.base()); + } + } else { + __ la(x13, dst); + } + + g1_write_barrier_pre(masm, + x13 /* obj */, + tmp2 /* pre_val */, + xthread /* thread */, + tmp1 /* tmp */, + val != noreg /* tosca_live */, + false /* expand_call */); + + if (val == noreg) { + BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); + } else { + // G1 barrier needs uncompressed oop for region cross check. + Register new_val = val; + if (UseCompressedOops) { + new_val = t1; + __ mv(new_val, val); + } + BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); + g1_write_barrier_post(masm, + x13 /* store_adr */, + new_val /* new_val */, + xthread /* thread */, + tmp1 /* tmp */, + tmp2 /* tmp2 */); + } +} + +#ifdef COMPILER1 + +#undef __ +#define __ ce->masm()-> + +void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { + G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + + // At this point we know that marking is in progress. + // If do_load() is true then we have to emit the + // load of the previous value; otherwise it has already + // been loaded into _pre_val. 
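+  // In either case the previous value is in the register captured by the stub;
+  // a null pre_val takes the early exit to the continuation below.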
+ __ bind(*stub->entry()); + + assert(stub->pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { + ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); + } + __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); + ce->store_parameter(stub->pre_val()->as_register(), 0); + __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); + __ j(*stub->continuation()); +} + +void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { + G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + __ bind(*stub->entry()); + assert(stub->addr()->is_register(), "Precondition"); + assert(stub->new_val()->is_register(), "Precondition"); + Register new_val_reg = stub->new_val()->as_register(); + __ beqz(new_val_reg, *stub->continuation(), /* is_far */ true); + ce->store_parameter(stub->addr()->as_pointer_register(), 0); + __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin())); + __ j(*stub->continuation()); +} + +#undef __ + +#define __ sasm-> + +void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { + __ prologue("g1_pre_barrier", false); + + BarrierSet* bs = BarrierSet::barrier_set(); + + // arg0 : previous value of memory + const Register pre_val = x10; + const Register thread = xthread; + const Register tmp = t0; + + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); + + Label done; + Label runtime; + + // Is marking still active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width + __ lwu(tmp, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ lbu(tmp, in_progress); + } + __ beqz(tmp, done); + + // Can we store original value in the thread's buffer? + __ ld(tmp, queue_index); + __ beqz(tmp, runtime); + + __ sub(tmp, tmp, wordSize); + __ sd(tmp, queue_index); + __ ld(t1, buffer); + __ add(tmp, tmp, t1); + __ load_parameter(0, t1); + __ sd(t1, Address(tmp, 0)); + __ j(done); + + __ bind(runtime); + __ push_call_clobbered_registers(); + __ load_parameter(0, pre_val); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + __ pop_call_clobbered_registers(); + __ bind(done); + + __ epilogue(); +} + +void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { + __ prologue("g1_post_barrier", false); + + // arg0 : store_address + Address store_addr(fp, 2 * BytesPerWord); // 2 BytesPerWord from fp + + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + + Label done; + Label runtime; + + // At this point we know new_value is non-NULL and the new_value crosses regions. + // Must check to see if card is already dirty + const Register thread = xthread; + + Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); + + const Register card_offset = t1; + // RA is free here, so we can use it to hold the byte_map_base. 
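+  // (Presumably safe because the stub prologue saved RA and the matching
+  // epilogue restores it before returning.)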
+ const Register byte_map_base = ra; + + assert_different_registers(card_offset, byte_map_base, t0); + + __ load_parameter(0, card_offset); + __ srli(card_offset, card_offset, CardTable::card_shift); + __ load_byte_map_base(byte_map_base); + + // Convert card offset into an address in card_addr + Register card_addr = card_offset; + __ add(card_addr, byte_map_base, card_addr); + + __ lbu(t0, Address(card_addr, 0)); + __ sub(t0, t0, (int)G1CardTable::g1_young_card_val()); + __ beqz(t0, done); + + assert((int)CardTable::dirty_card_val() == 0, "must be 0"); + + __ membar(MacroAssembler::StoreLoad); + __ lbu(t0, Address(card_addr, 0)); + __ beqz(t0, done); + + // storing region crossing non-NULL, card is clean. + // dirty card and log. + __ sb(zr, Address(card_addr, 0)); + + __ ld(t0, queue_index); + __ beqz(t0, runtime); + __ sub(t0, t0, wordSize); + __ sd(t0, queue_index); + + // Reuse RA to hold buffer_addr + const Register buffer_addr = ra; + + __ ld(buffer_addr, buffer); + __ add(t0, buffer_addr, t0); + __ sd(card_addr, Address(t0, 0)); + __ j(done); + + __ bind(runtime); + __ push_call_clobbered_registers(); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); + __ pop_call_clobbered_registers(); + __ bind(done); + __ epilogue(); +} + +#undef __ + +#endif // COMPILER1 Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP +#define CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/modRefBarrierSetAssembler.hpp" +#include "utilities/macros.hpp" + +#ifdef COMPILER1 +class LIR_Assembler; +#endif +class StubAssembler; +class G1PreBarrierStub; +class G1PostBarrierStub; + +class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { +protected: + void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count, RegSet saved_regs); + void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp, RegSet saved_regs); + + void g1_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call); + + void g1_write_barrier_post(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2); + + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); + +public: +#ifdef COMPILER1 + void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); + void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); + + void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); + void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); +#endif + + void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread); +}; + +#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP +#define CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP + +const size_t G1MergeHeapRootsPrefetchCacheSize = 16; + +#endif // CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp @@ -0,0 +1,302 @@ +/* + * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "classfile/classLoaderData.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/barrierSetNMethod.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interp_masm.hpp" +#include "memory/universe.hpp" +#include "runtime/jniHandles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.hpp" + +#define __ masm-> + +void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread) { + assert_cond(masm != NULL); + + // RA is live. It must be saved around calls. 
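+  // Heap oops may be compressed and need decoding on load; native (IN_NATIVE)
+  // oops are always stored uncompressed, which is why only the in_heap path
+  // below looks at UseCompressedOops.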
+ + bool in_heap = (decorators & IN_HEAP) != 0; + bool in_native = (decorators & IN_NATIVE) != 0; + bool is_not_null = (decorators & IS_NOT_NULL) != 0; + switch (type) { + case T_OBJECT: // fall through + case T_ARRAY: { + if (in_heap) { + if (UseCompressedOops) { + __ lwu(dst, src); + if (is_not_null) { + __ decode_heap_oop_not_null(dst); + } else { + __ decode_heap_oop(dst); + } + } else { + __ ld(dst, src); + } + } else { + assert(in_native, "why else?"); + __ ld(dst, src); + } + break; + } + case T_BOOLEAN: __ load_unsigned_byte (dst, src); break; + case T_BYTE: __ load_signed_byte (dst, src); break; + case T_CHAR: __ load_unsigned_short(dst, src); break; + case T_SHORT: __ load_signed_short (dst, src); break; + case T_INT: __ lw (dst, src); break; + case T_LONG: __ ld (dst, src); break; + case T_ADDRESS: __ ld (dst, src); break; + case T_FLOAT: __ flw (f10, src); break; + case T_DOUBLE: __ fld (f10, src); break; + default: Unimplemented(); + } +} + +void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + assert_cond(masm != NULL); + bool in_heap = (decorators & IN_HEAP) != 0; + bool in_native = (decorators & IN_NATIVE) != 0; + switch (type) { + case T_OBJECT: // fall through + case T_ARRAY: { + val = val == noreg ? zr : val; + if (in_heap) { + if (UseCompressedOops) { + assert(!dst.uses(val), "not enough registers"); + if (val != zr) { + __ encode_heap_oop(val); + } + __ sw(val, dst); + } else { + __ sd(val, dst); + } + } else { + assert(in_native, "why else?"); + __ sd(val, dst); + } + break; + } + case T_BOOLEAN: + __ andi(val, val, 0x1); // boolean is true if LSB is 1 + __ sb(val, dst); + break; + case T_BYTE: __ sb(val, dst); break; + case T_CHAR: __ sh(val, dst); break; + case T_SHORT: __ sh(val, dst); break; + case T_INT: __ sw(val, dst); break; + case T_LONG: __ sd(val, dst); break; + case T_ADDRESS: __ sd(val, dst); break; + case T_FLOAT: __ fsw(f10, dst); break; + case T_DOUBLE: __ fsd(f10, dst); break; + default: Unimplemented(); + } + +} + +void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath) { + assert_cond(masm != NULL); + // If mask changes we need to ensure that the inverse is still encodable as an immediate + STATIC_ASSERT(JNIHandles::weak_tag_mask == 1); + __ andi(obj, obj, ~JNIHandles::weak_tag_mask); + __ ld(obj, Address(obj, 0)); // *obj +} + +// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. 
+void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register tmp1, + Register tmp2, + Label& slow_case, + bool is_far) { + assert_cond(masm != NULL); + assert_different_registers(obj, tmp2); + assert_different_registers(obj, var_size_in_bytes); + Register end = tmp2; + + __ ld(obj, Address(xthread, JavaThread::tlab_top_offset())); + if (var_size_in_bytes == noreg) { + __ la(end, Address(obj, con_size_in_bytes)); + } else { + __ add(end, obj, var_size_in_bytes); + } + __ ld(t0, Address(xthread, JavaThread::tlab_end_offset())); + __ bgtu(end, t0, slow_case, is_far); + + // update the tlab top pointer + __ sd(end, Address(xthread, JavaThread::tlab_top_offset())); + + // recover var_size_in_bytes if necessary + if (var_size_in_bytes == end) { + __ sub(var_size_in_bytes, var_size_in_bytes, obj); + } +} + +// Defines obj, preserves var_size_in_bytes +void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register tmp1, + Label& slow_case, + bool is_far) { + assert_cond(masm != NULL); + assert_different_registers(obj, var_size_in_bytes, tmp1); + if (!Universe::heap()->supports_inline_contig_alloc()) { + __ j(slow_case); + } else { + Register end = tmp1; + Label retry; + __ bind(retry); + + // Get the current end of the heap + ExternalAddress address_end((address) Universe::heap()->end_addr()); + { + int32_t offset; + __ la_patchable(t1, address_end, offset); + __ ld(t1, Address(t1, offset)); + } + + // Get the current top of the heap + ExternalAddress address_top((address) Universe::heap()->top_addr()); + { + int32_t offset; + __ la_patchable(t0, address_top, offset); + __ addi(t0, t0, offset); + __ lr_d(obj, t0, Assembler::aqrl); + } + + // Adjust it my the size of our new object + if (var_size_in_bytes == noreg) { + __ la(end, Address(obj, con_size_in_bytes)); + } else { + __ add(end, obj, var_size_in_bytes); + } + + // if end < obj then we wrapped around high memory + __ bltu(end, obj, slow_case, is_far); + + __ bgtu(end, t1, slow_case, is_far); + + // If heap_top hasn't been changed by some other thread, update it. + __ sc_d(t1, end, t0, Assembler::rl); + __ bnez(t1, retry); + + incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1); + } +} + +void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, + Register var_size_in_bytes, + int con_size_in_bytes, + Register tmp1) { + assert_cond(masm != NULL); + assert(tmp1->is_valid(), "need temp reg"); + + __ ld(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); + if (var_size_in_bytes->is_valid()) { + __ add(tmp1, tmp1, var_size_in_bytes); + } else { + __ add(tmp1, tmp1, con_size_in_bytes); + } + __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); +} + +void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { + BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); + + if (bs_nm == NULL) { + return; + } + + // RISCV atomic operations require that the memory address be naturally aligned. + __ align(4); + + Label skip, guard; + Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset())); + + __ lwu(t0, guard); + + // Subsequent loads of oops must occur after load of guard value. + // BarrierSetNMethod::disarm sets guard with release semantics. 
+ __ membar(MacroAssembler::LoadLoad); + __ lwu(t1, thread_disarmed_addr); + __ beq(t0, t1, skip); + + int32_t offset = 0; + __ movptr_with_offset(t0, StubRoutines::riscv::method_entry_barrier(), offset); + __ jalr(ra, t0, offset); + __ j(skip); + + __ bind(guard); + + assert(__ offset() % 4 == 0, "bad alignment"); + __ emit_int32(0); // nmethod guard value. Skipped over in common case. + + __ bind(skip); +} + +void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { + BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod(); + if (bs == NULL) { + return; + } + + Label bad_call; + __ beqz(xmethod, bad_call); + + // Pointer chase to the method holder to find out if the method is concurrently unloading. + Label method_live; + __ load_method_holder_cld(t0, xmethod); + + // Is it a strong CLD? + __ lwu(t1, Address(t0, ClassLoaderData::keep_alive_offset())); + __ bnez(t1, method_live); + + // Is it a weak but alive CLD? + __ push_reg(RegSet::of(x28, x29), sp); + + __ ld(x28, Address(t0, ClassLoaderData::holder_offset())); + + // Uses x28 & x29, so we must pass new temporaries. + __ resolve_weak_handle(x28, x29); + __ mv(t0, x28); + + __ pop_reg(RegSet::of(x28, x29), sp); + + __ bnez(t0, method_live); + + __ bind(bad_call); + + __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + __ bind(method_live); +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP +#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetNMethod.hpp" +#include "memory/allocation.hpp" +#include "oops/access.hpp" + +class BarrierSetAssembler: public CHeapObj<mtGC> { +private: + void incr_allocated_bytes(MacroAssembler* masm, + Register var_size_in_bytes, int con_size_in_bytes, + Register t1 = noreg); + +public: + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register src, Register dst, Register count, RegSet saved_regs) {} + virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register start, Register end, Register tmp, RegSet saved_regs) {} + virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread); + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); + + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath); + + virtual void tlab_allocate(MacroAssembler* masm, + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register tmp1, // temp register + Register tmp2, // temp register + Label& slow_case, // continuation point if fast allocation fails + bool is_far = false + ); + + void eden_allocate(MacroAssembler* masm, + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register tmp1, // temp register + Label& slow_case, // continuation point if fast allocation fails + bool is_far = false + ); + virtual void barrier_stubs_init() {} + + virtual void nmethod_entry_barrier(MacroAssembler* masm); + virtual void c2i_entry_barrier(MacroAssembler* masm); + virtual ~BarrierSetAssembler() {} +}; + +#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code).
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "code/codeCache.hpp" +#include "code/nativeInst.hpp" +#include "gc/shared/barrierSetNMethod.hpp" +#include "logging/log.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/registerMap.hpp" +#include "runtime/thread.hpp" +#include "utilities/align.hpp" +#include "utilities/debug.hpp" + +class NativeNMethodBarrier: public NativeInstruction { + address instruction_address() const { return addr_at(0); } + + int *guard_addr() { + /* auipc + lwu + fence + lwu + beq + lui + addi + slli + addi + slli + jalr + j */ + return reinterpret_cast<int*>(instruction_address() + 12 * 4); + } + +public: + int get_value() { + return Atomic::load_acquire(guard_addr()); + } + + void set_value(int value) { + Atomic::release_store(guard_addr(), value); + } + + void verify() const; +}; + +// Store the instruction bitmask, bits and name for checking the barrier. +struct CheckInsn { + uint32_t mask; + uint32_t bits; + const char *name; +}; + +static const struct CheckInsn barrierInsn[] = { + { 0x00000fff, 0x00000297, "auipc t0, 0 "}, + { 0x000fffff, 0x0002e283, "lwu t0, 48(t0) "}, + { 0xffffffff, 0x0aa0000f, "fence ir, ir "}, + { 0x000fffff, 0x000be303, "lwu t1, 112(xthread)"}, + { 0x01fff07f, 0x00628063, "beq t0, t1, skip "}, + { 0x00000fff, 0x000002b7, "lui t0, imm0 "}, + { 0x000fffff, 0x00028293, "addi t0, t0, imm1 "}, + { 0xffffffff, 0x00b29293, "slli t0, t0, 11 "}, + { 0x000fffff, 0x00028293, "addi t0, t0, imm2 "}, + { 0xffffffff, 0x00529293, "slli t0, t0, 5 "}, + { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) "}, + { 0x00000fff, 0x0000006f, "j skip "} + /* guard: */ + /* 32bit nmethod guard value */ + /* skip: */ +}; + +// The encodings must match the instructions emitted by +// BarrierSetAssembler::nmethod_entry_barrier. The matching ignores the specific +// register numbers and immediate values in the encoding. +void NativeNMethodBarrier::verify() const { + intptr_t addr = (intptr_t) instruction_address(); + for(unsigned int i = 0; i < sizeof(barrierInsn)/sizeof(struct CheckInsn); i++ ) { + uint32_t inst = *((uint32_t*) addr); + if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) { + tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", addr, inst); + fatal("not an %s instruction.", barrierInsn[i].name); + } + addr += 4; + } +} + + +/* We're called from an nmethod when we need to deoptimize it. We do + this by throwing away the nmethod's frame and jumping to the + ic_miss stub. This looks like there has been an IC miss at the + entry of the nmethod, so we resolve the call, which will fall back + to the interpreter if the nmethod has been unloaded.
*/ +void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { + + typedef struct { + intptr_t *sp; intptr_t *fp; address ra; address pc; + } frame_pointers_t; + + frame_pointers_t *new_frame = (frame_pointers_t *)(return_address_ptr - 5); + + JavaThread *thread = JavaThread::current(); + RegisterMap reg_map(thread, false); + frame frame = thread->last_frame(); + + assert(frame.is_compiled_frame() || frame.is_native_frame(), "must be"); + assert(frame.cb() == nm, "must be"); + frame = frame.sender(&reg_map); + + LogTarget(Trace, nmethod, barrier) out; + if (out.is_enabled()) { + ResourceMark mark; + log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p", + nm->method()->name_and_sig_as_C_string(), + nm, *(address *) return_address_ptr, nm->is_osr_method(), thread, + thread->get_thread_name(), frame.sp(), nm->verified_entry_point()); + } + + new_frame->sp = frame.sp(); + new_frame->fp = frame.fp(); + new_frame->ra = frame.pc(); + new_frame->pc = SharedRuntime::get_handle_wrong_method_stub(); +} + +// This is the offset of the entry barrier from where the frame is completed. +// If any code changes between the end of the verified entry where the entry +// barrier resides, and the completion of the frame, then +// NativeNMethodCmpBarrier::verify() will immediately complain when it does +// not find the expected native instruction at this offset, which needs updating. +// Note that this offset is invariant of PreserveFramePointer. + +// see BarrierSetAssembler::nmethod_entry_barrier +// auipc + lwu + fence + lwu + beq + movptr_with_offset(5 instructions) + jalr + j + int32 +static const int entry_barrier_offset = -4 * 13; + +static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) { + address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset; + NativeNMethodBarrier* barrier = reinterpret_cast<NativeNMethodBarrier*>(barrier_address); + debug_only(barrier->verify()); + return barrier; +} + +void BarrierSetNMethod::disarm(nmethod* nm) { + if (!supports_entry_barrier(nm)) { + return; + } + + // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier. + NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); + + barrier->set_value(disarmed_value()); +} + +bool BarrierSetNMethod::is_armed(nmethod* nm) { + if (!supports_entry_barrier(nm)) { + return false; + } + + NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); + return barrier->get_value() != disarmed_value(); +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/cardTableBarrierSetAssembler.hpp" +#include "gc/shared/gc_globals.hpp" +#include "interpreter/interp_masm.hpp" + +#define __ masm-> + + +void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) { + assert_cond(masm != NULL); + assert_different_registers(obj, tmp); + BarrierSet* bs = BarrierSet::barrier_set(); + assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); + + __ srli(obj, obj, CardTable::card_shift); + + assert(CardTable::dirty_card_val() == 0, "must be"); + + __ load_byte_map_base(tmp); + __ add(tmp, obj, tmp); + + if (UseCondCardMark) { + Label L_already_dirty; + __ membar(MacroAssembler::StoreLoad); + __ lbu(t1, Address(tmp)); + __ beqz(t1, L_already_dirty); + __ sb(zr, Address(tmp)); + __ bind(L_already_dirty); + } else { + __ sb(zr, Address(tmp)); + } +} + +void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp, RegSet saved_regs) { + assert_cond(masm != NULL); + assert_different_registers(start, tmp); + assert_different_registers(count, tmp); + + Label L_loop, L_done; + const Register end = count; + + __ beqz(count, L_done); // zero count - nothing to do + // end = start + count << LogBytesPerHeapOop + __ shadd(end, count, start, count, LogBytesPerHeapOop); + __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive + + __ srli(start, start, CardTable::card_shift); + __ srli(end, end, CardTable::card_shift); + __ sub(count, end, start); // number of bytes to copy + + __ load_byte_map_base(tmp); + __ add(start, start, tmp); + + __ bind(L_loop); + __ add(tmp, start, count); + __ sb(zr, Address(tmp)); + __ sub(count, count, 1); + __ bgez(count, L_loop); + __ bind(L_done); +} + +void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + bool in_heap = (decorators & IN_HEAP) != 0; + bool is_array = (decorators & IS_ARRAY) != 0; + bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; + bool precise = is_array || on_anonymous; + + bool needs_post_barrier = val != noreg && in_heap; + BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); + if (needs_post_barrier) { + // flatten object address if needed + if (!precise || dst.offset() == 0) { + store_check(masm, dst.base(), x13); + } else { + assert_cond(masm != NULL); + __ la(x13, dst); + store_check(masm, x13, t0); + } + } +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp =================================================================== --- /dev/null +++ 
jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP +#define CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/modRefBarrierSetAssembler.hpp" + +class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { +protected: + void store_check(MacroAssembler* masm, Register obj, Register tmp); + + virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp, RegSet saved_regs); + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); +}; + +#endif // #ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/modRefBarrierSetAssembler.hpp" + +#define __ masm-> + +void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register src, Register dst, Register count, RegSet saved_regs) { + + if (is_oop) { + gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); + } +} + +void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register start, Register count, Register tmp, + RegSet saved_regs) { + if (is_oop) { + gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp, saved_regs); + } +} + +void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + if (is_reference_type(type)) { + oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); + } else { + BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); + } +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP +#define CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSetAssembler.hpp" + +// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other +// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected +// accesses, which are overridden in the concrete BarrierSetAssembler. 
+ +class ModRefBarrierSetAssembler: public BarrierSetAssembler { +protected: + virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count, RegSet saved_regs) {} + virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp, RegSet saved_regs) {} + + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) = 0; + +public: + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register src, Register dst, Register count, RegSet saved_regs); + virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register start, Register count, Register tmp, RegSet saved_regs); + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); +}; + +#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "gc/shared/gc_globals.hpp" +#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" +#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" + +#define __ masm->masm()-> + +void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) { + Register addr = _addr->as_register_lo(); + Register newval = _new_value->as_register(); + Register cmpval = _cmp_value->as_register(); + Register tmp1 = _tmp1->as_register(); + Register tmp2 = _tmp2->as_register(); + Register result = result_opr()->as_register(); + + ShenandoahBarrierSet::assembler()->iu_barrier(masm->masm(), newval, t1); + + if (UseCompressedOops) { + __ encode_heap_oop(tmp1, cmpval); + cmpval = tmp1; + __ encode_heap_oop(tmp2, newval); + newval = tmp2; + } + + ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq, + /* release */ Assembler::rl, /* is_cae */ false, result); +} + +#undef __ + +#ifdef ASSERT +#define __ gen->lir(__FILE__, __LINE__)-> +#else +#define __ gen->lir()-> +#endif + +LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) { + BasicType bt = access.type(); + if (access.is_oop()) { + LIRGenerator *gen = access.gen(); + if (ShenandoahSATBBarrier) { + pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(), + LIR_OprFact::illegalOpr /* pre_val */); + } + if (ShenandoahCASBarrier) { + cmp_value.load_item(); + new_value.load_item(); + + LIR_Opr tmp1 = gen->new_register(T_OBJECT); + LIR_Opr tmp2 = gen->new_register(T_OBJECT); + LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base(); + LIR_Opr result = gen->new_register(T_INT); + + __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), tmp1, tmp2, result)); + return result; + } + } + return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value); +} + +LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) { + LIRGenerator* gen = access.gen(); + BasicType type = access.type(); + + LIR_Opr result = gen->new_register(type); + value.load_item(); + LIR_Opr value_opr = value.result(); + + if (access.is_oop()) { + value_opr = iu_barrier(access.gen(), value_opr, access.access_emit_info(), access.decorators()); + } + + assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type"); + LIR_Opr tmp = gen->new_register(T_INT); + __ xchg(access.resolved_addr(), value_opr, result, tmp); + + if (access.is_oop()) { + result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), access.decorators()); + LIR_Opr tmp_opr = gen->new_register(type); + __ move(result, tmp_opr); + result = tmp_opr; + if (ShenandoahSATBBarrier) { + pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr, + result /* pre_val */); + } + } + + return result; +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp @@ -0,0 +1,712 @@ +/* + * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" +#include "gc/shenandoah/shenandoahForwarding.hpp" +#include "gc/shenandoah/shenandoahHeap.inline.hpp" +#include "gc/shenandoah/shenandoahHeapRegion.hpp" +#include "gc/shenandoah/shenandoahRuntime.hpp" +#include "gc/shenandoah/shenandoahThreadLocalData.hpp" +#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interp_masm.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.hpp" +#ifdef COMPILER1 +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" +#endif + +#define __ masm-> + +void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register src, Register dst, Register count, RegSet saved_regs) { + if (is_oop) { + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; + if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { + + Label done; + + // Avoid calling runtime if count == 0 + __ beqz(count, done); + + // Is GC active? 
+ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); + assert_different_registers(src, dst, count, t0); + + __ lbu(t0, gc_state); + if (ShenandoahSATBBarrier && dest_uninitialized) { + __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED); + __ beqz(t0, done); + } else { + __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING); + __ beqz(t0, done); + } + + __ push_reg(saved_regs, sp); + if (UseCompressedOops) { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry), + src, dst, count); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count); + } + __ pop_reg(saved_regs, sp); + __ bind(done); + } + } +} + +void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call) { + if (ShenandoahSATBBarrier) { + satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); + } +} + +void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call) { + // If expand_call is true then we expand the call_VM_leaf macro + // directly to skip generating the check by + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. + assert(thread == xthread, "must be"); + + Label done; + Label runtime; + + assert_different_registers(obj, pre_val, tmp, t0); + assert(pre_val != noreg && tmp != noreg, "expecting a register"); + + Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); + Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); + Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + + // Is marking active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ lwu(tmp, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ lbu(tmp, in_progress); + } + __ beqz(tmp, done); + + // Do we need to load the previous value? + if (obj != noreg) { + __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); + } + + // Is the previous value null? + __ beqz(pre_val, done); + + // Can we store original value in the thread's buffer? + // Is index == 0? + // (The index field is typed as size_t.) + __ ld(tmp, index); // tmp := *index_adr + __ beqz(tmp, runtime); // tmp == 0? If yes, goto runtime + + __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize + __ sd(tmp, index); // *index_adr := tmp + __ ld(t0, buffer); + __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr + + // Record the previous value + __ sd(pre_val, Address(tmp, 0)); + __ j(done); + + __ bind(runtime); + // save the live input values + RegSet saved = RegSet::of(pre_val); + if (tosca_live) saved += RegSet::of(x10); + if (obj != noreg) saved += RegSet::of(obj); + + __ push_reg(saved, sp); + + // Calling the runtime using the regular call_VM_leaf mechanism generates + // code (generated by InterpreterMacroAssembler::call_VM_leaf_base) + // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL. + // + // If we are generating the pre-barrier without a frame (e.g. in the + // intrinsified Reference.get() routine) then ebp might be pointing to + // the caller frame and so this check will most likely fail at runtime.
+ // + // Expanding the call directly bypasses the generation of the check. + // So when we do not have a full interpreter frame on the stack + // expand_call should be passed true. + if (expand_call) { + assert(pre_val != c_rarg1, "smashed arg"); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); + } + + __ pop_reg(saved, sp); + + __ bind(done); +} + +void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) { + assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); + + Label is_null; + __ beqz(dst, is_null); + resolve_forward_pointer_not_null(masm, dst, tmp); + __ bind(is_null); +} + +// IMPORTANT: This must preserve all registers, even t0 and t1, except those explicitly +// passed in. +void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) { + assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); + // The below loads the mark word, checks if the lowest two bits are + // set, and if so, clear the lowest two bits and copy the result + // to dst. Otherwise it leaves dst alone. + // Implementing this is surprisingly awkward. I do it here by: + // - Inverting the mark word + // - Test lowest two bits == 0 + // - If so, set the lowest two bits + // - Invert the result back, and copy to dst + RegSet saved_regs = RegSet::of(t2); + bool borrow_reg = (tmp == noreg); + if (borrow_reg) { + // No free registers available. Make one useful. + tmp = t0; + if (tmp == dst) { + tmp = t1; + } + saved_regs += RegSet::of(tmp); + } + + assert_different_registers(tmp, dst, t2); + __ push_reg(saved_regs, sp); + + Label done; + __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); + __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1 + __ andi(t2, tmp, markWord::lock_mask_in_place); + __ bnez(t2, done); + __ ori(tmp, tmp, markWord::marked_value); + __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 + __ bind(done); + + __ pop_reg(saved_regs, sp); +} + +void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, + Register dst, + Address load_addr, + DecoratorSet decorators) { + assert(ShenandoahLoadRefBarrier, "Should be enabled"); + assert(dst != t1 && load_addr.base() != t1, "need t1"); + assert_different_registers(load_addr.base(), t0, t1); + + bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); + bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); + bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); + bool is_native = ShenandoahBarrierSet::is_native_access(decorators); + bool is_narrow = UseCompressedOops && !is_native; + + Label heap_stable, not_cset; + __ enter(); + Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); + __ lbu(t1, gc_state); + + // Check for heap stability + if (is_strong) { + __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); + __ beqz(t1, heap_stable); + } else { + Label lrb; + __ andi(t0, t1, ShenandoahHeap::WEAK_ROOTS); + __ bnez(t0, lrb); + __ andi(t0, t1, ShenandoahHeap::HAS_FORWARDED); + __ beqz(t0, heap_stable); + __ bind(lrb); + } + + // use x11 for load address + Register result_dst = dst; + if (dst == x11) { + __ mv(t1, dst); + dst = t1; + } + + // Save x10 and x11, unless it is an output register +
RegSet saved_regs = RegSet::of(x10, x11) - result_dst; + __ push_reg(saved_regs, sp); + __ la(x11, load_addr); + __ mv(x10, dst); + + // Test for in-cset + if (is_strong) { + __ li(t1, (uint64_t)ShenandoahHeap::in_cset_fast_test_addr()); + __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); + __ add(t1, t1, t0); + __ lbu(t1, Address(t1)); + __ andi(t0, t1, 1); + __ beqz(t0, not_cset); + } + + __ push_call_clobbered_registers(); + if (is_strong) { + if (is_narrow) { + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); + } else { + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); + } + } else if (is_weak) { + if (is_narrow) { + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); + } else { + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); + } + } else { + assert(is_phantom, "only remaining strength"); + assert(!is_narrow, "phantom access cannot be narrow"); + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_phantom); + } + __ jalr(ra); + __ mv(t0, x10); + __ pop_call_clobbered_registers(); + __ mv(x10, t0); + __ bind(not_cset); + __ mv(result_dst, x10); + __ pop_reg(saved_regs, sp); + + __ bind(heap_stable); + __ leave(); +} + +void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) { + if (ShenandoahIUBarrier) { + __ push_call_clobbered_registers(); + + satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false); + + __ pop_call_clobbered_registers(); + } +} + +// +// Arguments: +// +// Inputs: +// src: oop location to load from, might be clobbered +// +// Output: +// dst: oop loaded from src location +// +// Kill: +// x30 (tmp reg) +// +// Alias: +// dst: x30 (might use x30 as temporary output register to avoid clobbering src) +// +void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, + DecoratorSet decorators, + BasicType type, + Register dst, + Address src, + Register tmp1, + Register tmp_thread) { + // 1: non-reference load, no additional barrier is needed + if (!is_reference_type(type)) { + BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + return; + } + + // 2: load a reference from src location and apply LRB if needed + if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { + Register result_dst = dst; + + // Preserve src location for LRB + RegSet saved_regs; + if (dst == src.base()) { + dst = (src.base() == x28) ?
x29 : x28; + saved_regs = RegSet::of(dst); + __ push_reg(saved_regs, sp); + } + assert_different_registers(dst, src.base()); + + BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + + load_reference_barrier(masm, dst, src, decorators); + + if (dst != result_dst) { + __ mv(result_dst, dst); + dst = result_dst; + } + + if (saved_regs.bits() != 0) { + __ pop_reg(saved_regs, sp); + } + } else { + BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + } + + // 3: apply keep-alive barrier if needed + if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) { + __ enter(); + __ push_call_clobbered_registers(); + satb_write_barrier_pre(masm /* masm */, + noreg /* obj */, + dst /* pre_val */, + xthread /* thread */, + tmp1 /* tmp */, + true /* tosca_live */, + true /* expand_call */); + __ pop_call_clobbered_registers(); + __ leave(); + } +} + +void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + bool on_oop = is_reference_type(type); + if (!on_oop) { + BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); + return; + } + + // flatten object address if needed + if (dst.offset() == 0) { + if (dst.base() != x13) { + __ mv(x13, dst.base()); + } + } else { + __ la(x13, dst); + } + + shenandoah_write_barrier_pre(masm, + x13 /* obj */, + tmp2 /* pre_val */, + xthread /* thread */, + tmp1 /* tmp */, + val != noreg /* tosca_live */, + false /* expand_call */); + + if (val == noreg) { + BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); + } else { + iu_barrier(masm, val, tmp1); + // G1 barrier needs uncompressed oop for region cross check. + Register new_val = val; + if (UseCompressedOops) { + new_val = t1; + __ mv(new_val, val); + } + BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); + } +} + +void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath) { + Label done; + // Resolve jobject + BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath); + + // Check for null. + __ beqz(obj, done); + + assert(obj != t1, "need t1"); + Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()); + __ lbu(t1, gc_state); + + // Check for heap in evacuation phase + __ andi(t0, t1, ShenandoahHeap::EVACUATION); + __ bnez(t0, slowpath); + + __ bind(done); +} + +// Special Shenandoah CAS implementation that handles false negatives due +// to concurrent evacuation. The service is more complex than a +// traditional CAS operation because the CAS operation is intended to +// succeed if the reference at addr exactly matches expected or if the +// reference at addr holds a pointer to a from-space object that has +// been relocated to the location named by expected. There are two +// races that must be addressed: +// a) A parallel thread may mutate the contents of addr so that it points +// to a different object. In this case, the CAS operation should fail. +// b) A parallel thread may heal the contents of addr, replacing a +// from-space pointer held in addr with the to-space pointer +// representing the new location of the object. 
+// Upon entry to cmpxchg_oop, it is assured that new_val equals NULL +// or it refers to an object that is not being evacuated out of +// from-space, or it refers to the to-space version of an object that +// is being evacuated out of from-space. +// +// By default the value held in the result register following execution +// of the generated code sequence is 0 to indicate failure of CAS, +// non-zero to indicate success. If is_cae, the result is the value most +// recently fetched from addr rather than a boolean success indicator. +// +// Clobbers t0, t1 +void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, + Register addr, + Register expected, + Register new_val, + Assembler::Aqrl acquire, + Assembler::Aqrl release, + bool is_cae, + Register result) { + bool is_narrow = UseCompressedOops; + Assembler::operand_size size = is_narrow ? Assembler::uint32 : Assembler::int64; + + assert_different_registers(addr, expected, t0, t1); + assert_different_registers(addr, new_val, t0, t1); + + Label retry, success, fail, done; + + __ bind(retry); + + // Step 1: Try to CAS. + __ cmpxchg(addr, expected, new_val, size, acquire, release, /* result */ t1); + + // If success, then we are done. + __ beq(expected, t1, success); + + // Step 2: CAS failed, check the forwarded pointer. + __ mv(t0, t1); + + if (is_narrow) { + __ decode_heap_oop(t0, t0); + } + resolve_forward_pointer(masm, t0); + + __ encode_heap_oop(t0, t0); + + // Report failure when the forwarded oop was not expected. + __ bne(t0, expected, fail); + + // Step 3: CAS again using the forwarded oop. + __ cmpxchg(addr, t1, new_val, size, acquire, release, /* result */ t0); + + // Retry when failed. + __ bne(t0, t1, retry); + + __ bind(success); + if (is_cae) { + __ mv(result, expected); + } else { + __ addi(result, zr, 1); + } + __ j(done); + + __ bind(fail); + if (is_cae) { + __ mv(result, t0); + } else { + __ mv(result, zr); + } + + __ bind(done); +} + +#undef __ + +#ifdef COMPILER1 + +#define __ ce->masm()-> + +void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) { + ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + // At this point we know that marking is in progress. + // If do_load() is true then we have to emit the + // load of the previous value; otherwise it has already + // been loaded into _pre_val.
+ __ bind(*stub->entry()); + + assert(stub->pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { + ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); + } + __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); + ce->store_parameter(stub->pre_val()->as_register(), 0); + __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); + __ j(*stub->continuation()); +} + +void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, + ShenandoahLoadReferenceBarrierStub* stub) { + ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + __ bind(*stub->entry()); + + DecoratorSet decorators = stub->decorators(); + bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); + bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); + bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); + bool is_native = ShenandoahBarrierSet::is_native_access(decorators); + + Register obj = stub->obj()->as_register(); + Register res = stub->result()->as_register(); + Register addr = stub->addr()->as_pointer_register(); + Register tmp1 = stub->tmp1()->as_register(); + Register tmp2 = stub->tmp2()->as_register(); + + assert(res == x10, "result must arrive in x10"); + assert_different_registers(tmp1, tmp2, t0); + + if (res != obj) { + __ mv(res, obj); + } + + if (is_strong) { + // Check for object in cset. + __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); + __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); + __ add(tmp2, tmp2, tmp1); + __ lbu(tmp2, Address(tmp2)); + __ beqz(tmp2, *stub->continuation(), true /* is_far */); + } + + ce->store_parameter(res, 0); + ce->store_parameter(addr, 1); + + if (is_strong) { + if (is_native) { + __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin())); + } else { + __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin())); + } + } else if (is_weak) { + __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin())); + } else { + assert(is_phantom, "only remaining strength"); + __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin())); + } + + __ j(*stub->continuation()); +} + +#undef __ + +#define __ sasm-> + +void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { + __ prologue("shenandoah_pre_barrier", false); + + // arg0 : previous value of memory + + BarrierSet* bs = BarrierSet::barrier_set(); + + const Register pre_val = x10; + const Register thread = xthread; + const Register tmp = t0; + + Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); + Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + + Label done; + Label runtime; + + // Is marking still active? + Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); + __ lb(tmp, gc_state); + __ andi(tmp, tmp, ShenandoahHeap::MARKING); + __ beqz(tmp, done); + + // Can we store original value in the thread's buffer? 
+ __ ld(tmp, queue_index); + __ beqz(tmp, runtime); + + __ sub(tmp, tmp, wordSize); + __ sd(tmp, queue_index); + __ ld(t1, buffer); + __ add(tmp, tmp, t1); + __ load_parameter(0, t1); + __ sd(t1, Address(tmp, 0)); + __ j(done); + + __ bind(runtime); + __ push_call_clobbered_registers(); + __ load_parameter(0, pre_val); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); + __ pop_call_clobbered_registers(); + __ bind(done); + + __ epilogue(); +} + +void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, + DecoratorSet decorators) { + __ prologue("shenandoah_load_reference_barrier", false); + // arg0 : object to be resolved + + __ push_call_clobbered_registers(); + __ load_parameter(0, x10); + __ load_parameter(1, x11); + + bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); + bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); + bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); + bool is_native = ShenandoahBarrierSet::is_native_access(decorators); + if (is_strong) { + if (is_native) { + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); + } else { + if (UseCompressedOops) { + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); + } else { + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); + } + } + } else if (is_weak) { + assert(!is_native, "weak must not be called off-heap"); + if (UseCompressedOops) { + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); + } else { + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); + } + } else { + assert(is_phantom, "only remaining strength"); + assert(is_native, "phantom must only be called off-heap"); + __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_phantom); + } + __ jalr(ra); + __ mv(t0, x10); + __ pop_call_clobbered_registers(); + __ mv(x10, t0); + + __ epilogue(); +} + +#undef __ + +#endif // COMPILER1 Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP +#define CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#ifdef COMPILER1 +class LIR_Assembler; +class ShenandoahPreBarrierStub; +class ShenandoahLoadReferenceBarrierStub; +class StubAssembler; +#endif +class StubCodeGenerator; + +class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { +private: + + void satb_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call); + void shenandoah_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call); + + void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); + void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); + void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators); + +public: + + void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); + +#ifdef COMPILER1 + void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); + void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); + void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); + void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators); +#endif + + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register src, Register dst, Register count, RegSet saved_regs); + + virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread); + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); + + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath); + + void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, + Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); +}; + +#endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad @@ -0,0 +1,285 @@ +// +// Copyright (c) 2018, Red Hat, Inc. All rights reserved. +// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +source_hpp %{ +#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" +%} + +instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ + match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP tmp, KILL cr); + + format %{ + "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapP_shenandoah" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ + match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP tmp, KILL cr); + + format %{ + "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapN_shenandoah" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ + predicate(needs_acquiring_load_reserved(n)); + match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP tmp, KILL cr); + + format %{ + "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapPAcq_shenandoah" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
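+    // cmpxchg_oop may modify the register holding the expected value while it retries, and
+    // this instruct declares no kill effect on $oldval, so the comparison works on the copy
+    // in $tmp and $oldval stays valid for any later uses.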
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::aq /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ + predicate(needs_acquiring_load_reserved(n)); + match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP tmp, KILL cr); + + format %{ + "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapNAcq_shenandoah" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::aq /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ + match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + effect(TEMP_DEF res, TEMP tmp, KILL cr); + + format %{ + "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + true /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ + match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP_DEF res, TEMP tmp, KILL cr); + format %{ + "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + true /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ + match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP tmp, KILL cr); + format %{ + "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah" + "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
+ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ + predicate(needs_acquiring_load_reserved(n)); + match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP_DEF res, TEMP tmp, KILL cr); + format %{ + "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq_shenandoah" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::aq /* acquire */, Assembler::rl /* release */, + true /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ + predicate(needs_acquiring_load_reserved(n)); + match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP_DEF res, TEMP tmp, KILL cr); + format %{ + "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq_shenandoah" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::aq /* acquire */, Assembler::rl /* release */, + true /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ + match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP tmp, KILL cr); + format %{ + "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ + predicate(needs_acquiring_load_reserved(n)); + match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP tmp, KILL cr); + format %{ + "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapNAcq_shenandoah" + "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
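+    // Lowering the weak form to the strong cmpxchg below is still correct: a weak CAS is
+    // merely permitted to fail spuriously, it is never required to.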
+ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::aq /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ + predicate(needs_acquiring_load_reserved(n)); + match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + + effect(TEMP tmp, KILL cr); + format %{ + "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapPAcq_shenandoah" + "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. + // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, + Assembler::aq /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} + + ins_pipe(pipe_slow); +%} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp @@ -0,0 +1,441 @@ +/* + * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/codeBlob.hpp" +#include "code/vmreg.inline.hpp" +#include "gc/z/zBarrier.inline.hpp" +#include "gc/z/zBarrierSet.hpp" +#include "gc/z/zBarrierSetAssembler.hpp" +#include "gc/z/zBarrierSetRuntime.hpp" +#include "gc/z/zThreadLocalData.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/sharedRuntime.hpp" +#include "utilities/macros.hpp" +#ifdef COMPILER1 +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "gc/z/c1/zBarrierSetC1.hpp" +#endif // COMPILER1 +#ifdef COMPILER2 +#include "gc/z/c2/zBarrierSetC2.hpp" +#endif // COMPILER2 + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +#undef __ +#define __ masm-> + +void ZBarrierSetAssembler::load_at(MacroAssembler* masm, + DecoratorSet decorators, + BasicType type, + Register dst, + Address src, + Register tmp1, + Register tmp_thread) { + if (!ZBarrierSet::barrier_needed(decorators, type)) { + // Barrier not needed + BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + return; + } + + assert_different_registers(t1, src.base()); + assert_different_registers(t0, t1, dst); + + Label done; + + // Load bad mask into temp register. + __ la(t0, src); + __ ld(t1, address_bad_mask_from_thread(xthread)); + __ ld(dst, Address(t0)); + + // Test reference against bad mask. If mask bad, then we need to fix it up. + __ andr(t1, dst, t1); + __ beqz(t1, done); + + __ enter(); + + __ push_call_clobbered_registers_except(RegSet::of(dst)); + + if (c_rarg0 != dst) { + __ mv(c_rarg0, dst); + } + + __ mv(c_rarg1, t0); + + __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); + + // Make sure dst has the return value. + if (dst != x10) { + __ mv(dst, x10); + } + + __ pop_call_clobbered_registers_except(RegSet::of(dst)); + __ leave(); + + __ bind(done); +} + +#ifdef ASSERT + +void ZBarrierSetAssembler::store_at(MacroAssembler* masm, + DecoratorSet decorators, + BasicType type, + Address dst, + Register val, + Register tmp1, + Register tmp2) { + // Verify value + if (is_reference_type(type)) { + // Note that src could be noreg, which means we + // are storing null and can skip verification. + if (val != noreg) { + Label done; + + // tmp1 and tmp2 are often set to noreg. + RegSet savedRegs = RegSet::of(t0); + __ push_reg(savedRegs, sp); + + __ ld(t0, address_bad_mask_from_thread(xthread)); + __ andr(t0, val, t0); + __ beqz(t0, done); + __ stop("Verify oop store failed"); + __ should_not_reach_here(); + __ bind(done); + __ pop_reg(savedRegs, sp); + } + } + + // Store value + BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); +} + +#endif // ASSERT + +void ZBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, + DecoratorSet decorators, + bool is_oop, + Register src, + Register dst, + Register count, + RegSet saved_regs) { + if (!is_oop) { + // Barrier not needed + return; + } + + BLOCK_COMMENT("ZBarrierSetAssembler::arraycopy_prologue {"); + + assert_different_registers(src, count, t0); + + __ push_reg(saved_regs, sp); + + if (count == c_rarg0 && src == c_rarg1) { + // exactly backwards!! 
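+    // The arguments arrive exactly swapped with respect to the C calling convention, so
+    // exchange c_rarg0 and c_rarg1 in place with the XOR trick (no scratch register needed).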
+ __ xorr(c_rarg0, c_rarg0, c_rarg1); + __ xorr(c_rarg1, c_rarg0, c_rarg1); + __ xorr(c_rarg0, c_rarg0, c_rarg1); + } else { + __ mv(c_rarg0, src); + __ mv(c_rarg1, count); + } + + __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_array_addr(), 2); + + __ pop_reg(saved_regs, sp); + + BLOCK_COMMENT("} ZBarrierSetAssembler::arraycopy_prologue"); +} + +void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, + Register jni_env, + Register robj, + Register tmp, + Label& slowpath) { + BLOCK_COMMENT("ZBarrierSetAssembler::try_resolve_jobject_in_native {"); + + assert_different_registers(jni_env, robj, tmp); + + // Resolve jobject + BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, robj, tmp, slowpath); + + // Compute the offset of address bad mask from the field of jni_environment + long int bad_mask_relative_offset = (long int) (in_bytes(ZThreadLocalData::address_bad_mask_offset()) - + in_bytes(JavaThread::jni_environment_offset())); + + // Load the address bad mask + __ ld(tmp, Address(jni_env, bad_mask_relative_offset)); + + // Check address bad mask + __ andr(tmp, robj, tmp); + __ bnez(tmp, slowpath); + + BLOCK_COMMENT("} ZBarrierSetAssembler::try_resolve_jobject_in_native"); +} + +#ifdef COMPILER2 + +OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { + if (!OptoReg::is_reg(opto_reg)) { + return OptoReg::Bad; + } + + const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); + if (vm_reg->is_FloatRegister()) { + return opto_reg & ~1; + } + + return opto_reg; +} + +#undef __ +#define __ _masm-> + +class ZSaveLiveRegisters { +private: + MacroAssembler* const _masm; + RegSet _gp_regs; + FloatRegSet _fp_regs; + VectorRegSet _vp_regs; + +public: + void initialize(ZLoadBarrierStubC2* stub) { + // Record registers that needs to be saved/restored + RegMaskIterator rmi(stub->live()); + while (rmi.has_next()) { + const OptoReg::Name opto_reg = rmi.next(); + if (OptoReg::is_reg(opto_reg)) { + const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); + if (vm_reg->is_Register()) { + _gp_regs += RegSet::of(vm_reg->as_Register()); + } else if (vm_reg->is_FloatRegister()) { + _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); + } else if (vm_reg->is_VectorRegister()) { + const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~(VectorRegisterImpl::max_slots_per_register - 1)); + _vp_regs += VectorRegSet::of(vm_reg_base->as_VectorRegister()); + } else { + fatal("Unknown register type"); + } + } + } + + // Remove C-ABI SOE registers, tmp regs and _ref register that will be updated + _gp_regs -= RegSet::range(x18, x27) + RegSet::of(x2) + RegSet::of(x8, x9) + RegSet::of(x5, stub->ref()); + } + + ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : + _masm(masm), + _gp_regs(), + _fp_regs(), + _vp_regs() { + // Figure out what registers to save/restore + initialize(stub); + + // Save registers + __ push_reg(_gp_regs, sp); + __ push_fp(_fp_regs, sp); + __ push_vp(_vp_regs, sp); + } + + ~ZSaveLiveRegisters() { + // Restore registers + __ pop_vp(_vp_regs, sp); + __ pop_fp(_fp_regs, sp); + __ pop_reg(_gp_regs, sp); + } +}; + +class ZSetupArguments { +private: + MacroAssembler* const _masm; + const Register _ref; + const Address _ref_addr; + +public: + ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : + _masm(masm), + _ref(stub->ref()), + _ref_addr(stub->ref_addr()) { + + // Setup arguments + if (_ref_addr.base() == noreg) { + // No self healing + if (_ref != c_rarg0) { + __ mv(c_rarg0, _ref); + } + __ 
mv(c_rarg1, zr); + } else { + // Self healing + if (_ref == c_rarg0) { + // _ref is already at correct place + __ la(c_rarg1, _ref_addr); + } else if (_ref != c_rarg1) { + // _ref is in wrong place, but not in c_rarg1, so fix it first + __ la(c_rarg1, _ref_addr); + __ mv(c_rarg0, _ref); + } else if (_ref_addr.base() != c_rarg0) { + assert(_ref == c_rarg1, "Mov ref first, vacating c_rarg0"); + __ mv(c_rarg0, _ref); + __ la(c_rarg1, _ref_addr); + } else { + assert(_ref == c_rarg1, "Need to vacate c_rarg1 and _ref_addr is using c_rarg0"); + if (_ref_addr.base() == c_rarg0) { + __ mv(t1, c_rarg1); + __ la(c_rarg1, _ref_addr); + __ mv(c_rarg0, t1); + } else { + ShouldNotReachHere(); + } + } + } + } + + ~ZSetupArguments() { + // Transfer result + if (_ref != x10) { + __ mv(_ref, x10); + } + } +}; + +#undef __ +#define __ masm-> + +void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { + BLOCK_COMMENT("ZLoadBarrierStubC2"); + + // Stub entry + __ bind(*stub->entry()); + + { + ZSaveLiveRegisters save_live_registers(masm, stub); + ZSetupArguments setup_arguments(masm, stub); + int32_t offset = 0; + __ la_patchable(t0, stub->slow_path(), offset); + __ jalr(x1, t0, offset); + } + + // Stub exit + __ j(*stub->continuation()); +} + +#undef __ + +#endif // COMPILER2 + +#ifdef COMPILER1 +#undef __ +#define __ ce->masm()-> + +void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, + LIR_Opr ref) const { + assert_different_registers(xthread, ref->as_register(), t1); + __ ld(t1, address_bad_mask_from_thread(xthread)); + __ andr(t1, t1, ref->as_register()); +} + +void ZBarrierSetAssembler::generate_c1_load_barrier_stub(LIR_Assembler* ce, + ZLoadBarrierStubC1* stub) const { + // Stub entry + __ bind(*stub->entry()); + + Register ref = stub->ref()->as_register(); + Register ref_addr = noreg; + Register tmp = noreg; + + if (stub->tmp()->is_valid()) { + // Load address into tmp register + ce->leal(stub->ref_addr(), stub->tmp()); + ref_addr = tmp = stub->tmp()->as_pointer_register(); + } else { + // Address already in register + ref_addr = stub->ref_addr()->as_address_ptr()->base()->as_pointer_register(); + } + + assert_different_registers(ref, ref_addr, noreg); + + // Save x10 unless it is the result or tmp register + // Set up SP to accomodate parameters and maybe x10. 
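+  // store_parameter() below fills the two argument slots at sp + 0 and sp + 8; when x10 also
+  // has to be preserved it is saved at sp + 16, and reserving 32 bytes keeps sp 16-byte aligned.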
+ if (ref != x10 && tmp != x10) { + __ sub(sp, sp, 32); + __ sd(x10, Address(sp, 16)); + } else { + __ sub(sp, sp, 16); + } + + // Setup arguments and call runtime stub + ce->store_parameter(ref_addr, 1); + ce->store_parameter(ref, 0); + + __ far_call(stub->runtime_stub()); + + // Verify result + __ verify_oop(x10, "Bad oop"); + + + // Move result into place + if (ref != x10) { + __ mv(ref, x10); + } + + // Restore x10 unless it is the result or tmp register + if (ref != x10 && tmp != x10) { + __ ld(x10, Address(sp, 16)); + __ add(sp, sp, 32); + } else { + __ add(sp, sp, 16); + } + + // Stub exit + __ j(*stub->continuation()); +} + +#undef __ +#define __ sasm-> + +void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, + DecoratorSet decorators) const { + __ prologue("zgc_load_barrier stub", false); + + __ push_call_clobbered_registers_except(RegSet::of(x10)); + + // Setup arguments + __ load_parameter(0, c_rarg0); + __ load_parameter(1, c_rarg1); + + __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); + + __ pop_call_clobbered_registers_except(RegSet::of(x10)); + + __ epilogue(); +} + +#undef __ +#endif // COMPILER1 Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP +#define CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP + +#include "code/vmreg.hpp" +#include "oops/accessDecorators.hpp" +#ifdef COMPILER2 +#include "opto/optoreg.hpp" +#endif // COMPILER2 + +#ifdef COMPILER1 +class LIR_Assembler; +class LIR_OprDesc; +typedef LIR_OprDesc* LIR_Opr; +class StubAssembler; +class ZLoadBarrierStubC1; +#endif // COMPILER1 + +#ifdef COMPILER2 +class Node; +class ZLoadBarrierStubC2; +#endif // COMPILER2 + +class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { +public: + virtual void load_at(MacroAssembler* masm, + DecoratorSet decorators, + BasicType type, + Register dst, + Address src, + Register tmp1, + Register tmp_thread); + +#ifdef ASSERT + virtual void store_at(MacroAssembler* masm, + DecoratorSet decorators, + BasicType type, + Address dst, + Register val, + Register tmp1, + Register tmp2); +#endif // ASSERT + + virtual void arraycopy_prologue(MacroAssembler* masm, + DecoratorSet decorators, + bool is_oop, + Register src, + Register dst, + Register count, + RegSet saved_regs); + + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, + Register jni_env, + Register robj, + Register tmp, + Label& slowpath); + +#ifdef COMPILER1 + void generate_c1_load_barrier_test(LIR_Assembler* ce, + LIR_Opr ref) const; + + void generate_c1_load_barrier_stub(LIR_Assembler* ce, + ZLoadBarrierStubC1* stub) const; + + void generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, + DecoratorSet decorators) const; +#endif // COMPILER1 + +#ifdef COMPILER2 + OptoReg::Name refine_register(const Node* node, + OptoReg::Name opto_reg); + + void generate_c2_load_barrier_stub(MacroAssembler* masm, + ZLoadBarrierStubC2* stub) const; +#endif // COMPILER2 +}; + +#endif // CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "gc/shared/gcLogPrecious.hpp" +#include "gc/shared/gc_globals.hpp" +#include "gc/z/zGlobals.hpp" +#include "runtime/globals.hpp" +#include "runtime/os.hpp" +#include "utilities/globalDefinitions.hpp" +#include "utilities/powerOfTwo.hpp" + +#ifdef LINUX +#include +#endif // LINUX + +// +// The heap can have three different layouts, depending on the max heap size. +// +// Address Space & Pointer Layout 1 +// -------------------------------- +// +// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) +// . . +// . . +// . . +// +--------------------------------+ 0x0000014000000000 (20TB) +// | Remapped View | +// +--------------------------------+ 0x0000010000000000 (16TB) +// . . +// +--------------------------------+ 0x00000c0000000000 (12TB) +// | Marked1 View | +// +--------------------------------+ 0x0000080000000000 (8TB) +// | Marked0 View | +// +--------------------------------+ 0x0000040000000000 (4TB) +// . . +// +--------------------------------+ 0x0000000000000000 +// +// 6 4 4 4 4 +// 3 6 5 2 1 0 +// +--------------------+----+-----------------------------------------------+ +// |00000000 00000000 00|1111|11 11111111 11111111 11111111 11111111 11111111| +// +--------------------+----+-----------------------------------------------+ +// | | | +// | | * 41-0 Object Offset (42-bits, 4TB address space) +// | | +// | * 45-42 Metadata Bits (4-bits) 0001 = Marked0 (Address view 4-8TB) +// | 0010 = Marked1 (Address view 8-12TB) +// | 0100 = Remapped (Address view 16-20TB) +// | 1000 = Finalizable (Address view N/A) +// | +// * 63-46 Fixed (18-bits, always zero) +// +// +// Address Space & Pointer Layout 2 +// -------------------------------- +// +// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) +// . . +// . . +// . . +// +--------------------------------+ 0x0000280000000000 (40TB) +// | Remapped View | +// +--------------------------------+ 0x0000200000000000 (32TB) +// . . +// +--------------------------------+ 0x0000180000000000 (24TB) +// | Marked1 View | +// +--------------------------------+ 0x0000100000000000 (16TB) +// | Marked0 View | +// +--------------------------------+ 0x0000080000000000 (8TB) +// . . +// +--------------------------------+ 0x0000000000000000 +// +// 6 4 4 4 4 +// 3 7 6 3 2 0 +// +------------------+-----+------------------------------------------------+ +// |00000000 00000000 0|1111|111 11111111 11111111 11111111 11111111 11111111| +// +-------------------+----+------------------------------------------------+ +// | | | +// | | * 42-0 Object Offset (43-bits, 8TB address space) +// | | +// | * 46-43 Metadata Bits (4-bits) 0001 = Marked0 (Address view 8-16TB) +// | 0010 = Marked1 (Address view 16-24TB) +// | 0100 = Remapped (Address view 32-40TB) +// | 1000 = Finalizable (Address view N/A) +// | +// * 63-47 Fixed (17-bits, always zero) +// +// +// Address Space & Pointer Layout 3 +// -------------------------------- +// +// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) +// . . +// . . +// . . +// +--------------------------------+ 0x0000500000000000 (80TB) +// | Remapped View | +// +--------------------------------+ 0x0000400000000000 (64TB) +// . . +// +--------------------------------+ 0x0000300000000000 (48TB) +// | Marked1 View | +// +--------------------------------+ 0x0000200000000000 (32TB) +// | Marked0 View | +// +--------------------------------+ 0x0000100000000000 (16TB) +// . . 
+// +--------------------------------+ 0x0000000000000000 +// +// 6 4 4 4 4 +// 3 8 7 4 3 0 +// +------------------+----+-------------------------------------------------+ +// |00000000 00000000 |1111|1111 11111111 11111111 11111111 11111111 11111111| +// +------------------+----+-------------------------------------------------+ +// | | | +// | | * 43-0 Object Offset (44-bits, 16TB address space) +// | | +// | * 47-44 Metadata Bits (4-bits) 0001 = Marked0 (Address view 16-32TB) +// | 0010 = Marked1 (Address view 32-48TB) +// | 0100 = Remapped (Address view 64-80TB) +// | 1000 = Finalizable (Address view N/A) +// | +// * 63-48 Fixed (16-bits, always zero) +// + +// Default value if probing is not implemented for a certain platform: 128TB +static const size_t DEFAULT_MAX_ADDRESS_BIT = 47; +// Minimum value returned, if probing fails: 64GB +static const size_t MINIMUM_MAX_ADDRESS_BIT = 36; + +static size_t probe_valid_max_address_bit() { +#ifdef LINUX + size_t max_address_bit = 0; + const size_t page_size = os::vm_page_size(); + for (size_t i = DEFAULT_MAX_ADDRESS_BIT; i > MINIMUM_MAX_ADDRESS_BIT; --i) { + const uintptr_t base_addr = ((uintptr_t) 1U) << i; + if (msync((void*)base_addr, page_size, MS_ASYNC) == 0) { + // msync suceeded, the address is valid, and maybe even already mapped. + max_address_bit = i; + break; + } + if (errno != ENOMEM) { + // Some error occured. This should never happen, but msync + // has some undefined behavior, hence ignore this bit. +#ifdef ASSERT + fatal("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); +#else // ASSERT + log_warning_p(gc)("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); +#endif // ASSERT + continue; + } + // Since msync failed with ENOMEM, the page might not be mapped. + // Try to map it, to see if the address is valid. 
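+    // PROT_NONE with MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE commits no memory; we only ask
+    // whether the kernel accepts this address, and the probe mapping is released again below.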
+ void* const result_addr = mmap((void*) base_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); + if (result_addr != MAP_FAILED) { + munmap(result_addr, page_size); + } + if ((uintptr_t) result_addr == base_addr) { + // address is valid + max_address_bit = i; + break; + } + } + if (max_address_bit == 0) { + // probing failed, allocate a very high page and take that bit as the maximum + const uintptr_t high_addr = ((uintptr_t) 1U) << DEFAULT_MAX_ADDRESS_BIT; + void* const result_addr = mmap((void*) high_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); + if (result_addr != MAP_FAILED) { + max_address_bit = BitsPerSize_t - count_leading_zeros((size_t) result_addr) - 1; + munmap(result_addr, page_size); + } + } + log_info_p(gc, init)("Probing address space for the highest valid bit: " SIZE_FORMAT, max_address_bit); + return MAX2(max_address_bit, MINIMUM_MAX_ADDRESS_BIT); +#else // LINUX + return DEFAULT_MAX_ADDRESS_BIT; +#endif // LINUX +} + +size_t ZPlatformAddressOffsetBits() { + const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1; + const size_t max_address_offset_bits = valid_max_address_offset_bits - 3; + const size_t min_address_offset_bits = max_address_offset_bits - 2; + const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio); + const size_t address_offset_bits = log2i_exact(address_offset); + return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits); +} + +size_t ZPlatformAddressMetadataShift() { + return ZPlatformAddressOffsetBits(); +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP +#define CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP + +const size_t ZPlatformGranuleSizeShift = 21; // 2MB +const size_t ZPlatformHeapViews = 3; +const size_t ZPlatformCacheLineSize = 64; + +size_t ZPlatformAddressOffsetBits(); +size_t ZPlatformAddressMetadataShift(); + +#endif // CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad @@ -0,0 +1,233 @@ +// +// Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// + +source_hpp %{ + +#include "gc/shared/gc_globals.hpp" +#include "gc/z/c2/zBarrierSetC2.hpp" +#include "gc/z/zThreadLocalData.hpp" + +%} + +source %{ + +static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, int barrier_data) { + if (barrier_data == ZLoadBarrierElided) { + return; + } + ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, barrier_data); + __ ld(tmp, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); + __ andr(tmp, tmp, ref); + __ bnez(tmp, *stub->entry(), true /* far */); + __ bind(*stub->continuation()); +} + +static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { + ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, ZLoadBarrierStrong); + __ j(*stub->entry()); + __ bind(*stub->continuation()); +} + +%} + +// Load Pointer +instruct zLoadP(iRegPNoSp dst, memory mem) +%{ + match(Set dst (LoadP mem)); + predicate(UseZGC && (n->as_Load()->barrier_data() != 0)); + effect(TEMP dst); + + ins_cost(4 * DEFAULT_COST); + + format %{ "ld $dst, $mem, #@zLoadP" %} + + ins_encode %{ + const Address ref_addr (as_Register($mem$$base), $mem$$disp); + __ ld($dst$$Register, ref_addr); + z_load_barrier(_masm, this, ref_addr, $dst$$Register, t0 /* tmp */, barrier_data()); + %} + + ins_pipe(iload_reg_mem); +%} + +instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); + 
effect(KILL cr, TEMP_DEF res); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapP\n\t" + "mv $res, $res == $oldval" %} + + ins_encode %{ + Label failed; + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, + true /* result_as_bool */); + __ beqz($res$$Register, failed); + __ mv(t0, $oldval$$Register); + __ bind(failed); + if (barrier_data() != ZLoadBarrierElided) { + Label good; + __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); + __ andr(t1, t1, t0); + __ beqz(t1, good); + z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, + true /* result_as_bool */); + __ bind(good); + } + %} + + ins_pipe(pipe_slow); +%} + +instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); + effect(KILL cr, TEMP_DEF res); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapPAcq\n\t" + "mv $res, $res == $oldval" %} + + ins_encode %{ + Label failed; + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, + true /* result_as_bool */); + __ beqz($res$$Register, failed); + __ mv(t0, $oldval$$Register); + __ bind(failed); + if (barrier_data() != ZLoadBarrierElided) { + Label good; + __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); + __ andr(t1, t1, t0); + __ beqz(t1, good); + z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, + true /* result_as_bool */); + __ bind(good); + } + %} + + ins_pipe(pipe_slow); +%} + +instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); + effect(TEMP_DEF res); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangeP" %} + + ins_encode %{ + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); + if (barrier_data() != ZLoadBarrierElided) { + Label good; + __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); + __ andr(t0, t0, $res$$Register); + __ beqz(t0, good); + z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); + __ cmpxchg($mem$$Register, 
$oldval$$Register, $newval$$Register, Assembler::int64, + Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); + __ bind(good); + } + %} + + ins_pipe(pipe_slow); +%} + +instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + predicate(UseZGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); + effect(TEMP_DEF res); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangePAcq" %} + + ins_encode %{ + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); + if (barrier_data() != ZLoadBarrierElided) { + Label good; + __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); + __ andr(t0, t0, $res$$Register); + __ beqz(t0, good); + z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); + __ bind(good); + } + %} + + ins_pipe(pipe_slow); +%} + +instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ + match(Set prev (GetAndSetP mem newv)); + predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); + effect(TEMP_DEF prev, KILL cr); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ "atomic_xchg $prev, $newv, [$mem], #@zGetAndSetP" %} + + ins_encode %{ + __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); + z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); + %} + + ins_pipe(pipe_serial); +%} + +instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ + match(Set prev (GetAndSetP mem newv)); + predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() != 0)); + effect(TEMP_DEF prev, KILL cr); + + ins_cost(VOLATILE_REF_COST); + + format %{ "atomic_xchg_acq $prev, $newv, [$mem], #@zGetAndSetPAcq" %} + + ins_encode %{ + __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); + z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); + %} + ins_pipe(pipe_serial); +%} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP +#define CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP + +const int StackAlignmentInBytes = 16; + +// Indicates whether the C calling conventions require that +// 32-bit integer argument values are extended to 64 bits. +const bool CCallingConventionRequiresIntsAsLongs = false; + +// RISCV has adopted a multicopy atomic model closely following +// that of ARMv8. +#define CPU_MULTI_COPY_ATOMIC + +// To be safe, we deoptimize when we come across an access that needs +// patching. This is similar to what is done on aarch64. +#define DEOPTIMIZE_WHEN_PATCHING + +#define SUPPORTS_NATIVE_CX8 + +#define SUPPORT_RESERVED_STACK_AREA + +#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false + +#endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/globals_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/globals_riscv.hpp @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_GLOBALS_RISCV_HPP +#define CPU_RISCV_GLOBALS_RISCV_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks +define_pd_global(bool, TrapBasedNullChecks, false); +define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast + +define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. 
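+// A 64-byte CodeEntryAlignment keeps compiled entry points on what is typically a full
+// instruction-cache line on RISC-V implementations.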
+define_pd_global(intx, CodeEntryAlignment, 64); +define_pd_global(intx, OptoLoopAlignment, 16); +define_pd_global(intx, InlineFrequencyCount, 100); + +#define DEFAULT_STACK_YELLOW_PAGES (2) +#define DEFAULT_STACK_RED_PAGES (1) +// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the +// stack if compiled for unix and LP64. To pass stack overflow tests we need +// 20 shadow pages. +#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+5)) +#define DEFAULT_STACK_RESERVED_PAGES (1) + +#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES +#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES +#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES +#define MIN_STACK_RESERVED_PAGES (0) + +define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); +define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); +define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); +define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); + +define_pd_global(bool, RewriteBytecodes, true); +define_pd_global(bool, RewriteFrequentPairs, true); + +define_pd_global(bool, PreserveFramePointer, false); + +define_pd_global(uintx, TypeProfileLevel, 111); + +define_pd_global(bool, CompactStrings, true); + +// Clear short arrays bigger than one word in an arch-specific way +define_pd_global(intx, InitArrayShortSize, BytesPerLong); + +define_pd_global(intx, InlineSmallCode, 1000); + +#define ARCH_FLAGS(develop, \ + product, \ + notproduct, \ + range, \ + constraint) \ + \ + product(bool, NearCpool, true, \ + "constant pool is close to instructions") \ + product(intx, BlockZeroingLowLimit, 256, \ + "Minimum size in bytes when block zeroing will be used") \ + range(1, max_jint) \ + product(bool, TraceTraps, false, "Trace all traps the signal handler") \ + /* For now we're going to be safe and add the I/O bits to userspace fences. */ \ + product(bool, UseConservativeFence, true, \ + "Extend i for r and o for w in the pred/succ flags of fence;" \ + "Extend fence.i to fence.i + fence.") \ + product(bool, AvoidUnalignedAccesses, true, \ + "Avoid generating unaligned memory accesses") \ + product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") \ + product(bool, UseRVB, false, EXPERIMENTAL, "Use RVB instructions") \ + product(bool, UseRVC, false, EXPERIMENTAL, "Use RVC instructions") \ + product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \ + "Use RVV instructions for left/right shift of BigInteger") + +#endif // CPU_RISCV_GLOBALS_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/icBuffer_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/icBuffer_riscv.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/icBuffer.hpp" +#include "gc/shared/collectedHeap.inline.hpp" +#include "interpreter/bytecodes.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/oop.inline.hpp" + +int InlineCacheBuffer::ic_stub_code_size() { + // 6: auipc + ld + auipc + jalr + address(2 * instruction_size) + // 5: auipc + ld + j + address(2 * instruction_size) + return (MacroAssembler::far_branches() ? 6 : 5) * NativeInstruction::instruction_size; +} + +#define __ masm-> + +void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { + assert_cond(code_begin != NULL && entry_point != NULL); + ResourceMark rm; + CodeBuffer code(code_begin, ic_stub_code_size()); + MacroAssembler* masm = new MacroAssembler(&code); + // Note: even though the code contains an embedded value, we do not need reloc info + // because + // (1) the value is old (i.e., doesn't matter for scavenges) + // (2) these ICStubs are removed *before* a GC happens, so the roots disappear + + address start = __ pc(); + Label l; + __ ld(t1, l); + __ far_jump(ExternalAddress(entry_point)); + __ align(wordSize); + __ bind(l); + __ emit_int64((intptr_t)cached_value); + // Only need to invalidate the 1st two instructions - not the whole ic stub + ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size()); + assert(__ pc() - start == ic_stub_code_size(), "must be"); +} + +address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object + NativeJump* jump = nativeJump_at(move->next_instruction_address()); + return jump->jump_destination(); +} + + +void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { + // The word containing the cached value is at the end of this IC buffer + uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize); + void* o = (void*)*p; + return o; +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/icache_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/icache_riscv.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "runtime/icache.hpp" + +#define __ _masm-> + +static int icache_flush(address addr, int lines, int magic) { + os::icache_flush((long int) addr, (long int) (addr + (lines << ICache::log2_line_size))); + return magic; +} + +void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { + address start = (address)icache_flush; + *flush_icache_stub = (ICache::flush_icache_stub_t)start; + + // ICache::invalidate_range() contains explicit condition that the first + // call is invoked on the generated icache flush stub code range. + ICache::invalidate_range(start, 0); + + { + StubCodeMark mark(this, "ICache", "fake_stub_for_inlined_icache_flush"); + __ ret(); + } +} + +#undef __ Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/icache_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/icache_riscv.hpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_ICACHE_RISCV_HPP +#define CPU_RISCV_ICACHE_RISCV_HPP + +// Interface for updating the instruction cache. Whenever the VM +// modifies code, part of the processor instruction cache potentially +// has to be flushed. + +class ICache : public AbstractICache { +public: + enum { + stub_size = 16, // Size of the icache flush stub in bytes + line_size = BytesPerWord, // conservative + log2_line_size = LogBytesPerWord // log2(line_size) + }; +}; + +#endif // CPU_RISCV_ICACHE_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/interp_masm_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/interp_masm_riscv.cpp @@ -0,0 +1,1965 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interp_masm_riscv.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "logging/log.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markWord.hpp" +#include "oops/method.hpp" +#include "oops/methodData.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.inline.hpp" +#include "utilities/powerOfTwo.hpp" + +void InterpreterMacroAssembler::narrow(Register result) { + // Get method->_constMethod->_result_type + ld(t0, Address(fp, frame::interpreter_frame_method_offset * wordSize)); + ld(t0, Address(t0, Method::const_offset())); + lbu(t0, Address(t0, ConstMethod::result_type_offset())); + + Label done, notBool, notByte, notChar; + + // common case first + mv(t1, T_INT); + beq(t0, t1, done); + + // mask integer result to narrower return type. + mv(t1, T_BOOLEAN); + bne(t0, t1, notBool); + + andi(result, result, 0x1); + j(done); + + bind(notBool); + mv(t1, T_BYTE); + bne(t0, t1, notByte); + sign_extend(result, result, 8); + j(done); + + bind(notByte); + mv(t1, T_CHAR); + bne(t0, t1, notChar); + zero_extend(result, result, 16); + j(done); + + bind(notChar); + sign_extend(result, result, 16); + + // Nothing to do for T_INT + bind(done); + addw(result, result, zr); +} + +void InterpreterMacroAssembler::jump_to_entry(address entry) { + assert(entry != NULL, "Entry must have been generated by now"); + j(entry); +} + +void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { + if (JvmtiExport::can_pop_frame()) { + Label L; + // Initiate popframe handling only if it is not already being + // processed. If the flag has the popframe_processing bit set, + // it means that this code is called *during* popframe handling - we + // don't want to reenter. + // This method is only called just after the call into the vm in + // call_VM_base, so the arg registers are available. 
+ lwu(t1, Address(xthread, JavaThread::popframe_condition_offset())); + andi(t0, t1, JavaThread::popframe_pending_bit); + beqz(t0, L); + andi(t0, t1, JavaThread::popframe_processing_bit); + bnez(t0, L); + // Call Interpreter::remove_activation_preserving_args_entry() to get the + // address of the same-named entrypoint in the generated interpreter code. + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); + jr(x10); + bind(L); + } +} + + +void InterpreterMacroAssembler::load_earlyret_value(TosState state) { + ld(x12, Address(xthread, JavaThread::jvmti_thread_state_offset())); + const Address tos_addr(x12, JvmtiThreadState::earlyret_tos_offset()); + const Address oop_addr(x12, JvmtiThreadState::earlyret_oop_offset()); + const Address val_addr(x12, JvmtiThreadState::earlyret_value_offset()); + switch (state) { + case atos: + ld(x10, oop_addr); + sd(zr, oop_addr); + verify_oop(x10); + break; + case ltos: + ld(x10, val_addr); + break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: + lwu(x10, val_addr); + break; + case ftos: + flw(f10, val_addr); + break; + case dtos: + fld(f10, val_addr); + break; + case vtos: + /* nothing to do */ + break; + default: + ShouldNotReachHere(); + } + // Clean up tos value in the thread object + mvw(t0, (int) ilgl); + sw(t0, tos_addr); + sw(zr, val_addr); +} + + +void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { + if (JvmtiExport::can_force_early_return()) { + Label L; + ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); + beqz(t0, L); // if [thread->jvmti_thread_state() == NULL] then exit + + // Initiate earlyret handling only if it is not already being processed. + // If the flag has the earlyret_processing bit set, it means that this code + // is called *during* earlyret handling - we don't want to reenter. + lwu(t0, Address(t0, JvmtiThreadState::earlyret_state_offset())); + mv(t1, JvmtiThreadState::earlyret_pending); + bne(t0, t1, L); + + // Call Interpreter::remove_activation_early_entry() to get the address of the + // same-named entrypoint in the generated interpreter code. + ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); + lwu(t0, Address(t0, JvmtiThreadState::earlyret_tos_offset())); + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), t0); + jr(x10); + bind(L); + } +} + +void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) { + assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); + lhu(reg, Address(xbcp, bcp_offset)); + revb_h(reg, reg); +} + +void InterpreterMacroAssembler::get_dispatch() { + int32_t offset = 0; + la_patchable(xdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset); + addi(xdispatch, xdispatch, offset); +} + +void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, + int bcp_offset, + size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + if (index_size == sizeof(u2)) { + load_unsigned_short(index, Address(xbcp, bcp_offset)); + } else if (index_size == sizeof(u4)) { + lwu(index, Address(xbcp, bcp_offset)); + // Check if the secondary index definition is still ~x, otherwise + // we have to change the following assembler code to calculate the + // plain index. 
+ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); + xori(index, index, -1); + addw(index, index, zr); + } else if (index_size == sizeof(u1)) { + load_unsigned_byte(index, Address(xbcp, bcp_offset)); + } else { + ShouldNotReachHere(); + } +} + +// Return +// Rindex: index into constant pool +// Rcache: address of cache entry - ConstantPoolCache::base_offset() +// +// A caller must add ConstantPoolCache::base_offset() to Rcache to get +// the true address of the cache entry. +// +void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, + Register index, + int bcp_offset, + size_t index_size) { + assert_different_registers(cache, index); + assert_different_registers(cache, xcpool); + get_cache_index_at_bcp(index, bcp_offset, index_size); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + // Convert from field index to ConstantPoolCacheEntry + // riscv already has the cache in xcpool so there is no need to + // install it in cache. Instead we pre-add the indexed offset to + // xcpool and return it in cache. All clients of this method need to + // be modified accordingly. + shadd(cache, index, xcpool, cache, 5); +} + + +void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, + Register index, + Register bytecode, + int byte_no, + int bcp_offset, + size_t index_size) { + get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); + // We use a 32-bit load here since the layout of 64-bit words on + // little-endian machines allow us that. + // n.b. unlike x86 cache already includes the index offset + la(bytecode, Address(cache, + ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::indices_offset())); + membar(MacroAssembler::AnyAny); + lwu(bytecode, bytecode); + membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + const int shift_count = (1 + byte_no) * BitsPerByte; + slli(bytecode, bytecode, XLEN - (shift_count + BitsPerByte)); + srli(bytecode, bytecode, XLEN - BitsPerByte); +} + +void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, + Register tmp, + int bcp_offset, + size_t index_size) { + assert(cache != tmp, "must use different register"); + get_cache_index_at_bcp(tmp, bcp_offset, index_size); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + // Convert from field index to ConstantPoolCacheEntry index + // and from word offset to byte offset + assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, + "else change next line"); + ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); + // skip past the header + add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); + // construct pointer to cache entry + shadd(cache, tmp, cache, tmp, 2 + LogBytesPerWord); +} + +// Load object from cpool->resolved_references(index) +void InterpreterMacroAssembler::load_resolved_reference_at_index( + Register result, Register index, Register tmp) { + assert_different_registers(result, index); + + get_constant_pool(result); + // Load pointer for resolved_references[] objArray + ld(result, Address(result, ConstantPool::cache_offset_in_bytes())); + ld(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes())); + resolve_oop_handle(result, tmp); + // Add in the index + addi(index, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); + shadd(result, index, result, index, LogBytesPerHeapOop); + 
load_heap_oop(result, Address(result, 0)); +} + +void InterpreterMacroAssembler::load_resolved_klass_at_offset( + Register cpool, Register index, Register klass, Register temp) { + shadd(temp, index, cpool, temp, LogBytesPerWord); + lhu(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index + ld(klass, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses + shadd(klass, temp, klass, temp, LogBytesPerWord); + ld(klass, Address(klass, Array<Klass*>::base_offset_in_bytes())); +} + +void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no, + Register method, + Register cache) { + const int method_offset = in_bytes( + ConstantPoolCache::base_offset() + + ((byte_no == TemplateTable::f2_byte) + ? ConstantPoolCacheEntry::f2_offset() + : ConstantPoolCacheEntry::f1_offset())); + + ld(method, Address(cache, method_offset)); // get f1 Method* +} + +// Generate a subtype check: branch to ok_is_subtype if sub_klass is a +// subtype of super_klass. +// +// Args: +// x10: superklass +// Rsub_klass: subklass +// +// Kills: +// x12, x15 +void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, + Label& ok_is_subtype) { + assert(Rsub_klass != x10, "x10 holds superklass"); + assert(Rsub_klass != x12, "x12 holds 2ndary super array length"); + assert(Rsub_klass != x15, "x15 holds 2ndary super array scan ptr"); + + // Profile the not-null value's klass. + profile_typecheck(x12, Rsub_klass, x15); // blows x12, reloads x15 + + // Do the check. + check_klass_subtype(Rsub_klass, x10, x12, ok_is_subtype); // blows x12 + + // Profile the failure of the check. + profile_typecheck_failed(x12); // blows x12 +} + +// Java Expression Stack + +void InterpreterMacroAssembler::pop_ptr(Register r) { + ld(r, Address(esp, 0)); + addi(esp, esp, wordSize); +} + +void InterpreterMacroAssembler::pop_i(Register r) { + lw(r, Address(esp, 0)); // lw sign-extends the loaded value + addi(esp, esp, wordSize); +} + +void InterpreterMacroAssembler::pop_l(Register r) { + ld(r, Address(esp, 0)); + addi(esp, esp, 2 * Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::push_ptr(Register r) { + addi(esp, esp, -wordSize); + sd(r, Address(esp, 0)); +} + +void InterpreterMacroAssembler::push_i(Register r) { + addi(esp, esp, -wordSize); + addw(r, r, zr); // sign-extend + sd(r, Address(esp, 0)); +} + +void InterpreterMacroAssembler::push_l(Register r) { + addi(esp, esp, -2 * wordSize); + sd(zr, Address(esp, wordSize)); + sd(r, Address(esp)); +} + +void InterpreterMacroAssembler::pop_f(FloatRegister r) { + flw(r, esp, 0); + addi(esp, esp, wordSize); +} + +void InterpreterMacroAssembler::pop_d(FloatRegister r) { + fld(r, esp, 0); + addi(esp, esp, 2 * Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::push_f(FloatRegister r) { + addi(esp, esp, -wordSize); + fsw(r, Address(esp, 0)); +} + +void InterpreterMacroAssembler::push_d(FloatRegister r) { + addi(esp, esp, -2 * wordSize); + fsd(r, Address(esp, 0)); +} + +void InterpreterMacroAssembler::pop(TosState state) { + switch (state) { + case atos: + pop_ptr(); + verify_oop(x10); + break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: + pop_i(); + break; + case ltos: + pop_l(); + break; + case ftos: + pop_f(); + break; + case dtos: + pop_d(); + break; + case vtos: + /* nothing to do */ + break; + default: + ShouldNotReachHere(); + } +} + +void InterpreterMacroAssembler::push(TosState state) { + switch
(state) { + case atos: + verify_oop(x10); + push_ptr(); + break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: + push_i(); + break; + case ltos: + push_l(); + break; + case ftos: + push_f(); + break; + case dtos: + push_d(); + break; + case vtos: + /* nothing to do */ + break; + default: + ShouldNotReachHere(); + } +} + +// Helpers for swap and dup +void InterpreterMacroAssembler::load_ptr(int n, Register val) { + ld(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); +} + +void InterpreterMacroAssembler::store_ptr(int n, Register val) { + sd(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); +} + +void InterpreterMacroAssembler::load_float(Address src) { + flw(f10, src); +} + +void InterpreterMacroAssembler::load_double(Address src) { + fld(f10, src); +} + +void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { + // set sender sp + mv(x30, sp); + // record last_sp + sd(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); +} + +// Jump to the from_interpreted entry of a call unless single stepping is possible +// in this thread, in which case we must call the i2i entry +void InterpreterMacroAssembler::jump_from_interpreted(Register method) { + prepare_to_jump_from_interpreted(); + if (JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. + lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); + beqz(t0, run_compiled_code); + ld(t0, Address(method, Method::interpreter_entry_offset())); + jr(t0); + bind(run_compiled_code); + } + + ld(t0, Address(method, Method::from_interpreted_offset())); + jr(t0); +} + +// The following two routines provide a hook so that an implementation +// can schedule the dispatch in two parts. amd64 does not do this. +void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { +} + +void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { + dispatch_next(state, step); +} + +void InterpreterMacroAssembler::dispatch_base(TosState state, + address* table, + bool verifyoop, + bool generate_poll, + Register Rs) { + // Pay attention to the argument Rs, which defaults to t0.
+ if (VerifyActivationFrameSize) { + Unimplemented(); + } + if (verifyoop && state == atos) { + verify_oop(x10); + } + + Label safepoint; + address* const safepoint_table = Interpreter::safept_table(state); + bool needs_thread_local_poll = generate_poll && table != safepoint_table; + + if (needs_thread_local_poll) { + NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); + ld(t1, Address(xthread, JavaThread::polling_word_offset())); + andi(t1, t1, SafepointMechanism::poll_bit()); + bnez(t1, safepoint); + } + if (table == Interpreter::dispatch_table(state)) { + li(t1, Interpreter::distance_from_dispatch_table(state)); + add(t1, Rs, t1); + shadd(t1, t1, xdispatch, t1, 3); + } else { + mv(t1, (address)table); + shadd(t1, Rs, t1, Rs, 3); + } + ld(t1, Address(t1)); + jr(t1); + + if (needs_thread_local_poll) { + bind(safepoint); + la(t1, ExternalAddress((address)safepoint_table)); + shadd(t1, Rs, t1, Rs, 3); + ld(t1, Address(t1)); + jr(t1); + } +} + +void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll, Register Rs) { + dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll, Rs); +} + +void InterpreterMacroAssembler::dispatch_only_normal(TosState state, Register Rs) { + dispatch_base(state, Interpreter::normal_table(state), Rs); +} + +void InterpreterMacroAssembler::dispatch_only_noverify(TosState state, Register Rs) { + dispatch_base(state, Interpreter::normal_table(state), false, Rs); +} + +void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { + // load next bytecode + load_unsigned_byte(t0, Address(xbcp, step)); + add(xbcp, xbcp, step); + dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); +} + +void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { + // load current bytecode + lbu(t0, Address(xbcp, 0)); + dispatch_base(state, table); +} + +// remove activation +// +// Apply stack watermark barrier. +// Unlock the receiver if this is a synchronized method. +// Unlock any Java monitors from synchronized blocks. +// Remove the activation from the stack. +// +// If there are locked Java monitors +// If throw_monitor_exception +// throws IllegalMonitorStateException +// Else if install_monitor_exception +// installs IllegalMonitorStateException +// Else +// no error processing +void InterpreterMacroAssembler::remove_activation( + TosState state, + bool throw_monitor_exception, + bool install_monitor_exception, + bool notify_jvmdi) { + // Note: Register x13 may be in use for the + // result check if this is a synchronized method + Label unlocked, unlock, no_unlock; + + // The below poll is for the stack watermark barrier. It allows frames that would + // normally not be safe to use to be fixed up lazily. Such returns into unsafe territory of + // the stack will call InterpreterRuntime::at_unwind.
+ Label slow_path; + Label fast_path; + safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); + j(fast_path); + + bind(slow_path); + push(state); + set_last_Java_frame(esp, fp, (address)pc(), t0); + super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), xthread); + reset_last_Java_frame(true); + pop(state); + + bind(fast_path); + + // get the value of _do_not_unlock_if_synchronized into x13 + const Address do_not_unlock_if_synchronized(xthread, + in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + lbu(x13, do_not_unlock_if_synchronized); + sb(zr, do_not_unlock_if_synchronized); // reset the flag + + // get method access flags + ld(x11, Address(fp, frame::interpreter_frame_method_offset * wordSize)); + ld(x12, Address(x11, Method::access_flags_offset())); + andi(t0, x12, JVM_ACC_SYNCHRONIZED); + beqz(t0, unlocked); + + // Don't unlock anything if the _do_not_unlock_if_synchronized flag + // is set. + bnez(x13, no_unlock); + + // unlock monitor + push(state); // save result + + // BasicObjectLock will be first in list, since this is a + // synchronized method. However, need to check that the object has + // not been unlocked by an explicit monitorexit bytecode. + const Address monitor(fp, frame::interpreter_frame_initial_sp_offset * + wordSize - (int) sizeof(BasicObjectLock)); + // We use c_rarg1 so that if we go slow path it will be the correct + // register for unlock_object to pass to VM directly + la(c_rarg1, monitor); // address of first monitor + + ld(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + bnez(x10, unlock); + + pop(state); + if (throw_monitor_exception) { + // Entry already unlocked, need to throw exception + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Monitor already unlocked during a stack unroll. If requested, + // install an illegal_monitor_state_exception. Continue with + // stack unrolling. + if (install_monitor_exception) { + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::new_illegal_monitor_state_exception)); + } + j(unlocked); + } + + bind(unlock); + unlock_object(c_rarg1); + pop(state); + + // Check for block-structured locking (i.e., that all locked + // objects have been unlocked) + bind(unlocked); + + // x10: Might contain return value + + // Check that all monitors are unlocked + { + Label loop, exception, entry, restart; + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + const Address monitor_block_top( + fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); + const Address monitor_block_bot( + fp, frame::interpreter_frame_initial_sp_offset * wordSize); + + bind(restart); + // We use c_rarg1 so that if we go slow path it will be the correct + // register for unlock_object to pass to VM directly + ld(c_rarg1, monitor_block_top); // points to current entry, starting + // with top-most entry + la(x9, monitor_block_bot); // points to word before bottom of + // monitor block + + j(entry); + + // Entry already locked, need to throw exception + bind(exception); + + if (throw_monitor_exception) { + // Throw exception + MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime:: + throw_illegal_monitor_state_exception)); + + should_not_reach_here(); + } else { + // Stack unrolling. Unlock object and install illegal_monitor_exception.
+ // Unlock does not block, so don't have to worry about the frame. + // We don't have to preserve c_rarg1 since we are going to throw an exception. + + push(state); + unlock_object(c_rarg1); + pop(state); + + if (install_monitor_exception) { + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + new_illegal_monitor_state_exception)); + } + + j(restart); + } + + bind(loop); + // check if current entry is used + add(t0, c_rarg1, BasicObjectLock::obj_offset_in_bytes()); + ld(t0, Address(t0, 0)); + bnez(t0, exception); + + add(c_rarg1, c_rarg1, entry_size); // otherwise advance to next entry + bind(entry); + bne(c_rarg1, x9, loop); // check if bottom reached if not at bottom then check this entry + } + + bind(no_unlock); + + // jvmti support + if (notify_jvmdi) { + notify_method_exit(state, NotifyJVMTI); // preserve TOSCA + + } else { + notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA + } + + // remove activation + // get sender esp + ld(t1, + Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); + if (StackReservedPages > 0) { + // testing if reserved zone needs to be re-enabled + Label no_reserved_zone_enabling; + + ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); + ble(t1, t0, no_reserved_zone_enabling); + + call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), xthread); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_delayed_StackOverflowError)); + should_not_reach_here(); + + bind(no_reserved_zone_enabling); + } + + // restore sender esp + mv(esp, t1); + + // remove frame anchor + leave(); + // If we're returning to interpreted code we will shortly be + // adjusting SP to allow some space for ESP. If we're returning to + // compiled code the saved sender SP was saved in sender_sp, so this + // restores it. + andi(sp, esp, -16); +} + +// Lock object +// +// Args: +// c_rarg1: BasicObjectLock to be used for locking +// +// Kills: +// x10 +// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs) +// t0, t1 (temp regs) +void InterpreterMacroAssembler::lock_object(Register lock_reg) +{ + assert(lock_reg == c_rarg1, "The argument is only for looks. 
It must be c_rarg1"); + if (UseHeavyMonitors) { + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + lock_reg); + } else { + Label done; + + const Register swap_reg = x10; + const Register tmp = c_rarg2; + const Register obj_reg = c_rarg3; // Will contain the oop + + const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); + const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); + const int mark_offset = lock_offset + + BasicLock::displaced_header_offset_in_bytes(); + + Label slow_case; + + // Load object pointer into obj_reg c_rarg3 + ld(obj_reg, Address(lock_reg, obj_offset)); + + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(tmp, obj_reg); + lwu(tmp, Address(tmp, Klass::access_flags_offset())); + andi(tmp, tmp, JVM_ACC_IS_VALUE_BASED_CLASS); + bnez(tmp, slow_case); + } + + if (UseBiasedLocking) { + biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); + } + + // Load (object->mark() | 1) into swap_reg + ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + ori(swap_reg, t0, 1); + + // Save (object->mark() | 1) into BasicLock's displaced header + sd(swap_reg, Address(lock_reg, mark_offset)); + + assert(lock_offset == 0, + "displached header must be first word in BasicObjectLock"); + + if (PrintBiasedLockingStatistics) { + Label fast, fail; + cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, fast, &fail); + bind(fast); + atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), + t1, t0); + j(done); + bind(fail); + } else { + cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); + } + + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 7) == 0, and + // 2) sp <= mark < mark + os::pagesize() + // + // These 3 tests can be done by evaluating the following + // expression: ((mark - sp) & (7 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 3 bits clear. + // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg + sub(swap_reg, swap_reg, sp); + li(t0, (int64_t)(7 - os::vm_page_size())); + andr(swap_reg, swap_reg, t0); + + // Save the test result, for recursive case, the result is zero + sd(swap_reg, Address(lock_reg, mark_offset)); + + if (PrintBiasedLockingStatistics) { + bnez(swap_reg, slow_case); + atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), + t1, t0); + } + beqz(swap_reg, done); + + bind(slow_case); + + // Call the runtime routine for slow case + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + lock_reg); + + bind(done); + } +} + + +// Unlocks an object. Used in monitorexit bytecode and +// remove_activation. Throws an IllegalMonitorException if object is +// not locked by current thread. +// +// Args: +// c_rarg1: BasicObjectLock for lock +// +// Kills: +// x10 +// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs) +// t0, t1 (temp regs) +void InterpreterMacroAssembler::unlock_object(Register lock_reg) +{ + assert(lock_reg == c_rarg1, "The argument is only for looks. 
It must be rarg1"); + + if (UseHeavyMonitors) { + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + } else { + Label done; + + const Register swap_reg = x10; + const Register header_reg = c_rarg2; // Will contain the old oopMark + const Register obj_reg = c_rarg3; // Will contain the oop + + save_bcp(); // Save in case of exception + + // Convert from BasicObjectLock structure to object and BasicLock + // structure Store the BasicLock address into x10 + la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + + // Load oop into obj_reg(c_rarg3) + ld(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + + // Free entry + sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + + if (UseBiasedLocking) { + biased_locking_exit(obj_reg, header_reg, done); + } + + // Load the old header from BasicLock structure + ld(header_reg, Address(swap_reg, + BasicLock::displaced_header_offset_in_bytes())); + + // Test for recursion + beqz(header_reg, done); + + // Atomic swap back the old header + cmpxchg_obj_header(swap_reg, header_reg, obj_reg, t0, done, /*fallthrough*/NULL); + + // Call the runtime routine for slow case. + sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + + bind(done); + + restore_bcp(); + } +} + + +void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, + Label& zero_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + ld(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); + beqz(mdp, zero_continue); +} + +// Set the method data pointer for the current bcp. +void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { + assert(ProfileInterpreter, "must be profiling interpreter"); + Label set_mdp; + push_reg(0xc00, sp); // save x10, x11 + + // Test MDO to avoid the call if it is NULL. + ld(x10, Address(xmethod, in_bytes(Method::method_data_offset()))); + beqz(x10, set_mdp); + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), xmethod, xbcp); + // x10: mdi + // mdo is guaranteed to be non-zero here, we checked for it before the call. + ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); + la(x11, Address(x11, in_bytes(MethodData::data_offset()))); + add(x10, x11, x10); + sd(x10, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); + bind(set_mdp); + pop_reg(0xc00, sp); +} + +void InterpreterMacroAssembler::verify_method_data_pointer() { + assert(ProfileInterpreter, "must be profiling interpreter"); +#ifdef ASSERT + Label verify_continue; + add(sp, sp, -4 * wordSize); + sd(x10, Address(sp, 0)); + sd(x11, Address(sp, wordSize)); + sd(x12, Address(sp, 2 * wordSize)); + sd(x13, Address(sp, 3 * wordSize)); + test_method_data_pointer(x13, verify_continue); // If mdp is zero, continue + get_method(x11); + + // If the mdp is valid, it will point to a DataLayout header which is + // consistent with the bcp. The converse is highly probable also. 
+ lh(x12, Address(x13, in_bytes(DataLayout::bci_offset()))); + ld(t0, Address(x11, Method::const_offset())); + add(x12, x12, t0); + la(x12, Address(x12, ConstMethod::codes_offset())); + beq(x12, xbcp, verify_continue); + // x10: method + // xbcp: bcp // xbcp == 22 + // x13: mdp + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), + x11, xbcp, x13); + bind(verify_continue); + ld(x10, Address(sp, 0)); + ld(x11, Address(sp, wordSize)); + ld(x12, Address(sp, 2 * wordSize)); + ld(x13, Address(sp, 3 * wordSize)); + add(sp, sp, 4 * wordSize); +#endif // ASSERT +} + + +void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, + int constant, + Register value) { + assert(ProfileInterpreter, "must be profiling interpreter"); + Address data(mdp_in, constant); + sd(value, data); +} + + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + int constant, + bool decrement) { + increment_mdp_data_at(mdp_in, noreg, constant, decrement); +} + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + Register reg, + int constant, + bool decrement) { + assert(ProfileInterpreter, "must be profiling interpreter"); + // %%% this does 64bit counters at best it is wasting space + // at worst it is a rare bug when counters overflow + + assert_different_registers(t1, t0, mdp_in, reg); + + Address addr1(mdp_in, constant); + Address addr2(t1, 0); + Address &addr = addr1; + if (reg != noreg) { + la(t1, addr1); + add(t1, t1, reg); + addr = addr2; + } + + if (decrement) { + ld(t0, addr); + addi(t0, t0, -DataLayout::counter_increment); + Label L; + bltz(t0, L); // skip store if counter underflow + sd(t0, addr); + bind(L); + } else { + assert(DataLayout::counter_increment == 1, + "flow-free idiom only works with 1"); + ld(t0, addr); + addi(t0, t0, DataLayout::counter_increment); + Label L; + blez(t0, L); // skip store if counter overflow + sd(t0, addr); + bind(L); + } +} + +void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, + int flag_byte_constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + int flags_offset = in_bytes(DataLayout::flags_offset()); + // Set the flag + lbu(t1, Address(mdp_in, flags_offset)); + ori(t1, t1, flag_byte_constant); + sb(t1, Address(mdp_in, flags_offset)); +} + + +void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, + int offset, + Register value, + Register test_value_out, + Label& not_equal_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + if (test_value_out == noreg) { + ld(t1, Address(mdp_in, offset)); + bne(value, t1, not_equal_continue); + } else { + // Put the test value into a register, so caller can use it: + ld(test_value_out, Address(mdp_in, offset)); + bne(value, test_value_out, not_equal_continue); + } +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + ld(t1, Address(mdp_in, offset_of_disp)); + add(mdp_in, mdp_in, t1); + sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); +} + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + Register reg, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + add(t1, mdp_in, reg); + ld(t1, Address(t1, offset_of_disp)); + add(mdp_in, mdp_in, t1); + sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, + int constant) { + 
assert(ProfileInterpreter, "must be profiling interpreter"); + addi(mdp_in, mdp_in, (unsigned)constant); + sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { + assert(ProfileInterpreter, "must be profiling interpreter"); + + // save/restore across call_VM + addi(sp, sp, -2 * wordSize); + sd(zr, Address(sp, 0)); + sd(return_bci, Address(sp, wordSize)); + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), + return_bci); + ld(zr, Address(sp, 0)); + ld(return_bci, Address(sp, wordSize)); + addi(sp, sp, 2 * wordSize); +} + +void InterpreterMacroAssembler::profile_taken_branch(Register mdp, + Register bumped_count) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + // Otherwise, assign to mdp + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the taken count. + Address data(mdp, in_bytes(JumpData::taken_offset())); + ld(bumped_count, data); + assert(DataLayout::counter_increment == 1, + "flow-free idiom only works with 1"); + addi(bumped_count, bumped_count, DataLayout::counter_increment); + Label L; + // eg: bumped_count=0x7fff ffff ffff ffff + 1 < 0. so we use <= 0; + blez(bumped_count, L); // skip store if counter overflow, + sd(bumped_count, data); + bind(L); + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the not taken count. + increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); + + // The method data pointer needs to be updated to correspond to + // the next bytecode + update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_final_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_virtual_call(Register receiver, + Register mdp, + Register reg2, + bool receiver_can_be_null) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. 
+ test_method_data_pointer(mdp, profile_continue); + + Label skip_receiver_profile; + if (receiver_can_be_null) { + Label not_null; + // We are making a call. Increment the count for null receiver. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + j(skip_receiver_profile); + bind(not_null); + } + + // Record the receiver type. + record_klass_in_profile(receiver, mdp, reg2, true); + bind(skip_receiver_profile); + + // The method data pointer needs to be updated to reflect the new target. + + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); + bind(profile_continue); + } +} + +// This routine creates a state machine for updating the multi-row +// type profile at a virtual call site (or other type-sensitive bytecode). +// The machine visits each row (of receiver/count) until the receiver type +// is found, or until it runs out of rows. At the same time, it remembers +// the location of the first empty row. (An empty row records null for its +// receiver, and can be allocated for a newly-observed receiver type.) +// Because there are two degrees of freedom in the state, a simple linear +// search will not work; it must be a decision tree. Hence this helper +// function is recursive, to generate the required tree structured code. +// It's the interpreter, so we are trading off code space for speed. +// See below for example code. +void InterpreterMacroAssembler::record_klass_in_profile_helper( + Register receiver, Register mdp, + Register reg2, + Label& done, bool is_virtual_call) { + if (TypeProfileWidth == 0) { + if (is_virtual_call) { + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + } + + } else { + int non_profiled_offset = -1; + if (is_virtual_call) { + non_profiled_offset = in_bytes(CounterData::count_offset()); + } + + record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, + &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); + } +} + +void InterpreterMacroAssembler::record_item_in_profile_helper( + Register item, Register mdp, Register reg2, int start_row, Label& done, int total_rows, + OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, int non_profiled_offset) { + int last_row = total_rows - 1; + assert(start_row <= last_row, "must be work left to do"); + // Test this row for both the item and for null. + // Take any of three different outcomes: + // 1. found item => increment count and goto done + // 2. found null => keep looking for case 1, maybe allocate this cell + // 3. found something else => keep looking for cases 1 and 2 + // Case 3 is handled by a recursive call. + for (int row = start_row; row <= last_row; row++) { + Label next_test; + bool test_for_null_also = (row == start_row); + + // See if the item is item[n]. + int item_offset = in_bytes(item_offset_fn(row)); + test_mdp_data_at(mdp, item_offset, item, + (test_for_null_also ? reg2 : noreg), + next_test); + // (Reg2 now contains the item from the CallData.) + + // The item is item[n]. Increment count[n]. + int count_offset = in_bytes(item_count_offset_fn(row)); + increment_mdp_data_at(mdp, count_offset); + j(done); + bind(next_test); + + if (test_for_null_also) { + Label found_null; + // Failed the equality check on item[n]... Test for null. + if (start_row == last_row) { + // The only thing left to do is handle the null case. + if (non_profiled_offset >= 0) { + beqz(reg2, found_null); + // Item did not match any saved item and there is no empty row for it. 
+ // Increment total counter to indicate polymorphic case. + increment_mdp_data_at(mdp, non_profiled_offset); + j(done); + bind(found_null); + } else { + bnez(reg2, done); + } + break; + } + // Since null is rare, make it be the branch-taken case. + beqz(reg2, found_null); + + // Put all the "Case 3" tests here. + record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, + item_offset_fn, item_count_offset_fn, non_profiled_offset); + + // Found a null. Keep searching for a matching item, + // but remember that this is an empty (unused) slot. + bind(found_null); + } + } + + // In the fall-through case, we found no matching item, but we + // observed the item[start_row] is NULL. + // Fill in the item field and increment the count. + int item_offset = in_bytes(item_offset_fn(start_row)); + set_mdp_data_at(mdp, item_offset, item); + int count_offset = in_bytes(item_count_offset_fn(start_row)); + mv(reg2, DataLayout::counter_increment); + set_mdp_data_at(mdp, count_offset, reg2); + if (start_row > 0) { + j(done); + } +} + +// Example state machine code for three profile rows: +// # main copy of decision tree, rooted at row[1] +// if (row[0].rec == rec) then [ +// row[0].incr() +// goto done +// ] +// if (row[0].rec != NULL) then [ +// # inner copy of decision tree, rooted at row[1] +// if (row[1].rec == rec) then [ +// row[1].incr() +// goto done +// ] +// if (row[1].rec != NULL) then [ +// # degenerate decision tree, rooted at row[2] +// if (row[2].rec == rec) then [ +// row[2].incr() +// goto done +// ] +// if (row[2].rec != NULL) then [ +// count.incr() +// goto done +// ] # overflow +// row[2].init(rec) +// goto done +// ] else [ +// # remember row[1] is empty +// if (row[2].rec == rec) then [ +// row[2].incr() +// goto done +// ] +// row[1].init(rec) +// goto done +// ] +// else [ +// # remember row[0] is empty +// if (row[1].rec == rec) then [ +// row[1].incr() +// goto done +// ] +// if (row[2].rec == rec) then [ +// row[2].incr() +// goto done +// ] +// row[0].init(rec) +// goto done +// ] +// done: + +void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, + Register mdp, Register reg2, + bool is_virtual_call) { + assert(ProfileInterpreter, "must be profiling"); + Label done; + + record_klass_in_profile_helper(receiver, mdp, reg2, done, is_virtual_call); + + bind(done); +} + +void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the total ret count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + for (uint row = 0; row < RetData::row_limit(); row++) { + Label next_test; + + // See if return_bci is equal to bci[n]: + test_mdp_data_at(mdp, + in_bytes(RetData::bci_offset(row)), + return_bci, noreg, + next_test); + + // return_bci is equal to bci[n]. Increment the count. + increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, + in_bytes(RetData::bci_displacement_offset(row))); + j(profile_continue); + bind(next_test); + } + + update_mdp_for_ret(return_bci); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_null_seen(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. 
+ test_method_data_pointer(mdp, profile_continue); + + set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { + if (ProfileInterpreter && TypeProfileCasts) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + int count_offset = in_bytes(CounterData::count_offset()); + // Back up the address, since we have already bumped the mdp. + count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); + + // *Decrement* the counter. We expect to see zero or small negatives. + increment_mdp_data_at(mdp, count_offset, true); + + bind (profile_continue); + } +} + +void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + + // Record the object type. + record_klass_in_profile(klass, mdp, reg2, false); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_switch_default(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the default case count + increment_mdp_data_at(mdp, + in_bytes(MultiBranchData::default_count_offset())); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + in_bytes(MultiBranchData:: + default_displacement_offset())); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_switch_case(Register index, + Register mdp, + Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Build the base (index * per_case_size_in_bytes()) + + // case_array_offset_in_bytes() + mvw(reg2, in_bytes(MultiBranchData::per_case_size())); + mvw(t0, in_bytes(MultiBranchData::case_array_offset())); + Assembler::mul(index, index, reg2); + Assembler::add(index, index, t0); + + // Update the case count + increment_mdp_data_at(mdp, + index, + in_bytes(MultiBranchData::relative_count_offset())); + + // The method data pointer need to be updated. + update_mdp_by_offset(mdp, + index, + in_bytes(MultiBranchData:: + relative_displacement_offset())); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; } + +void InterpreterMacroAssembler::notify_method_entry() { + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. 
+ if (JvmtiExport::can_post_interpreter_events()) { + Label L; + lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); + beqz(x13, L); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_method_entry)); + bind(L); + } + + { + SkipIfEqual skip(this, &DTraceMethodProbes, false); + get_method(c_rarg1); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + xthread, c_rarg1); + } + + // RedefineClasses() tracing support for obsolete method entry + if (log_is_enabled(Trace, redefine, class, obsolete)) { + get_method(c_rarg1); + call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), + xthread, c_rarg1); + } +} + + +void InterpreterMacroAssembler::notify_method_exit( + TosState state, NotifyMethodExitMode mode) { + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { + Label L; + // Note: frame::interpreter_frame_result has a dependency on how the + // method result is saved across the call to post_method_exit. If this + // is changed then the interpreter_frame_result implementation will + // need to be updated too. + + // template interpreter will leave the result on the top of the stack. + push(state); + lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); + beqz(x13, L); + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); + bind(L); + pop(state); + } + + { + SkipIfEqual skip(this, &DTraceMethodProbes, false); + push(state); + get_method(c_rarg1); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + xthread, c_rarg1); + pop(state); + } +} + + +// Jump if ((*counter_addr += increment) & mask) satisfies the condition. +void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, + int increment, Address mask, + Register tmp1, Register tmp2, + bool preloaded, Label* where) { + Label done; + if (!preloaded) { + lwu(tmp1, counter_addr); + } + add(tmp1, tmp1, increment); + sw(tmp1, counter_addr); + lwu(tmp2, mask); + andr(tmp1, tmp1, tmp2); + bnez(tmp1, done); + j(*where); // offset is too large so we have to use j instead of beqz here + bind(done); +} + +void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, + int number_of_arguments) { + // interpreter specific + // + // Note: No need to save/restore rbcp & rlocals pointer since these + // are callee saved registers and no blocking/ GC can happen + // in leaf calls. +#ifdef ASSERT + { + Label L; + ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); + beqz(t0, L); + stop("InterpreterMacroAssembler::call_VM_leaf_base:" + " last_sp != NULL"); + bind(L); + } +#endif /* ASSERT */ + // super call + MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); +} + +void InterpreterMacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + // interpreter specific + // + // Note: Could avoid restoring locals ptr (callee saved) - however doesn't + // really make a difference for these runtime calls, since they are + // slow anyway. Btw., bcp must be saved/restored since it may change + // due to GC. 
+ save_bcp(); +#ifdef ASSERT + { + Label L; + ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); + beqz(t0, L); + stop("InterpreterMacroAssembler::call_VM_base:" + " last_sp != NULL"); + bind(L); + } +#endif /* ASSERT */ + // super call + MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp, + entry_point, number_of_arguments, + check_exceptions); +// interpreter specific + restore_bcp(); + restore_locals(); +} + +void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) { + assert_different_registers(obj, tmp, t0, mdo_addr.base()); + Label update, next, none; + + verify_oop(obj); + + bnez(obj, update); + orptr(mdo_addr, TypeEntries::null_seen, t0, tmp); + j(next); + + bind(update); + load_klass(obj, obj); + + ld(t0, mdo_addr); + xorr(obj, obj, t0); + andi(t0, obj, TypeEntries::type_klass_mask); + beqz(t0, next); // klass seen before, nothing to + // do. The unknown bit may have been + // set already but no need to check. + + andi(t0, obj, TypeEntries::type_unknown); + bnez(t0, next); + // already unknown. Nothing to do anymore. + + ld(t0, mdo_addr); + beqz(t0, none); + li(tmp, (u1)TypeEntries::null_seen); + beq(t0, tmp, none); + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the + // profiling to this obj's klass + ld(t0, mdo_addr); + xorr(obj, obj, t0); + andi(t0, obj, TypeEntries::type_klass_mask); + beqz(t0, next); + + // different than before. Cannot keep accurate profile. + orptr(mdo_addr, TypeEntries::type_unknown, t0, tmp); + j(next); + + bind(none); + // first time here. Set profile type. + sd(obj, mdo_addr); + + bind(next); +} + +void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { + if (!ProfileInterpreter) { + return; + } + + if (MethodData::profile_arguments() || MethodData::profile_return()) { + Label profile_continue; + + test_method_data_pointer(mdp, profile_continue); + + int off_to_start = is_virtual ? 
in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); + + lbu(t0, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start)); + if (is_virtual) { + li(tmp, (u1)DataLayout::virtual_call_type_data_tag); + bne(t0, tmp, profile_continue); + } else { + li(tmp, (u1)DataLayout::call_type_data_tag); + bne(t0, tmp, profile_continue); + } + + // calculate slot step + static int stack_slot_offset0 = in_bytes(TypeEntriesAtCall::stack_slot_offset(0)); + static int slot_step = in_bytes(TypeEntriesAtCall::stack_slot_offset(1)) - stack_slot_offset0; + + // calculate type step + static int argument_type_offset0 = in_bytes(TypeEntriesAtCall::argument_type_offset(0)); + static int type_step = in_bytes(TypeEntriesAtCall::argument_type_offset(1)) - argument_type_offset0; + + if (MethodData::profile_arguments()) { + Label done, loop, loopEnd, profileArgument, profileReturnType; + RegSet pushed_registers; + pushed_registers += x15; + pushed_registers += x16; + pushed_registers += x17; + Register mdo_addr = x15; + Register index = x16; + Register off_to_args = x17; + push_reg(pushed_registers, sp); + + mv(off_to_args, in_bytes(TypeEntriesAtCall::args_data_offset())); + mv(t0, TypeProfileArgsLimit); + beqz(t0, loopEnd); + + mv(index, zr); // index < TypeProfileArgsLimit + bind(loop); + bgtz(index, profileReturnType); + li(t0, (int)MethodData::profile_return()); + beqz(t0, profileArgument); // (index > 0 || MethodData::profile_return()) == false + bind(profileReturnType); + // If return value type is profiled we may have no argument to profile + ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); + mv(t1, - TypeStackSlotEntries::per_arg_count()); + mul(t1, index, t1); + add(tmp, tmp, t1); + li(t1, TypeStackSlotEntries::per_arg_count()); + add(t0, mdp, off_to_args); + blt(tmp, t1, done); + + bind(profileArgument); + + ld(tmp, Address(callee, Method::const_offset())); + load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset())); + // stack offset o (zero based) from the start of the argument + // list, for n arguments translates into offset n - o - 1 from + // the end of the argument list + li(t0, stack_slot_offset0); + li(t1, slot_step); + mul(t1, index, t1); + add(t0, t0, t1); + add(t0, mdp, t0); + ld(t0, Address(t0)); + sub(tmp, tmp, t0); + addi(tmp, tmp, -1); + Address arg_addr = argument_address(tmp); + ld(tmp, arg_addr); + + li(t0, argument_type_offset0); + li(t1, type_step); + mul(t1, index, t1); + add(t0, t0, t1); + add(mdo_addr, mdp, t0); + Address mdo_arg_addr(mdo_addr, 0); + profile_obj_type(tmp, mdo_arg_addr, t1); + + int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); + addi(off_to_args, off_to_args, to_add); + + // increment index by 1 + addi(index, index, 1); + li(t1, TypeProfileArgsLimit); + blt(index, t1, loop); + bind(loopEnd); + + if (MethodData::profile_return()) { + ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); + addi(tmp, tmp, -TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count()); + } + + add(t0, mdp, off_to_args); + bind(done); + mv(mdp, t0); + + // unspill the clobbered registers + pop_reg(pushed_registers, sp); + + if (MethodData::profile_return()) { + // We're right after the type profile for the last + // argument. tmp is the number of cells left in the + // CallTypeData/VirtualCallTypeData to reach its end. Non null + // if there's a return to profile. 
+ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); + shadd(mdp, tmp, mdp, tmp, exact_log2(DataLayout::cell_size)); + } + sd(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); + } else { + assert(MethodData::profile_return(), "either profile call args or call ret"); + update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); + } + + // mdp points right after the end of the + // CallTypeData/VirtualCallTypeData, right after the cells for the + // return value type if there's one + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { + assert_different_registers(mdp, ret, tmp, xbcp, t0, t1); + if (ProfileInterpreter && MethodData::profile_return()) { + Label profile_continue, done; + + test_method_data_pointer(mdp, profile_continue); + + if (MethodData::profile_return_jsr292_only()) { + assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); + + // If we don't profile all invoke bytecodes we must make sure + // it's a bytecode we indeed profile. We can't go back to the + // beginning of the ProfileData we intend to update to check its + // type because we're right after it and we don't know its + // length + Label do_profile; + lbu(t0, Address(xbcp, 0)); + li(tmp, (u1)Bytecodes::_invokedynamic); + beq(t0, tmp, do_profile); + li(tmp, (u1)Bytecodes::_invokehandle); + beq(t0, tmp, do_profile); + get_method(tmp); + lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); + li(t1, static_cast<int>(vmIntrinsics::_compiledLambdaForm)); + bne(t0, t1, profile_continue); + bind(do_profile); + } + + Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); + mv(tmp, ret); + profile_obj_type(tmp, mdo_ret_addr, t1); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3) { + assert_different_registers(t0, t1, mdp, tmp1, tmp2, tmp3); + if (ProfileInterpreter && MethodData::profile_parameters()) { + Label profile_continue, done; + + test_method_data_pointer(mdp, profile_continue); + + // Load the offset of the area within the MDO used for + // parameters. If it's negative we're not profiling any parameters + lwu(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()))); + srli(tmp2, tmp1, 31); + bnez(tmp2, profile_continue); // i.e. sign bit set + + // Compute a pointer to the area for parameters from the offset + // and move the pointer to the slot for the last + // parameters. Collect profiling from last parameter down.
+ // mdo start + parameters offset + array length - 1 + add(mdp, mdp, tmp1); + ld(tmp1, Address(mdp, ArrayData::array_len_offset())); + add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); + + Label loop; + bind(loop); + + int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); + int type_base = in_bytes(ParametersTypeData::type_offset(0)); + int per_arg_scale = exact_log2(DataLayout::cell_size); + add(t0, mdp, off_base); + add(t1, mdp, type_base); + + shadd(tmp2, tmp1, t0, tmp2, per_arg_scale); + // load offset on the stack from the slot for this parameter + ld(tmp2, Address(tmp2, 0)); + neg(tmp2, tmp2); + + // read the parameter from the local area + shadd(tmp2, tmp2, xlocals, tmp2, Interpreter::logStackElementSize); + ld(tmp2, Address(tmp2, 0)); + + // profile the parameter + shadd(t1, tmp1, t1, t0, per_arg_scale); + Address arg_type(t1, 0); + profile_obj_type(tmp2, arg_type, tmp3); + + // go to next parameter + add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); + bgez(tmp1, loop); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::get_method_counters(Register method, + Register mcs, Label& skip) { + Label has_counters; + ld(mcs, Address(method, Method::method_counters_offset())); + bnez(mcs, has_counters); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::build_method_counters), method); + ld(mcs, Address(method, Method::method_counters_offset())); + beqz(mcs, skip); // No MethodCounters allocated, OutOfMemory + bind(has_counters); +} + +#ifdef ASSERT +void InterpreterMacroAssembler::verify_access_flags(Register access_flags, uint32_t flag_bits, + const char* msg, bool stop_by_hit) { + Label L; + andi(t0, access_flags, flag_bits); + if (stop_by_hit) { + beqz(t0, L); + } else { + bnez(t0, L); + } + stop(msg); + bind(L); +} + +void InterpreterMacroAssembler::verify_frame_setup() { + Label L; + const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); + ld(t0, monitor_block_top); + beq(esp, t0, L); + stop("broken stack frame setup in interpreter"); + bind(L); +} +#endif Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/interp_masm_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/interp_masm_riscv.hpp @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_INTERP_MASM_RISCV_HPP +#define CPU_RISCV_INTERP_MASM_RISCV_HPP + +#include "asm/macroAssembler.hpp" +#include "interpreter/invocationCounter.hpp" +#include "runtime/frame.hpp" + +// This file specializes the assembler with interpreter-specific macros + +typedef ByteSize (*OffsetFunction)(uint); + +class InterpreterMacroAssembler: public MacroAssembler { + protected: + // Interpreter specific version of call_VM_base + using MacroAssembler::call_VM_leaf_base; + + virtual void call_VM_leaf_base(address entry_point, + int number_of_arguments); + + virtual void call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions); + + // base routine for all dispatches + void dispatch_base(TosState state, address* table, bool verifyoop = true, + bool generate_poll = false, Register Rs = t0); + + public: + InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {} + virtual ~InterpreterMacroAssembler() {} + + void load_earlyret_value(TosState state); + + void jump_to_entry(address entry); + + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + // Interpreter-specific registers + void save_bcp() { + sd(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); + } + + void restore_bcp() { + ld(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); + } + + void restore_locals() { + ld(xlocals, Address(fp, frame::interpreter_frame_locals_offset * wordSize)); + } + + void restore_constant_pool_cache() { + ld(xcpool, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); + } + + void get_dispatch(); + + // Helpers for runtime call arguments/results + void get_method(Register reg) { + ld(reg, Address(fp, frame::interpreter_frame_method_offset * wordSize)); + } + + void get_const(Register reg) { + get_method(reg); + ld(reg, Address(reg, in_bytes(Method::const_offset()))); + } + + void get_constant_pool(Register reg) { + get_const(reg); + ld(reg, Address(reg, in_bytes(ConstMethod::constants_offset()))); + } + + void get_constant_pool_cache(Register reg) { + get_constant_pool(reg); + ld(reg, Address(reg, ConstantPool::cache_offset_in_bytes())); + } + + void get_cpool_and_tags(Register cpool, Register tags) { + get_constant_pool(cpool); + ld(tags, Address(cpool, ConstantPool::tags_offset_in_bytes())); + } + + void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); + void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); + void get_method_counters(Register method, Register mcs, Label& skip); + + // Load cpool->resolved_references(index). + void load_resolved_reference_at_index(Register result, Register index, Register tmp = x15); + + // Load cpool->resolved_klass_at(index).
+ void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); + + void load_resolved_method_at_index(int byte_no, Register method, Register cache); + + void pop_ptr(Register r = x10); + void pop_i(Register r = x10); + void pop_l(Register r = x10); + void pop_f(FloatRegister r = f10); + void pop_d(FloatRegister r = f10); + void push_ptr(Register r = x10); + void push_i(Register r = x10); + void push_l(Register r = x10); + void push_f(FloatRegister r = f10); + void push_d(FloatRegister r = f10); + + void pop(TosState state); // transition vtos -> state + void push(TosState state); // transition state -> vtos + + void empty_expression_stack() { + ld(esp, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); + // NULL last_sp until next java call + sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); + } + + // Helpers for swap and dup + void load_ptr(int n, Register val); + void store_ptr(int n, Register val); + + // Load float value from 'address'. The value is loaded onto the FPU register v0. + void load_float(Address src); + void load_double(Address src); + + // Generate a subtype check: branch to ok_is_subtype if sub_klass is + // a subtype of super_klass. + void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); + + // Dispatching + void dispatch_prolog(TosState state, int step = 0); + void dispatch_epilog(TosState state, int step = 0); + // dispatch via t0 + void dispatch_only(TosState state, bool generate_poll = false, Register Rs = t0); + // dispatch normal table via t0 (assume t0 is loaded already) + void dispatch_only_normal(TosState state, Register Rs = t0); + void dispatch_only_noverify(TosState state, Register Rs = t0); + // load t0 from [xbcp + step] and dispatch via t0 + void dispatch_next(TosState state, int step = 0, bool generate_poll = false); + // load t0 from [xbcp] and dispatch via t0 and table + void dispatch_via (TosState state, address* table); + + // jump to an invoked target + void prepare_to_jump_from_interpreted(); + void jump_from_interpreted(Register method); + + + // Returning from interpreted functions + // + // Removes the current activation (incl. unlocking of monitors) + // and sets up the return address. This code is also used for + // exception unwinding. In that case, we do not want to throw + // IllegalMonitorStateExceptions, since that might get us into an + // infinite rethrow exception loop. + // Additionally this code is used for popFrame and earlyReturn. + // In popFrame case we want to skip throwing an exception, + // installing an exception, and notifying jvmdi. + // In earlyReturn case we only want to skip throwing an exception + // and installing an exception. + void remove_activation(TosState state, + bool throw_monitor_exception = true, + bool install_monitor_exception = true, + bool notify_jvmdi = true); + + // FIXME: Give us a valid frame at a null check.
+ virtual void null_check(Register reg, int offset = -1) { + MacroAssembler::null_check(reg, offset); + } + + // Object locking + void lock_object (Register lock_reg); + void unlock_object(Register lock_reg); + + // Interpreter profiling operations + void set_method_data_pointer_for_bcp(); + void test_method_data_pointer(Register mdp, Label& zero_continue); + void verify_method_data_pointer(); + + void set_mdp_data_at(Register mdp_in, int constant, Register value); + void increment_mdp_data_at(Address data, bool decrement = false); + void increment_mdp_data_at(Register mdp_in, int constant, + bool decrement = false); + void increment_mdp_data_at(Register mdp_in, Register reg, int constant, + bool decrement = false); + void increment_mask_and_jump(Address counter_addr, + int increment, Address mask, + Register tmp1, Register tmp2, + bool preloaded, Label* where); + + void set_mdp_flag_at(Register mdp_in, int flag_constant); + void test_mdp_data_at(Register mdp_in, int offset, Register value, + Register test_value_out, + Label& not_equal_continue); + + void record_klass_in_profile(Register receiver, Register mdp, + Register reg2, bool is_virtual_call); + void record_klass_in_profile_helper(Register receiver, Register mdp, + Register reg2, + Label& done, bool is_virtual_call); + void record_item_in_profile_helper(Register item, Register mdp, + Register reg2, int start_row, Label& done, int total_rows, + OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, + int non_profiled_offset); + + void update_mdp_by_offset(Register mdp_in, int offset_of_offset); + void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); + void update_mdp_by_constant(Register mdp_in, int constant); + void update_mdp_for_ret(Register return_bci); + + // narrow int return value + void narrow(Register result); + + void profile_taken_branch(Register mdp, Register bumped_count); + void profile_not_taken_branch(Register mdp); + void profile_call(Register mdp); + void profile_final_call(Register mdp); + void profile_virtual_call(Register receiver, Register mdp, + Register t1, + bool receiver_can_be_null = false); + void profile_ret(Register return_bci, Register mdp); + void profile_null_seen(Register mdp); + void profile_typecheck(Register mdp, Register klass, Register temp); + void profile_typecheck_failed(Register mdp); + void profile_switch_default(Register mdp); + void profile_switch_case(Register index_in_scratch, Register mdp, + Register temp); + + void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp); + void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); + void profile_return_type(Register mdp, Register ret, Register tmp); + void profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3); + + // Debugging + // only if +VerifyFPU && (state == ftos || state == dtos) + void verify_FPU(int stack_depth, TosState state = ftos); + + typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; + + // support for jvmti/dtrace + void notify_method_entry(); + void notify_method_exit(TosState state, NotifyMethodExitMode mode); + + virtual void _call_Unimplemented(address call_site) { + save_bcp(); + set_last_Java_frame(esp, fp, (address) pc(), t0); + MacroAssembler::_call_Unimplemented(call_site); + } + +#ifdef ASSERT + void verify_access_flags(Register access_flags, uint32_t flag_bits, + const char* msg, bool stop_by_hit = true); + void verify_frame_setup(); +#endif +}; + +#endif // 
CPU_RISCV_INTERP_MASM_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "memory/universe.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/icache.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/signature.hpp" + +#define __ _masm-> + +// Implementation of SignatureHandlerGenerator +Register InterpreterRuntime::SignatureHandlerGenerator::from() { return xlocals; } +Register InterpreterRuntime::SignatureHandlerGenerator::to() { return sp; } +Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return t0; } + +Register InterpreterRuntime::SignatureHandlerGenerator::next_gpr() { + if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { + return g_INTArgReg[++_num_reg_int_args]; + } + return noreg; +} + +FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() { + if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { + return g_FPArgReg[_num_reg_fp_args++]; + } else { + return fnoreg; + } +} + +int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() { + int ret = _stack_offset; + _stack_offset += wordSize; + return ret; +} + +InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( + const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { + _masm = new MacroAssembler(buffer); // allocate on resourse area by default + _num_reg_int_args = (method->is_static() ? 
1 : 0); + _num_reg_fp_args = 0; + _stack_offset = 0; +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { + const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + + Register reg = next_gpr(); + if (reg != noreg) { + __ lw(reg, src); + } else { + __ lw(x10, src); + __ sw(x10, Address(to(), next_stack_offset())); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { + const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); + + Register reg = next_gpr(); + if (reg != noreg) { + __ ld(reg, src); + } else { + __ ld(x10, src); + __ sd(x10, Address(to(), next_stack_offset())); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { + const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + + FloatRegister reg = next_fpr(); + if (reg != fnoreg) { + __ flw(reg, src); + } else { + // a floating-point argument is passed according to the integer calling + // convention if no floating-point argument register available + pass_int(); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { + const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); + + FloatRegister reg = next_fpr(); + if (reg != fnoreg) { + __ fld(reg, src); + } else { + // a floating-point argument is passed according to the integer calling + // convention if no floating-point argument register available + pass_long(); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { + Register reg = next_gpr(); + if (reg == c_rarg1) { + assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); + __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); + } else if (reg != noreg) { + // c_rarg2-c_rarg7 + __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); + __ mv(reg, zr); //_num_reg_int_args:c_rarg -> 1:c_rarg2, 2:c_rarg3... 
+ __ ld(temp(), x10); + Label L; + __ beqz(temp(), L); + __ mv(reg, x10); + __ bind(L); + } else { + //to stack + __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); + __ ld(temp(), x10); + Label L; + __ bnez(temp(), L); + __ mv(x10, zr); + __ bind(L); + assert(sizeof(jobject) == wordSize, ""); + __ sd(x10, Address(to(), next_stack_offset())); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { + // generate code to handle arguments + iterate(fingerprint); + + // return result handler + __ la(x10, ExternalAddress(Interpreter::result_handler(method()->result_type()))); + __ ret(); + + __ flush(); +} + + +// Implementation of SignatureHandlerLibrary + +void SignatureHandlerLibrary::pd_set_handler(address handler) {} + + +class SlowSignatureHandler + : public NativeSignatureIterator { + private: + address _from; + intptr_t* _to; + intptr_t* _int_args; + intptr_t* _fp_args; + intptr_t* _fp_identifiers; + unsigned int _num_reg_int_args; + unsigned int _num_reg_fp_args; + + intptr_t* single_slot_addr() { + intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + return from_addr; + } + + intptr_t* double_slot_addr() { + intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(1)); + _from -= 2 * Interpreter::stackElementSize; + return from_addr; + } + + int pass_gpr(intptr_t value) { + if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { + *_int_args++ = value; + return _num_reg_int_args++; + } + return -1; + } + + int pass_fpr(intptr_t value) { + if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { + *_fp_args++ = value; + return _num_reg_fp_args++; + } + return -1; + } + + void pass_stack(intptr_t value) { + *_to++ = value; + } + + virtual void pass_int() { + jint value = *(jint*)single_slot_addr(); + if (pass_gpr(value) < 0) { + pass_stack(value); + } + } + + virtual void pass_long() { + intptr_t value = *double_slot_addr(); + if (pass_gpr(value) < 0) { + pass_stack(value); + } + } + + virtual void pass_object() { + intptr_t* addr = single_slot_addr(); + intptr_t value = *addr == 0 ? NULL : (intptr_t)addr; + if (pass_gpr(value) < 0) { + pass_stack(value); + } + } + + virtual void pass_float() { + jint value = *(jint*) single_slot_addr(); + // a floating-point argument is passed according to the integer calling + // convention if no floating-point argument register available + if (pass_fpr(value) < 0 && pass_gpr(value) < 0) { + pass_stack(value); + } + } + + virtual void pass_double() { + intptr_t value = *double_slot_addr(); + int arg = pass_fpr(value); + if (0 <= arg) { + *_fp_identifiers |= (1ull << arg); // mark as double + } else if (pass_gpr(value) < 0) { // no need to mark if passing by integer registers or stack + pass_stack(value); + } + } + + public: + SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to) + : NativeSignatureIterator(method) + { + _from = from; + _to = to; + + _int_args = to - (method->is_static() ? 16 : 17); + _fp_args = to - 8; + _fp_identifiers = to - 9; + *(int*) _fp_identifiers = 0; + _num_reg_int_args = (method->is_static() ? 
1 : 0); + _num_reg_fp_args = 0; + } + + ~SlowSignatureHandler() + { + _from = NULL; + _to = NULL; + _int_args = NULL; + _fp_args = NULL; + _fp_identifiers = NULL; + } +}; + + +JRT_ENTRY(address, + InterpreterRuntime::slow_signature_handler(JavaThread* current, + Method* method, + intptr_t* from, + intptr_t* to)) + methodHandle m(current, (Method*)method); + assert(m->is_native(), "sanity check"); + + // handle arguments + SlowSignatureHandler ssh(m, (address)from, to); + ssh.iterate(UCONST64(-1)); + + // return result handler + return Interpreter::result_handler(m->result_type()); +JRT_END Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_INTERPRETERRT_RISCV_HPP +#define CPU_RISCV_INTERPRETERRT_RISCV_HPP + +// This is included in the middle of class Interpreter. +// Do not include files here. + +// native method calls + +class SignatureHandlerGenerator: public NativeSignatureIterator { + private: + MacroAssembler* _masm; + unsigned int _num_reg_fp_args; + unsigned int _num_reg_int_args; + int _stack_offset; + + void pass_int(); + void pass_long(); + void pass_float(); + void pass_double(); + void pass_object(); + + Register next_gpr(); + FloatRegister next_fpr(); + int next_stack_offset(); + + public: + // Creation + SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); + virtual ~SignatureHandlerGenerator() { + _masm = NULL; + } + + // Code generation + void generate(uint64_t fingerprint); + + // Code generation support + static Register from(); + static Register to(); + static Register temp(); +}; + +#endif // CPU_RISCV_INTERPRETERRT_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP +#define CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP + +private: + + // FP value associated with _last_Java_sp: + intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + +public: + // Each arch must define reset, save, restore + // These are used by objects that only care about: + // 1 - initializing a new state (thread creation, javaCalls) + // 2 - saving a current state (javaCalls) + // 3 - restoring an old state (javaCalls) + + void clear(void) { + // clearing _last_Java_sp must be first + _last_Java_sp = NULL; + OrderAccess::release(); + _last_Java_fp = NULL; + _last_Java_pc = NULL; + } + + void copy(JavaFrameAnchor* src) { + // In order to make sure the transition state is valid for "this" + // We must clear _last_Java_sp before copying the rest of the new data + // + // Hack Alert: Temporary bugfix for 4717480/4721647 + // To act like previous version (pd_cache_state) don't NULL _last_Java_sp + // unless the value is changing + // + assert(src != NULL, "Src should not be NULL."); + if (_last_Java_sp != src->_last_Java_sp) { + _last_Java_sp = NULL; + OrderAccess::release(); + } + _last_Java_fp = src->_last_Java_fp; + _last_Java_pc = src->_last_Java_pc; + // Must be last so profiler will always see valid frame if has_last_frame() is true + _last_Java_sp = src->_last_Java_sp; + } + + bool walkable(void) { return _last_Java_sp != NULL && _last_Java_pc != NULL; } + + void make_walkable(); + + intptr_t* last_Java_sp(void) const { return _last_Java_sp; } + + const address last_Java_pc(void) { return _last_Java_pc; } + +private: + + static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } + +public: + + void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } + + intptr_t* last_Java_fp(void) { return _last_Java_fp; } + +#endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "memory/resourceArea.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "prims/jvmtiExport.hpp" +#include "runtime/safepoint.hpp" + +#define __ masm-> + +#define BUFFER_SIZE 30*wordSize + +// Instead of issuing a LoadLoad barrier we create an address +// dependency between loads; this might be more efficient. + +// Common register usage: +// x10/f10: result +// c_rarg0: jni env +// c_rarg1: obj +// c_rarg2: jfield id + +static const Register robj = x13; +static const Register rcounter = x14; +static const Register roffset = x15; +static const Register rcounter_addr = x16; +static const Register result = x17; + +address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { + const char *name; + switch (type) { + case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; + case T_BYTE: name = "jni_fast_GetByteField"; break; + case T_CHAR: name = "jni_fast_GetCharField"; break; + case T_SHORT: name = "jni_fast_GetShortField"; break; + case T_INT: name = "jni_fast_GetIntField"; break; + case T_LONG: name = "jni_fast_GetLongField"; break; + case T_FLOAT: name = "jni_fast_GetFloatField"; break; + case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; + default: ShouldNotReachHere(); + name = NULL; // unreachable + } + ResourceMark rm; + BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); + CodeBuffer cbuf(blob); + MacroAssembler* masm = new MacroAssembler(&cbuf); + address fast_entry = __ pc(); + + Label slow; + int32_t offset = 0; + __ la_patchable(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), offset); + __ addi(rcounter_addr, rcounter_addr, offset); + + Address safepoint_counter_addr(rcounter_addr, 0); + __ lwu(rcounter, safepoint_counter_addr); + // An even value means there are no ongoing safepoint operations + __ andi(t0, rcounter, 1); + __ bnez(t0, slow); + + if (JvmtiExport::can_post_field_access()) { + // Using barrier to order wrt. JVMTI check and load of result. + __ membar(MacroAssembler::LoadLoad); + + // Check to see if a field access watch has been set before we + // take the fast path. + int32_t offset2; + __ la_patchable(result, + ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), + offset2); + __ lwu(result, Address(result, offset2)); + __ bnez(result, slow); + + __ mv(robj, c_rarg1); + } else { + // Using address dependency to order wrt. load of result. 
+ __ xorr(robj, c_rarg1, rcounter); + __ xorr(robj, robj, rcounter); // obj, since + // robj ^ rcounter ^ rcounter == robj + // robj is address dependent on rcounter. + } + + // Both robj and t0 are clobbered by try_resolve_jobject_in_native. + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + assert_cond(bs != NULL); + bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, t0, slow); + + __ srli(roffset, c_rarg2, 2); // offset + + assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); + speculative_load_pclist[count] = __ pc(); // Used by the segfault handler + __ add(roffset, robj, roffset); + + switch (type) { + case T_BOOLEAN: __ lbu(result, Address(roffset, 0)); break; + case T_BYTE: __ lb(result, Address(roffset, 0)); break; + case T_CHAR: __ lhu(result, Address(roffset, 0)); break; + case T_SHORT: __ lh(result, Address(roffset, 0)); break; + case T_INT: __ lw(result, Address(roffset, 0)); break; + case T_LONG: __ ld(result, Address(roffset, 0)); break; + case T_FLOAT: { + __ flw(f28, Address(roffset, 0)); // f28 as temporaries + __ fmv_x_w(result, f28); // f{31--0}-->x + break; + } + case T_DOUBLE: { + __ fld(f28, Address(roffset, 0)); // f28 as temporaries + __ fmv_x_d(result, f28); // d{63--0}-->x + break; + } + default: ShouldNotReachHere(); + } + + // Using acquire: Order JVMTI check and load of result wrt. succeeding check + // (LoadStore for volatile field). + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + + __ lw(t0, safepoint_counter_addr); + __ bne(rcounter, t0, slow); + + switch (type) { + case T_FLOAT: __ fmv_w_x(f10, result); break; + case T_DOUBLE: __ fmv_d_x(f10, result); break; + default: __ mv(x10, result); break; + } + __ ret(); + + slowcase_entry_pclist[count++] = __ pc(); + __ bind(slow); + address slow_case_addr; + switch (type) { + case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; + case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; + case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; + case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; + case T_INT: slow_case_addr = jni_GetIntField_addr(); break; + case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; + case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; + case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; + default: ShouldNotReachHere(); + slow_case_addr = NULL; // unreachable + } + + { + __ enter(); + int32_t tmp_offset = 0; + __ la_patchable(t0, ExternalAddress(slow_case_addr), tmp_offset); + __ jalr(x1, t0, tmp_offset); + __ leave(); + __ ret(); + } + __ flush(); + + return fast_entry; +} + + +address JNI_FastGetField::generate_fast_get_boolean_field() { + return generate_fast_get_int_field0(T_BOOLEAN); +} + +address JNI_FastGetField::generate_fast_get_byte_field() { + return generate_fast_get_int_field0(T_BYTE); +} + +address JNI_FastGetField::generate_fast_get_char_field() { + return generate_fast_get_int_field0(T_CHAR); +} + +address JNI_FastGetField::generate_fast_get_short_field() { + return generate_fast_get_int_field0(T_SHORT); +} + +address JNI_FastGetField::generate_fast_get_int_field() { + return generate_fast_get_int_field0(T_INT); +} + +address JNI_FastGetField::generate_fast_get_long_field() { + return generate_fast_get_int_field0(T_LONG); +} + +address JNI_FastGetField::generate_fast_get_float_field() { + return generate_fast_get_int_field0(T_FLOAT); +} + +address JNI_FastGetField::generate_fast_get_double_field() { + return 
generate_fast_get_int_field0(T_DOUBLE); +} Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/jniTypes_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/jniTypes_riscv.hpp @@ -0,0 +1,106 @@ +/* + * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_JNITYPES_RISCV_HPP +#define CPU_RISCV_JNITYPES_RISCV_HPP + +#include "jni.h" +#include "memory/allocation.hpp" +#include "oops/oop.hpp" + +// This file holds platform-dependent routines used to write primitive jni +// types to the array of arguments passed into JavaCalls::call + +class JNITypes : private AllStatic { + // These functions write a java primitive type (in native format) + // to a java stack slot array to be passed as an argument to JavaCalls:calls. + // I.e., they are functionally 'push' operations if they have a 'pos' + // formal parameter. Note that jlong's and jdouble's are written + // _in reverse_ of the order in which they appear in the interpreter + // stack. This is because call stubs (see stubGenerator_sparc.cpp) + // reverse the argument list constructed by JavaCallArguments (see + // javaCalls.hpp). + +public: + // Ints are stored in native format in one JavaCallArgument slot at *to. + static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; } + static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; } + static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; } + + // Longs are stored in native format in one JavaCallArgument slot at + // *(to+1). + static inline void put_long(jlong from, intptr_t *to) { + *(jlong*) (to + 1) = from; + } + + static inline void put_long(jlong from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = from; + pos += 2; + } + + static inline void put_long(jlong *from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = *from; + pos += 2; + } + + // Oops are stored in native format in one JavaCallArgument slot at *to. + static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); } + static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; } + + // Floats are stored in native format in one JavaCallArgument slot at *to. 
+ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } + static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } + static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } + +#undef _JNI_SLOT_OFFSET +#define _JNI_SLOT_OFFSET 1 + // Doubles are stored in native word format in one JavaCallArgument + // slot at *(to+1). + static inline void put_double(jdouble from, intptr_t *to) { + *(jdouble*) (to + 1) = from; + } + + static inline void put_double(jdouble from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = from; + pos += 2; + } + + static inline void put_double(jdouble *from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = *from; + pos += 2; + } + + // The get_xxx routines, on the other hand, actually _do_ fetch + // java primitive types from the interpreter stack. + // No need to worry about alignment on Intel. + static inline jint get_int (intptr_t *from) { return *(jint *) from; } + static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } + static inline oop get_obj (intptr_t *from) { return *(oop *) from; } + static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } + static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } +#undef _JNI_SLOT_OFFSET +}; + +#endif // CPU_RISCV_JNITYPES_RISCV_HPP Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp @@ -0,0 +1,4253 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "compiler/disassembler.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "memory/universe.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/accessDecorators.hpp" +#include "oops/compressedOops.inline.hpp" +#include "oops/klass.inline.hpp" +#include "oops/oop.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/jniHandles.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.hpp" +#include "utilities/powerOfTwo.hpp" +#ifdef COMPILER2 +#include "opto/compile.hpp" +#include "opto/node.hpp" +#include "opto/output.hpp" +#endif + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) block_comment(str) +#endif +#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") + +static void pass_arg0(MacroAssembler* masm, Register arg) { + if (c_rarg0 != arg) { + assert_cond(masm != NULL); + masm->mv(c_rarg0, arg); + } +} + +static void pass_arg1(MacroAssembler* masm, Register arg) { + if (c_rarg1 != arg) { + assert_cond(masm != NULL); + masm->mv(c_rarg1, arg); + } +} + +static void pass_arg2(MacroAssembler* masm, Register arg) { + if (c_rarg2 != arg) { + assert_cond(masm != NULL); + masm->mv(c_rarg2, arg); + } +} + +static void pass_arg3(MacroAssembler* masm, Register arg) { + if (c_rarg3 != arg) { + assert_cond(masm != NULL); + masm->mv(c_rarg3, arg); + } +} + +void MacroAssembler::align(int modulus, int extra_offset) { + CompressibleRegion cr(this); + while ((offset() + extra_offset) % modulus != 0) { nop(); } +} + +void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { + call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); +} + +// Implementation of call_VM versions + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + bool check_exceptions) { + call_VM_helper(oop_result, entry_point, 0, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions) { + pass_arg1(this, arg_1); + call_VM_helper(oop_result, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + bool check_exceptions) { + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + pass_arg1(this, arg_1); + call_VM_helper(oop_result, entry_point, 2, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + assert(arg_1 != c_rarg3, "smashed arg"); + assert(arg_2 != c_rarg3, "smashed arg"); + pass_arg3(this, arg_3); + + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + + pass_arg1(this, arg_1); + call_VM_helper(oop_result, entry_point, 3, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + call_VM_base(oop_result, xthread, last_java_sp, 
entry_point, number_of_arguments, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + bool check_exceptions) { + pass_arg1(this, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + Register arg_2, + bool check_exceptions) { + + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + pass_arg1(this, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + assert(arg_1 != c_rarg3, "smashed arg"); + assert(arg_2 != c_rarg3, "smashed arg"); + pass_arg3(this, arg_3); + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + pass_arg1(this, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); +} + +// these are no-ops overridden by InterpreterMacroAssembler +void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} +void MacroAssembler::check_and_handle_popframe(Register java_thread) {} + +// Calls to C land +// +// When entering C land, the fp, & esp of the last Java frame have to be recorded +// in the (thread-local) JavaThread object. When leaving C land, the last Java fp +// has to be reset to 0. This is required to allow proper stack traversal. +void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Register last_java_pc, + Register tmp) { + + if (last_java_pc->is_valid()) { + sd(last_java_pc, Address(xthread, + JavaThread::frame_anchor_offset() + + JavaFrameAnchor::last_Java_pc_offset())); + } + + // determine last_java_sp register + if (last_java_sp == sp) { + mv(tmp, sp); + last_java_sp = tmp; + } else if (!last_java_sp->is_valid()) { + last_java_sp = esp; + } + + sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset())); + + // last_java_fp is optional + if (last_java_fp->is_valid()) { + sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset())); + } +} + +void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + address last_java_pc, + Register tmp) { + assert(last_java_pc != NULL, "must provide a valid PC"); + + la(tmp, last_java_pc); + sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + + set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp); +} + +void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Label &L, + Register tmp) { + if (L.is_bound()) { + set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); + } else { + InstructionMark im(this); + L.add_patch_at(code(), locator()); + set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); + } +} + +void MacroAssembler::reset_last_Java_frame(bool clear_fp) { + // we must set sp to zero to clear frame + sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); + + // must clear fp, so that compiled frames are not confused; it is + // possible that we need it only for debugging + if (clear_fp) { + sd(zr, Address(xthread, JavaThread::last_Java_fp_offset())); + } + + // Always clear the pc because it could have been set by make_walkable() + sd(zr, Address(xthread, JavaThread::last_Java_pc_offset())); +} + +void 
MacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + // determine java_thread register + if (!java_thread->is_valid()) { + java_thread = xthread; + } + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = esp; + } + + // debugging support + assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); + assert(java_thread == xthread, "unexpected register"); + + assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); + assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); + + // push java thread (becomes first argument of C function) + mv(c_rarg0, java_thread); + + // set last Java frame before call + assert(last_java_sp != fp, "can't use fp"); + + Label l; + set_last_Java_frame(last_java_sp, fp, l, t0); + + // do the call, remove parameters + MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); + + // reset last Java frame + // Only interpreter should have to clear fp + reset_last_Java_frame(true); + + // C++ interp handles this in the interpreter + check_and_handle_popframe(java_thread); + check_and_handle_earlyret(java_thread); + + if (check_exceptions) { + // check for pending exceptions (java_thread is set upon return) + ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); + Label ok; + beqz(t0, ok); + int32_t offset = 0; + la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset); + jalr(x0, t0, offset); + bind(ok); + } + + // get oop result if there is one and reset the value in the thread + if (oop_result->is_valid()) { + get_vm_result(oop_result, java_thread); + } +} + +void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { + ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); + sd(zr, Address(java_thread, JavaThread::vm_result_offset())); + verify_oop(oop_result, "broken oop in call_VM_base"); +} + +void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { + ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); + sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); +} + +void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { + assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); + assert_different_registers(klass, xthread, tmp); + + Label L_fallthrough, L_tmp; + if (L_fast_path == NULL) { + L_fast_path = &L_fallthrough; + } else if (L_slow_path == NULL) { + L_slow_path = &L_fallthrough; + } + + // Fast path check: class is fully initialized + lbu(tmp, Address(klass, InstanceKlass::init_state_offset())); + sub(tmp, tmp, InstanceKlass::fully_initialized); + beqz(tmp, *L_fast_path); + + // Fast path check: current thread is initializer thread + ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); + + if (L_slow_path == &L_fallthrough) { + beq(xthread, tmp, *L_fast_path); + bind(*L_slow_path); + } else if (L_fast_path == &L_fallthrough) { + bne(xthread, tmp, *L_slow_path); + bind(*L_fast_path); + } else { + Unimplemented(); + } +} + +void MacroAssembler::verify_oop(Register reg, const char* s) { + if (!VerifyOops) { return; } + + // Pass register number to verify_oop_subroutine + const char* b = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("verify_oop: %s: 
%s", reg->name(), s); + b = code_string(ss.as_string()); + } + BLOCK_COMMENT("verify_oop {"); + + push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + + mv(c_rarg0, reg); // c_rarg0 : x10 + li(t0, (uintptr_t)(address)b); + + // call indirectly to solve generation ordering problem + int32_t offset = 0; + la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); + ld(t1, Address(t1, offset)); + jalr(t1); + + pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + + BLOCK_COMMENT("} verify_oop"); +} + +void MacroAssembler::verify_oop_addr(Address addr, const char* s) { + if (!VerifyOops) { + return; + } + + const char* b = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("verify_oop_addr: %s", s); + b = code_string(ss.as_string()); + } + BLOCK_COMMENT("verify_oop_addr {"); + + push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + + if (addr.uses(sp)) { + la(x10, addr); + ld(x10, Address(x10, 4 * wordSize)); + } else { + ld(x10, addr); + } + + li(t0, (uintptr_t)(address)b); + + // call indirectly to solve generation ordering problem + int32_t offset = 0; + la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); + ld(t1, Address(t1, offset)); + jalr(t1); + + pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + + BLOCK_COMMENT("} verify_oop_addr"); +} + +Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, + int extra_slot_offset) { + // cf. TemplateTable::prepare_invoke(), if (load_receiver). + int stackElementSize = Interpreter::stackElementSize; + int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); +#ifdef ASSERT + int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); + assert(offset1 - offset == stackElementSize, "correct arithmetic"); +#endif + if (arg_slot.is_constant()) { + return Address(esp, arg_slot.as_constant() * stackElementSize + offset); + } else { + assert_different_registers(t0, arg_slot.as_register()); + shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize)); + return Address(t0, offset); + } +} + +#ifndef PRODUCT +extern "C" void findpc(intptr_t x); +#endif + +void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) +{ + // In order to get locks to work, we need to fake a in_VM state + if (ShowMessageBoxOnError) { + JavaThread* thread = JavaThread::current(); + JavaThreadState saved_state = thread->thread_state(); + thread->set_thread_state(_thread_in_vm); +#ifndef PRODUCT + if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { + ttyLocker ttyl; + BytecodeCounter::print(); + } +#endif + if (os::message_box(msg, "Execution stopped, print registers?")) { + ttyLocker ttyl; + tty->print_cr(" pc = 0x%016lx", pc); +#ifndef PRODUCT + tty->cr(); + findpc(pc); + tty->cr(); +#endif + tty->print_cr(" x0 = 0x%016lx", regs[0]); + tty->print_cr(" x1 = 0x%016lx", regs[1]); + tty->print_cr(" x2 = 0x%016lx", regs[2]); + tty->print_cr(" x3 = 0x%016lx", regs[3]); + tty->print_cr(" x4 = 0x%016lx", regs[4]); + tty->print_cr(" x5 = 0x%016lx", regs[5]); + tty->print_cr(" x6 = 0x%016lx", regs[6]); + tty->print_cr(" x7 = 0x%016lx", regs[7]); + tty->print_cr(" x8 = 0x%016lx", regs[8]); + tty->print_cr(" x9 = 0x%016lx", regs[9]); + tty->print_cr("x10 = 0x%016lx", regs[10]); + tty->print_cr("x11 = 0x%016lx", regs[11]); + tty->print_cr("x12 = 0x%016lx", regs[12]); + tty->print_cr("x13 = 0x%016lx", regs[13]); + tty->print_cr("x14 = 0x%016lx", regs[14]); + tty->print_cr("x15 = 0x%016lx", regs[15]); + tty->print_cr("x16 = 0x%016lx", regs[16]); + 
tty->print_cr("x17 = 0x%016lx", regs[17]); + tty->print_cr("x18 = 0x%016lx", regs[18]); + tty->print_cr("x19 = 0x%016lx", regs[19]); + tty->print_cr("x20 = 0x%016lx", regs[20]); + tty->print_cr("x21 = 0x%016lx", regs[21]); + tty->print_cr("x22 = 0x%016lx", regs[22]); + tty->print_cr("x23 = 0x%016lx", regs[23]); + tty->print_cr("x24 = 0x%016lx", regs[24]); + tty->print_cr("x25 = 0x%016lx", regs[25]); + tty->print_cr("x26 = 0x%016lx", regs[26]); + tty->print_cr("x27 = 0x%016lx", regs[27]); + tty->print_cr("x28 = 0x%016lx", regs[28]); + tty->print_cr("x30 = 0x%016lx", regs[30]); + tty->print_cr("x31 = 0x%016lx", regs[31]); + BREAKPOINT; + } + } + fatal("DEBUG MESSAGE: %s", msg); +} + +void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) { + Label done, not_weak; + beqz(value, done); // Use NULL as-is. + + // Test for jweak tag. + andi(t0, value, JNIHandles::weak_tag_mask); + beqz(t0, not_weak); + + // Resolve jweak. + access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value, + Address(value, -JNIHandles::weak_tag_value), tmp, thread); + verify_oop(value); + j(done); + + bind(not_weak); + // Resolve (untagged) jobject. + access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); + verify_oop(value); + bind(done); +} + +void MacroAssembler::stop(const char* msg) { + address ip = pc(); + pusha(); + // The length of the instruction sequence emitted should be independent + // of the values of msg and ip so that the size of mach nodes for scratch + // emit and normal emit matches. + mv(c_rarg0, (address)msg); + mv(c_rarg1, (address)ip); + mv(c_rarg2, sp); + mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); + jalr(c_rarg3); + ebreak(); +} + +void MacroAssembler::unimplemented(const char* what) { + const char* buf = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("unimplemented: %s", what); + buf = code_string(ss.as_string()); + } + stop(buf); +} + +void MacroAssembler::emit_static_call_stub() { + // CompiledDirectStaticCall::set_to_interpreted knows the + // exact layout of this stub. + + ifence(); + mov_metadata(xmethod, (Metadata*)NULL); + + // Jump to the entry point of the i2c stub. 
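+  // Both the NULL metadata above and the zero branch target below are
+  // placeholders that CompiledDirectStaticCall::set_to_interpreted patches
+  // once the call site is resolved.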
+ int32_t offset = 0; + movptr_with_offset(t0, 0, offset); + jalr(x0, t0, offset); +} + +void MacroAssembler::call_VM_leaf_base(address entry_point, + int number_of_arguments, + Label *retaddr) { + call_native_base(entry_point, retaddr); +} + +void MacroAssembler::call_native(address entry_point, Register arg_0) { + pass_arg0(this, arg_0); + call_native_base(entry_point); +} + +void MacroAssembler::call_native_base(address entry_point, Label *retaddr) { + Label E, L; + int32_t offset = 0; + push_reg(0x80000040, sp); // push << t0 & xmethod >> to sp + movptr_with_offset(t0, entry_point, offset); + jalr(x1, t0, offset); + if (retaddr != NULL) { + bind(*retaddr); + } + pop_reg(0x80000040, sp); // pop << t0 & xmethod >> from sp +} + +void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { + call_VM_leaf_base(entry_point, number_of_arguments); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { + pass_arg0(this, arg_0); + call_VM_leaf_base(entry_point, 1); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + pass_arg0(this, arg_0); + pass_arg1(this, arg_1); + call_VM_leaf_base(entry_point, 2); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, + Register arg_1, Register arg_2) { + pass_arg0(this, arg_0); + pass_arg1(this, arg_1); + pass_arg2(this, arg_2); + call_VM_leaf_base(entry_point, 3); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + + assert(arg_0 != c_rarg1, "smashed arg"); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 2); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { + assert(arg_0 != c_rarg2, "smashed arg"); + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + assert(arg_0 != c_rarg1, "smashed arg"); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 3); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { + assert(arg_0 != c_rarg3, "smashed arg"); + assert(arg_1 != c_rarg3, "smashed arg"); + assert(arg_2 != c_rarg3, "smashed arg"); + pass_arg3(this, arg_3); + assert(arg_0 != c_rarg2, "smashed arg"); + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + assert(arg_0 != c_rarg1, "smashed arg"); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 4); +} + +void MacroAssembler::nop() { + addi(x0, x0, 0); +} + +void MacroAssembler::mv(Register Rd, Register Rs) { + if (Rd != Rs) { + addi(Rd, Rs, 0); + } +} + +void MacroAssembler::notr(Register Rd, Register Rs) { + xori(Rd, Rs, -1); +} + +void MacroAssembler::neg(Register Rd, Register Rs) { + sub(Rd, x0, Rs); +} + +void MacroAssembler::negw(Register Rd, Register Rs) { + subw(Rd, x0, Rs); +} + +void MacroAssembler::sext_w(Register Rd, Register Rs) { + addiw(Rd, Rs, 0); +} + +void MacroAssembler::zext_b(Register Rd, Register Rs) { + andi(Rd, Rs, 0xFF); +} + +void MacroAssembler::seqz(Register Rd, Register Rs) { + sltiu(Rd, Rs, 1); +} + +void MacroAssembler::snez(Register Rd, Register Rs) { + sltu(Rd, x0, Rs); +} + +void MacroAssembler::sltz(Register Rd, Register Rs) { + 
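+  // Rd = (Rs < 0) ? 1 : 0 (signed compare against x0)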
slt(Rd, Rs, x0); +} + +void MacroAssembler::sgtz(Register Rd, Register Rs) { + slt(Rd, x0, Rs); +} + +void MacroAssembler::fmv_s(FloatRegister Rd, FloatRegister Rs) { + if (Rd != Rs) { + fsgnj_s(Rd, Rs, Rs); + } +} + +void MacroAssembler::fabs_s(FloatRegister Rd, FloatRegister Rs) { + fsgnjx_s(Rd, Rs, Rs); +} + +void MacroAssembler::fneg_s(FloatRegister Rd, FloatRegister Rs) { + fsgnjn_s(Rd, Rs, Rs); +} + +void MacroAssembler::fmv_d(FloatRegister Rd, FloatRegister Rs) { + if (Rd != Rs) { + fsgnj_d(Rd, Rs, Rs); + } +} + +void MacroAssembler::fabs_d(FloatRegister Rd, FloatRegister Rs) { + fsgnjx_d(Rd, Rs, Rs); +} + +void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) { + fsgnjn_d(Rd, Rs, Rs); +} + +void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) { + vmnand_mm(vd, vs, vs); +} + +void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) { + vnsrl_wx(vd, vs, x0, vm); +} + +void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) { + vfsgnjn_vv(vd, vs, vs); +} + +void MacroAssembler::la(Register Rd, const address &dest) { + int64_t offset = dest - pc(); + if (is_offset_in_range(offset, 32)) { + auipc(Rd, (int32_t)offset + 0x800); //0x800, Note:the 11th sign bit + addi(Rd, Rd, ((int64_t)offset << 52) >> 52); + } else { + movptr(Rd, dest); + } +} + +void MacroAssembler::la(Register Rd, const Address &adr) { + InstructionMark im(this); + code_section()->relocate(inst_mark(), adr.rspec()); + relocInfo::relocType rtype = adr.rspec().reloc()->type(); + + switch (adr.getMode()) { + case Address::literal: { + if (rtype == relocInfo::none) { + li(Rd, (intptr_t)(adr.target())); + } else { + movptr(Rd, adr.target()); + } + break; + } + case Address::base_plus_offset: { + int32_t offset = 0; + baseOffset(Rd, adr, offset); + addi(Rd, Rd, offset); + break; + } + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::la(Register Rd, Label &label) { + la(Rd, target(label)); +} + +#define INSN(NAME) \ + void MacroAssembler::NAME##z(Register Rs, const address &dest) { \ + NAME(Rs, zr, dest); \ + } \ + void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ + NAME(Rs, zr, l, is_far); \ + } \ + + INSN(beq); + INSN(bne); + INSN(blt); + INSN(ble); + INSN(bge); + INSN(bgt); + +#undef INSN + +// Float compare branch instructions + +#define INSN(NAME, FLOATCMP, BRANCH) \ + void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ + FLOATCMP##_s(t0, Rs1, Rs2); \ + BRANCH(t0, l, is_far); \ + } \ + void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ + FLOATCMP##_d(t0, Rs1, Rs2); \ + BRANCH(t0, l, is_far); \ + } + + INSN(beq, feq, bnez); + INSN(bne, feq, beqz); + +#undef INSN + + +#define INSN(NAME, FLOATCMP1, FLOATCMP2) \ + void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ + if (is_unordered) { \ + /* jump if either source is NaN or condition is expected */ \ + FLOATCMP2##_s(t0, Rs2, Rs1); \ + beqz(t0, l, is_far); \ + } else { \ + /* jump if no NaN in source and condition is expected */ \ + FLOATCMP1##_s(t0, Rs1, Rs2); \ + bnez(t0, l, is_far); \ + } \ + } \ + void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ + if (is_unordered) { \ + /* jump if either source is NaN or condition is expected */ \ + FLOATCMP2##_d(t0, Rs2, Rs1); \ + beqz(t0, l, is_far); \ + } else { \ + 
/* jump if no NaN in source and condition is expected */ \ + FLOATCMP1##_d(t0, Rs1, Rs2); \ + bnez(t0, l, is_far); \ + } \ + } + + INSN(ble, fle, flt); + INSN(blt, flt, fle); + +#undef INSN + +#define INSN(NAME, CMP) \ + void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ + float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ + } \ + void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ + double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ + } + + INSN(bgt, blt); + INSN(bge, ble); + +#undef INSN + + +#define INSN(NAME, CSR) \ + void MacroAssembler::NAME(Register Rd) { \ + csrr(Rd, CSR); \ + } + + INSN(rdinstret, CSR_INSTERT); + INSN(rdcycle, CSR_CYCLE); + INSN(rdtime, CSR_TIME); + INSN(frcsr, CSR_FCSR); + INSN(frrm, CSR_FRM); + INSN(frflags, CSR_FFLAGS); + +#undef INSN + +void MacroAssembler::csrr(Register Rd, unsigned csr) { + csrrs(Rd, csr, x0); +} + +#define INSN(NAME, OPFUN) \ + void MacroAssembler::NAME(unsigned csr, Register Rs) { \ + OPFUN(x0, csr, Rs); \ + } + + INSN(csrw, csrrw); + INSN(csrs, csrrs); + INSN(csrc, csrrc); + +#undef INSN + +#define INSN(NAME, OPFUN) \ + void MacroAssembler::NAME(unsigned csr, unsigned imm) { \ + OPFUN(x0, csr, imm); \ + } + + INSN(csrwi, csrrwi); + INSN(csrsi, csrrsi); + INSN(csrci, csrrci); + +#undef INSN + +#define INSN(NAME, CSR) \ + void MacroAssembler::NAME(Register Rd, Register Rs) { \ + csrrw(Rd, CSR, Rs); \ + } + + INSN(fscsr, CSR_FCSR); + INSN(fsrm, CSR_FRM); + INSN(fsflags, CSR_FFLAGS); + +#undef INSN + +#define INSN(NAME) \ + void MacroAssembler::NAME(Register Rs) { \ + NAME(x0, Rs); \ + } + + INSN(fscsr); + INSN(fsrm); + INSN(fsflags); + +#undef INSN + +void MacroAssembler::fsrmi(Register Rd, unsigned imm) { + guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register"); + csrrwi(Rd, CSR_FRM, imm); +} + +void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { + csrrwi(Rd, CSR_FFLAGS, imm); +} + +#define INSN(NAME) \ + void MacroAssembler::NAME(unsigned imm) { \ + NAME(x0, imm); \ + } + + INSN(fsrmi); + INSN(fsflagsi); + +#undef INSN + +void MacroAssembler::push_reg(Register Rs) +{ + addi(esp, esp, 0 - wordSize); + sd(Rs, Address(esp, 0)); +} + +void MacroAssembler::pop_reg(Register Rd) +{ + ld(Rd, esp, 0); + addi(esp, esp, wordSize); +} + +int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { + int count = 0; + // Scan bitset to accumulate register pairs + for (int reg = 31; reg >= 0; reg--) { + if ((1U << 31) & bitset) { + regs[count++] = reg; + } + bitset <<= 1; + } + return count; +} + +// Push lots of registers in the bit set supplied. Don't push sp. +// Return the number of words pushed +int MacroAssembler::push_reg(unsigned int bitset, Register stack) { + DEBUG_ONLY(int words_pushed = 0;) + CompressibleRegion cr(this); + + unsigned char regs[32]; + int count = bitset_to_regs(bitset, regs); + // reserve one slot to align for odd count + int offset = is_even(count) ? 
0 : wordSize; + + if (count) { + addi(stack, stack, - count * wordSize - offset); + } + for (int i = count - 1; i >= 0; i--) { + sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); + DEBUG_ONLY(words_pushed ++;) + } + + assert(words_pushed == count, "oops, pushed != count"); + + return count; +} + +int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { + DEBUG_ONLY(int words_popped = 0;) + CompressibleRegion cr(this); + + unsigned char regs[32]; + int count = bitset_to_regs(bitset, regs); + // reserve one slot to align for odd count + int offset = is_even(count) ? 0 : wordSize; + + for (int i = count - 1; i >= 0; i--) { + ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); + DEBUG_ONLY(words_popped ++;) + } + + if (count) { + addi(stack, stack, count * wordSize + offset); + } + assert(words_popped == count, "oops, popped != count"); + + return count; +} + +// Push float registers in the bitset, except sp. +// Return the number of heapwords pushed. +int MacroAssembler::push_fp(unsigned int bitset, Register stack) { + CompressibleRegion cr(this); + int words_pushed = 0; + unsigned char regs[32]; + int count = bitset_to_regs(bitset, regs); + int push_slots = count + (count & 1); + + if (count) { + addi(stack, stack, -push_slots * wordSize); + } + + for (int i = count - 1; i >= 0; i--) { + fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize)); + words_pushed++; + } + + assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count); + return count; +} + +int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { + CompressibleRegion cr(this); + int words_popped = 0; + unsigned char regs[32]; + int count = bitset_to_regs(bitset, regs); + int pop_slots = count + (count & 1); + + for (int i = count - 1; i >= 0; i--) { + fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize)); + words_popped++; + } + + if (count) { + addi(stack, stack, pop_slots * wordSize); + } + + assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count); + return count; +} + +#ifdef COMPILER2 +int MacroAssembler::push_vp(unsigned int bitset, Register stack) { + CompressibleRegion cr(this); + int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); + + // Scan bitset to accumulate register pairs + unsigned char regs[32]; + int count = 0; + for (int reg = 31; reg >= 0; reg--) { + if ((1U << 31) & bitset) { + regs[count++] = reg; + } + bitset <<= 1; + } + + for (int i = 0; i < count; i++) { + sub(stack, stack, vector_size_in_bytes); + vs1r_v(as_VectorRegister(regs[i]), stack); + } + + return count * vector_size_in_bytes / wordSize; +} + +int MacroAssembler::pop_vp(unsigned int bitset, Register stack) { + CompressibleRegion cr(this); + int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); + + // Scan bitset to accumulate register pairs + unsigned char regs[32]; + int count = 0; + for (int reg = 31; reg >= 0; reg--) { + if ((1U << 31) & bitset) { + regs[count++] = reg; + } + bitset <<= 1; + } + + for (int i = count - 1; i >= 0; i--) { + vl1r_v(as_VectorRegister(regs[i]), stack); + add(stack, stack, vector_size_in_bytes); + } + + return count * vector_size_in_bytes / wordSize; +} +#endif // COMPILER2 + +void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { + CompressibleRegion cr(this); + // Push integer registers x7, x10-x17, x28-x31. 
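+  // In the RISC-V ABI these are t2, a0-a7 and t3-t6; registers named in 'exclude' are not saved.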
+ push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); + + // Push float registers f0-f7, f10-f17, f28-f31. + addi(sp, sp, - wordSize * 20); + int offset = 0; + for (int i = 0; i < 32; i++) { + if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { + fsd(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); + } + } +} + +void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { + CompressibleRegion cr(this); + int offset = 0; + for (int i = 0; i < 32; i++) { + if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { + fld(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); + } + } + addi(sp, sp, wordSize * 20); + + pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); +} + +// Push all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). +void MacroAssembler::pusha() { + CompressibleRegion cr(this); + push_reg(0xffffffe2, sp); +} + +// Pop all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). +void MacroAssembler::popa() { + CompressibleRegion cr(this); + pop_reg(0xffffffe2, sp); +} + +void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { + CompressibleRegion cr(this); + // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) + push_reg(0xffffffe0, sp); + + // float registers + addi(sp, sp, - 32 * wordSize); + for (int i = 0; i < 32; i++) { + fsd(as_FloatRegister(i), Address(sp, i * wordSize)); + } + + // vector registers + if (save_vectors) { + sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers); + vsetvli(t0, x0, Assembler::e64, Assembler::m8); + for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { + add(t0, sp, vector_size_in_bytes * i); + vse64_v(as_VectorRegister(i), t0); + } + } +} + +void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { + CompressibleRegion cr(this); + // vector registers + if (restore_vectors) { + vsetvli(t0, x0, Assembler::e64, Assembler::m8); + for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { + vle64_v(as_VectorRegister(i), sp); + add(sp, sp, vector_size_in_bytes * 8); + } + } + + // float registers + for (int i = 0; i < 32; i++) { + fld(as_FloatRegister(i), Address(sp, i * wordSize)); + } + addi(sp, sp, 32 * wordSize); + + // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) + pop_reg(0xffffffe0, sp); +} + +static int patch_offset_in_jal(address branch, int64_t offset) { + assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction!\n"); + Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31] + Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21] + Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20] + Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12] + return NativeInstruction::instruction_size; // only one instruction +} + +static int patch_offset_in_conditional_branch(address branch, int64_t offset) { + assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne insrusction!\n"); + Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31] + Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25] + 
Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7] + Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8] + return NativeInstruction::instruction_size; // only one instruction +} + +static int patch_offset_in_pc_relative(address branch, int64_t offset) { + const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load + Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. offset[31:12] ==> branch[31:12] + Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20] + return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size; +} + +static int patch_addr_in_movptr(address branch, address target) { + const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load + int32_t lower = ((intptr_t)target << 36) >> 36; + int64_t upper = ((intptr_t)target - lower) >> 28; + Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[47:28] + target[27] ==> branch[31:12] + Assembler::patch(branch + 4, 31, 20, (lower >> 16) & 0xfff); // Addi. target[27:16] ==> branch[31:20] + Assembler::patch(branch + 12, 31, 20, (lower >> 5) & 0x7ff); // Addi. target[15: 5] ==> branch[31:20] + Assembler::patch(branch + 20, 31, 20, lower & 0x1f); // Addi/Jalr/Load. target[ 4: 0] ==> branch[31:20] + return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; +} + +static int patch_imm_in_li64(address branch, address target) { + const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi + int64_t lower = (intptr_t)target & 0xffffffff; + lower = lower - ((lower << 44) >> 44); + int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower; + int32_t upper = (tmp_imm - (int32_t)lower) >> 32; + int64_t tmp_upper = upper, tmp_lower = upper; + tmp_lower = (tmp_lower << 52) >> 52; + tmp_upper -= tmp_lower; + tmp_upper >>= 12; + // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:28] == 0x7ff && target[19] == 1), + // upper = target[63:32] + 1. + Assembler::patch(branch + 0, 31, 12, tmp_upper & 0xfffff); // Lui. + Assembler::patch(branch + 4, 31, 20, tmp_lower & 0xfff); // Addi. + // Load the rest 32 bits. + Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi. + Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi. + Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff); // Addi. + return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; +} + +static int patch_imm_in_li32(address branch, int32_t target) { + const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw + int64_t upper = (intptr_t)target; + int32_t lower = (((int32_t)target) << 20) >> 20; + upper -= lower; + upper = (int32_t)upper; + Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui. + Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw. 
+ return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; +} + +static long get_offset_of_jal(address insn_addr) { + assert_cond(insn_addr != NULL); + long offset = 0; + unsigned insn = *(unsigned*)insn_addr; + long val = (long)Assembler::sextract(insn, 31, 12); + offset |= ((val >> 19) & 0x1) << 20; + offset |= (val & 0xff) << 12; + offset |= ((val >> 8) & 0x1) << 11; + offset |= ((val >> 9) & 0x3ff) << 1; + offset = (offset << 43) >> 43; + return offset; +} + +static long get_offset_of_conditional_branch(address insn_addr) { + long offset = 0; + assert_cond(insn_addr != NULL); + unsigned insn = *(unsigned*)insn_addr; + offset = (long)Assembler::sextract(insn, 31, 31); + offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11); + offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5); + offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1); + offset = (offset << 41) >> 41; + return offset; +} + +static long get_offset_of_pc_relative(address insn_addr) { + long offset = 0; + assert_cond(insn_addr != NULL); + offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12; // Auipc. + offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addi/Jalr/Load. + offset = (offset << 32) >> 32; + return offset; +} + +static address get_target_of_movptr(address insn_addr) { + assert_cond(insn_addr != NULL); + intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 28; // Lui. + target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16; // Addi. + target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 5; // Addi. + target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. + return (address) target_address; +} + +static address get_target_of_li64(address insn_addr) { + assert_cond(insn_addr != NULL); + intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44; // Lui. + target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32; // Addi. + target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20; // Addi. + target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8; // Addi. + target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20)); // Addi. + return (address)target_address; +} + +static address get_target_of_li32(address insn_addr) { + assert_cond(insn_addr != NULL); + intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12; // Lui. + target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addiw. + return (address)target_address; +} + +// Patch any kind of instruction; there may be several instructions. +// Return the total length (in bytes) of the instructions. 
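+// The kind of sequence at 'branch' (jal, conditional branch, auipc-based
+// pc-relative pair, movptr, li64 or li32) is detected from the encoding and
+// patched in place.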
+int MacroAssembler::pd_patch_instruction_size(address branch, address target) { + assert_cond(branch != NULL); + int64_t offset = target - branch; + if (NativeInstruction::is_jal_at(branch)) { // jal + return patch_offset_in_jal(branch, offset); + } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne + return patch_offset_in_conditional_branch(branch, offset); + } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load + return patch_offset_in_pc_relative(branch, offset); + } else if (NativeInstruction::is_movptr_at(branch)) { // movptr + return patch_addr_in_movptr(branch, target); + } else if (NativeInstruction::is_li64_at(branch)) { // li64 + return patch_imm_in_li64(branch, target); + } else if (NativeInstruction::is_li32_at(branch)) { // li32 + int64_t imm = (intptr_t)target; + return patch_imm_in_li32(branch, (int32_t)imm); + } else { +#ifdef ASSERT + tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", + *(unsigned*)branch, p2i(branch)); + Disassembler::decode(branch - 16, branch + 16); +#endif + ShouldNotReachHere(); + return -1; + } +} + +address MacroAssembler::target_addr_for_insn(address insn_addr) { + long offset = 0; + assert_cond(insn_addr != NULL); + if (NativeInstruction::is_jal_at(insn_addr)) { // jal + offset = get_offset_of_jal(insn_addr); + } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne + offset = get_offset_of_conditional_branch(insn_addr); + } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load + offset = get_offset_of_pc_relative(insn_addr); + } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr + return get_target_of_movptr(insn_addr); + } else if (NativeInstruction::is_li64_at(insn_addr)) { // li64 + return get_target_of_li64(insn_addr); + } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32 + return get_target_of_li32(insn_addr); + } else { + ShouldNotReachHere(); + } + return address(((uintptr_t)insn_addr + offset)); +} + +int MacroAssembler::patch_oop(address insn_addr, address o) { + // OOPs are either narrow (32 bits) or wide (48 bits). We encode + // narrow OOPs by setting the upper 16 bits in the first + // instruction. 
+ if (NativeInstruction::is_li32_at(insn_addr)) { + // Move narrow OOP + uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); + return patch_imm_in_li32(insn_addr, (int32_t)n); + } else if (NativeInstruction::is_movptr_at(insn_addr)) { + // Move wide OOP + return patch_addr_in_movptr(insn_addr, o); + } + ShouldNotReachHere(); + return -1; +} + +void MacroAssembler::reinit_heapbase() { + if (UseCompressedOops) { + if (Universe::is_fully_initialized()) { + mv(xheapbase, CompressedOops::ptrs_base()); + } else { + int32_t offset = 0; + la_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), offset); + ld(xheapbase, Address(xheapbase, offset)); + } + } +} + +void MacroAssembler::mv(Register Rd, Address dest) { + assert(dest.getMode() == Address::literal, "Address mode should be Address::literal"); + code_section()->relocate(pc(), dest.rspec()); + movptr(Rd, dest.target()); +} + +void MacroAssembler::mv(Register Rd, address addr) { + // Here in case of use with relocation, use fix length instruciton + // movptr instead of li + movptr(Rd, addr); +} + +void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { + if (src.is_register()) { + mv(Rd, src.as_register()); + } else { + mv(Rd, src.as_constant()); + } +} + +void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { + andr(Rd, Rs1, Rs2); + // addw: The result is clipped to 32 bits, then the sign bit is extended, + // and the result is stored in Rd + addw(Rd, Rd, zr); +} + +void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { + orr(Rd, Rs1, Rs2); + // addw: The result is clipped to 32 bits, then the sign bit is extended, + // and the result is stored in Rd + addw(Rd, Rd, zr); +} + +void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { + xorr(Rd, Rs1, Rs2); + // addw: The result is clipped to 32 bits, then the sign bit is extended, + // and the result is stored in Rd + addw(Rd, Rd, zr); +} + +// Note: load_unsigned_short used to be called load_unsigned_word. +int MacroAssembler::load_unsigned_short(Register dst, Address src) { + int off = offset(); + lhu(dst, src); + return off; +} + +int MacroAssembler::load_unsigned_byte(Register dst, Address src) { + int off = offset(); + lbu(dst, src); + return off; +} + +int MacroAssembler::load_signed_short(Register dst, Address src) { + int off = offset(); + lh(dst, src); + return off; +} + +int MacroAssembler::load_signed_byte(Register dst, Address src) { + int off = offset(); + lb(dst, src); + return off; +} + +void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { + switch (size_in_bytes) { + case 8: ld(dst, src); break; + case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; + case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; + case 1: is_signed ? 
load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; + default: ShouldNotReachHere(); + } +} + +void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { + switch (size_in_bytes) { + case 8: sd(src, dst); break; + case 4: sw(src, dst); break; + case 2: sh(src, dst); break; + case 1: sb(src, dst); break; + default: ShouldNotReachHere(); + } +} + +// reverse bytes in halfword in lower 16 bits and sign-extend +// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) +void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { + if (UseRVB) { + rev8(Rd, Rs); + srai(Rd, Rd, 48); + return; + } + assert_different_registers(Rs, tmp); + assert_different_registers(Rd, tmp); + srli(tmp, Rs, 8); + andi(tmp, tmp, 0xFF); + slli(Rd, Rs, 56); + srai(Rd, Rd, 48); // sign-extend + orr(Rd, Rd, tmp); +} + +// reverse bytes in lower word and sign-extend +// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) +void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { + if (UseRVB) { + rev8(Rd, Rs); + srai(Rd, Rd, 32); + return; + } + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1, tmp2); + revb_h_w_u(Rd, Rs, tmp1, tmp2); + slli(tmp2, Rd, 48); + srai(tmp2, tmp2, 32); // sign-extend + srli(Rd, Rd, 16); + orr(Rd, Rd, tmp2); +} + +// reverse bytes in halfword in lower 16 bits and zero-extend +// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) +void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { + if (UseRVB) { + rev8(Rd, Rs); + srli(Rd, Rd, 48); + return; + } + assert_different_registers(Rs, tmp); + assert_different_registers(Rd, tmp); + srli(tmp, Rs, 8); + andi(tmp, tmp, 0xFF); + andi(Rd, Rs, 0xFF); + slli(Rd, Rd, 8); + orr(Rd, Rd, tmp); +} + +// reverse bytes in halfwords in lower 32 bits and zero-extend +// Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) +void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { + if (UseRVB) { + rev8(Rd, Rs); + rori(Rd, Rd, 32); + roriw(Rd, Rd, 16); + zext_w(Rd, Rd); + return; + } + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1, tmp2); + srli(tmp2, Rs, 16); + revb_h_h_u(tmp2, tmp2, tmp1); + revb_h_h_u(Rd, Rs, tmp1); + slli(tmp2, tmp2, 16); + orr(Rd, Rd, tmp2); +} + +// This method is only used for revb_h +// Rd = Rs[47:0] Rs[55:48] Rs[63:56] +void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1); + srli(tmp1, Rs, 48); + andi(tmp2, tmp1, 0xFF); + slli(tmp2, tmp2, 8); + srli(tmp1, tmp1, 8); + orr(tmp1, tmp1, tmp2); + slli(Rd, Rs, 16); + orr(Rd, Rd, tmp1); +} + +// reverse bytes in each halfword +// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] +void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { + if (UseRVB) { + assert_different_registers(Rs, tmp1); + assert_different_registers(Rd, tmp1); + rev8(Rd, Rs); + zext_w(tmp1, Rd); + roriw(tmp1, tmp1, 16); + slli(tmp1, tmp1, 32); + srli(Rd, Rd, 32); + roriw(Rd, Rd, 16); + zext_w(Rd, Rd); + orr(Rd, Rd, tmp1); + return; + } + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1, tmp2); + revb_h_helper(Rd, Rs, tmp1, tmp2); + for (int i = 0; i < 3; ++i) { + revb_h_helper(Rd, Rd, tmp1, tmp2); + } +} + +// reverse bytes in each word +// 
Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] +void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { + if (UseRVB) { + rev8(Rd, Rs); + rori(Rd, Rd, 32); + return; + } + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1, tmp2); + revb(Rd, Rs, tmp1, tmp2); + ror_imm(Rd, Rd, 32); +} + +// reverse bytes in doubleword +// Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] +void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { + if (UseRVB) { + rev8(Rd, Rs); + return; + } + assert_different_registers(Rs, tmp1, tmp2); + assert_different_registers(Rd, tmp1, tmp2); + andi(tmp1, Rs, 0xFF); + slli(tmp1, tmp1, 8); + for (int step = 8; step < 56; step += 8) { + srli(tmp2, Rs, step); + andi(tmp2, tmp2, 0xFF); + orr(tmp1, tmp1, tmp2); + slli(tmp1, tmp1, 8); + } + srli(Rd, Rs, 56); + andi(Rd, Rd, 0xFF); + orr(Rd, tmp1, Rd); +} + +// rotate right with shift bits +void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) +{ + if (UseRVB) { + rori(dst, src, shift); + return; + } + + assert_different_registers(dst, tmp); + assert_different_registers(src, tmp); + assert(shift < 64, "shift amount must be < 64"); + slli(tmp, src, 64 - shift); + srli(dst, src, shift); + orr(dst, dst, tmp); +} + +void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { + if (is_imm_in_range(imm, 12, 0)) { + and_imm12(Rd, Rn, imm); + } else { + assert_different_registers(Rn, tmp); + li(tmp, imm); + andr(Rd, Rn, tmp); + } +} + +void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { + ld(tmp1, adr); + if (src.is_register()) { + orr(tmp1, tmp1, src.as_register()); + } else { + if (is_imm_in_range(src.as_constant(), 12, 0)) { + ori(tmp1, tmp1, src.as_constant()); + } else { + assert_different_registers(tmp1, tmp2); + li(tmp2, src.as_constant()); + orr(tmp1, tmp1, tmp2); + } + } + sd(tmp1, adr); +} + +void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) { + if (UseCompressedClassPointers) { + lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); + if (CompressedKlassPointers::base() == NULL) { + slli(tmp, tmp, CompressedKlassPointers::shift()); + beq(trial_klass, tmp, L); + return; + } + decode_klass_not_null(tmp); + } else { + ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); + } + beq(trial_klass, tmp, L); +} + +// Move an oop into a register. immediate is true if we want +// immediate instructions and nmethod entry barriers are not enabled. +// i.e. we are not going to patch this instruction while the code is being +// executed by another thread. +void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { + int oop_index; + if (obj == NULL) { + oop_index = oop_recorder()->allocate_oop_index(obj); + } else { +#ifdef ASSERT + { + ThreadInVMfromUnknown tiv; + assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); + } +#endif + oop_index = oop_recorder()->find_index(obj); + } + RelocationHolder rspec = oop_Relocation::spec(oop_index); + + // nmethod entry barrier necessitate using the constant pool. They have to be + // ordered with respected to oop access. + // Using immediate literals would necessitate fence.i. 
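+  // With an nmethod entry barrier, or when the caller did not ask for an
+  // immediate (i.e. the oop may be patched later), load the oop through the
+  // constant pool; otherwise materialize it directly.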
+ if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate) { + address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address + ld_constant(dst, Address(dummy, rspec)); + } else + mv(dst, Address((address)obj, rspec)); +} + +// Move a metadata address into a register. +void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { + int oop_index; + if (obj == NULL) { + oop_index = oop_recorder()->allocate_metadata_index(obj); + } else { + oop_index = oop_recorder()->find_index(obj); + } + RelocationHolder rspec = metadata_Relocation::spec(oop_index); + mv(dst, Address((address)obj, rspec)); +} + +// Writes to stack successive pages until offset reached to check for +// stack overflow + shadow pages. This clobbers tmp. +void MacroAssembler::bang_stack_size(Register size, Register tmp) { + assert_different_registers(tmp, size, t0); + // Bang stack for total size given plus shadow page size. + // Bang one page at a time because large size can bang beyond yellow and + // red zones. + mv(t0, os::vm_page_size()); + Label loop; + bind(loop); + sub(tmp, sp, t0); + subw(size, size, t0); + sd(size, Address(tmp)); + bgtz(size, loop); + + // Bang down shadow pages too. + // At this point, (tmp-0) is the last address touched, so don't + // touch it again. (It was touched as (tmp-pagesize) but then tmp + // was post-decremented.) Skip this address by starting at i=1, and + // touch a few more pages below. N.B. It is important to touch all + // the way down to and including i=StackShadowPages. + for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { + // this could be any sized move but this is can be a debugging crumb + // so the bigger the better. + sub(tmp, tmp, os::vm_page_size()); + sd(size, Address(tmp, 0)); + } +} + +SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { + assert_cond(masm != NULL); + int32_t offset = 0; + _masm = masm; + _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset); + _masm->lbu(t0, Address(t0, offset)); + _masm->beqz(t0, _label); +} + +SkipIfEqual::~SkipIfEqual() { + assert_cond(_masm != NULL); + _masm->bind(_label); + _masm = NULL; +} + +void MacroAssembler::load_mirror(Register dst, Register method, Register tmp) { + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + ld(dst, Address(xmethod, Method::const_offset())); + ld(dst, Address(dst, ConstMethod::constants_offset())); + ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes())); + ld(dst, Address(dst, mirror_offset)); + resolve_oop_handle(dst, tmp); +} + +void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { + // OopHandle::resolve is an indirection. + assert_different_registers(result, tmp); + access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); +} + +// ((WeakHandle)result).resolve() +void MacroAssembler::resolve_weak_handle(Register result, Register tmp) { + assert_different_registers(result, tmp); + Label resolved; + + // A null weak handle resolves to null. + beqz(result, resolved); + + // Only 64 bit platforms support GCs that require a tmp register + // Only IN_HEAP loads require a thread_tmp register + // WeakHandle::resolve is an indirection like jweak. 
+ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, + result, Address(result), tmp, noreg /* tmp_thread */); + bind(resolved); +} + +void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, + Register dst, Address src, + Register tmp1, Register thread_tmp) { + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + decorators = AccessInternal::decorator_fixup(decorators); + bool as_raw = (decorators & AS_RAW) != 0; + if (as_raw) { + bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } else { + bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } +} + +void MacroAssembler::null_check(Register reg, int offset) { + if (needs_explicit_null_check(offset)) { + // provoke OS NULL exception if reg = NULL by + // accessing M[reg] w/o changing any registers + // NOTE: this is plenty to provoke a segv + ld(zr, Address(reg, 0)); + } else { + // nothing to do, (later) access of M[reg + offset] + // will provoke OS NULL exception if reg = NULL + } +} + +void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, + Address dst, Register src, + Register tmp1, Register thread_tmp) { + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + decorators = AccessInternal::decorator_fixup(decorators); + bool as_raw = (decorators & AS_RAW) != 0; + if (as_raw) { + bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } else { + bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } +} + +// Algorithm must match CompressedOops::encode. +void MacroAssembler::encode_heap_oop(Register d, Register s) { + verify_oop(s, "broken oop in encode_heap_oop"); + if (CompressedOops::base() == NULL) { + if (CompressedOops::shift() != 0) { + assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); + srli(d, s, LogMinObjAlignmentInBytes); + } else { + mv(d, s); + } + } else { + Label notNull; + sub(d, s, xheapbase); + bgez(d, notNull); + mv(d, zr); + bind(notNull); + if (CompressedOops::shift() != 0) { + assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); + srli(d, d, CompressedOops::shift()); + } + } +} + +void MacroAssembler::load_klass(Register dst, Register src) { + if (UseCompressedClassPointers) { + lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); + decode_klass_not_null(dst); + } else { + ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); + } +} + +void MacroAssembler::store_klass(Register dst, Register src) { + // FIXME: Should this be a store release? concurrent gcs assumes + // klass length is valid if klass field is not null. 
+ if (UseCompressedClassPointers) { + encode_klass_not_null(src); + sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); + } else { + sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); + } +} + +void MacroAssembler::store_klass_gap(Register dst, Register src) { + if (UseCompressedClassPointers) { + // Store to klass gap in destination + sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); + } +} + +void MacroAssembler::decode_klass_not_null(Register r) { + decode_klass_not_null(r, r); +} + +void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { + assert(UseCompressedClassPointers, "should only be used for compressed headers"); + + if (CompressedKlassPointers::base() == NULL) { + if (CompressedKlassPointers::shift() != 0) { + assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); + slli(dst, src, LogKlassAlignmentInBytes); + } else { + mv(dst, src); + } + return; + } + + Register xbase = dst; + if (dst == src) { + xbase = tmp; + } + + assert_different_registers(src, xbase); + li(xbase, (uintptr_t)CompressedKlassPointers::base()); + + if (CompressedKlassPointers::shift() != 0) { + assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); + assert_different_registers(t0, xbase); + shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); + } else { + add(dst, xbase, src); + } + + if (xbase == xheapbase) { reinit_heapbase(); } +} + +void MacroAssembler::encode_klass_not_null(Register r) { + encode_klass_not_null(r, r); +} + +void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { + assert(UseCompressedClassPointers, "should only be used for compressed headers"); + + if (CompressedKlassPointers::base() == NULL) { + if (CompressedKlassPointers::shift() != 0) { + assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); + srli(dst, src, LogKlassAlignmentInBytes); + } else { + mv(dst, src); + } + return; + } + + if (((uint64_t)(uintptr_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && + CompressedKlassPointers::shift() == 0) { + zero_extend(dst, src, 32); + return; + } + + Register xbase = dst; + if (dst == src) { + xbase = tmp; + } + + assert_different_registers(src, xbase); + li(xbase, (intptr_t)CompressedKlassPointers::base()); + sub(dst, src, xbase); + if (CompressedKlassPointers::shift() != 0) { + assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); + srli(dst, dst, LogKlassAlignmentInBytes); + } + if (xbase == xheapbase) { + reinit_heapbase(); + } +} + +void MacroAssembler::decode_heap_oop_not_null(Register r) { + decode_heap_oop_not_null(r, r); +} + +void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { + assert(UseCompressedOops, "should only be used for compressed headers"); + assert(Universe::heap() != NULL, "java heap should be initialized"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. 
+  if (CompressedOops::shift() != 0) {
+    assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
+    slli(dst, src, LogMinObjAlignmentInBytes);
+    if (CompressedOops::base() != NULL) {
+      add(dst, xheapbase, dst);
+    }
+  } else {
+    assert(CompressedOops::base() == NULL, "sanity");
+    mv(dst, src);
+  }
+}
+
+void MacroAssembler::decode_heap_oop(Register d, Register s) {
+  if (CompressedOops::base() == NULL) {
+    if (CompressedOops::shift() != 0 || d != s) {
+      slli(d, s, CompressedOops::shift());
+    }
+  } else {
+    Label done;
+    mv(d, s);
+    beqz(s, done);
+    shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes);
+    bind(done);
+  }
+  verify_oop(d, "broken oop in decode_heap_oop");
+}
+
+void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1,
+                                    Register thread_tmp, DecoratorSet decorators) {
+  access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
+}
+
+void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
+                                   Register thread_tmp, DecoratorSet decorators) {
+  access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
+}
+
+void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
+                                            Register thread_tmp, DecoratorSet decorators) {
+  access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, thread_tmp);
+}
+
+// Used for storing NULLs.
+void MacroAssembler::store_heap_oop_null(Address dst) {
+  access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg);
+}
+
+int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2,
+                                    bool want_remainder)
+{
+  // Full implementation of Java idiv and irem. The function
+  // returns the (pc) offset of the div instruction - may be needed
+  // for implicit exceptions.
+  //
+  // input : rs1: dividend
+  //         rs2: divisor
+  //
+  // result: either
+  //         quotient  (= rs1 idiv rs2)
+  //         remainder (= rs1 irem rs2)
+
+  int idivl_offset = offset();
+  if (!want_remainder) {
+    divw(result, rs1, rs2);
+  } else {
+    remw(result, rs1, rs2); // result = rs1 % rs2;
+  }
+  return idivl_offset;
+}
+
+int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2,
+                                    bool want_remainder)
+{
+  // Full implementation of Java ldiv and lrem. The function
+  // returns the (pc) offset of the div instruction - may be needed
+  // for implicit exceptions.
+  //
+  // input : rs1: dividend
+  //         rs2: divisor
+  //
+  // result: either
+  //         quotient  (= rs1 idiv rs2)
+  //         remainder (= rs1 irem rs2)
+
+  int idivq_offset = offset();
+  if (!want_remainder) {
+    div(result, rs1, rs2);
+  } else {
+    rem(result, rs1, rs2); // result = rs1 % rs2;
+  }
+  return idivq_offset;
+}
+
+// Look up the method for a megamorphic invokeinterface call.
+// The target method is determined by <intf_klass, itable_index>.
+// The receiver klass is in recv_klass.
+// On success, the result will be in method_result, and execution falls through.
+// On failure, execution transfers to the given label.
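+// The itable of recv_klass is scanned linearly until an itableOffsetEntry
+// whose interface equals intf_klass is found, or a null entry ends the scan.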
+void MacroAssembler::lookup_interface_method(Register recv_klass,
+                                             Register intf_klass,
+                                             RegisterOrConstant itable_index,
+                                             Register method_result,
+                                             Register scan_tmp,
+                                             Label& L_no_such_interface,
+                                             bool return_method) {
+  assert_different_registers(recv_klass, intf_klass, scan_tmp);
+  assert_different_registers(method_result, intf_klass, scan_tmp);
+  assert(recv_klass != method_result || !return_method,
+         "recv_klass can be destroyed when method isn't needed");
+  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
+         "caller must be same register for non-constant itable index as for method");
+
+  // Compute start of first itableOffsetEntry (which is at the end of the vtable).
+  int vtable_base = in_bytes(Klass::vtable_start_offset());
+  int itentry_off = itableMethodEntry::method_offset_in_bytes();
+  int scan_step = itableOffsetEntry::size() * wordSize;
+  int vte_size = vtableEntry::size_in_bytes();
+  assert(vte_size == wordSize, "else adjust times_vte_scale");
+
+  lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset()));
+
+  // %%% Could store the aligned, prescaled offset in the klassoop.
+  shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3);
+  add(scan_tmp, scan_tmp, vtable_base);
+
+  if (return_method) {
+    // Adjust recv_klass by scaled itable_index, so we can free itable_index.
+    assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
+    if (itable_index.is_register()) {
+      slli(t0, itable_index.as_register(), 3);
+    } else {
+      li(t0, itable_index.as_constant() << 3);
+    }
+    add(recv_klass, recv_klass, t0);
+    if (itentry_off) {
+      add(recv_klass, recv_klass, itentry_off);
+    }
+  }
+
+  Label search, found_method;
+
+  ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
+  beq(intf_klass, method_result, found_method);
+  bind(search);
+  // Check that the previous entry is non-null. A null entry means that
+  // the receiver class doesn't implement the interface, and wasn't the
+  // same as when the caller was compiled.
+  beqz(method_result, L_no_such_interface, /* is_far */ true);
+  addi(scan_tmp, scan_tmp, scan_step);
+  ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
+  bne(intf_klass, method_result, search);
+
+  bind(found_method);
+
+  // Got a hit.
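+  // scan_tmp points at the matching itableOffsetEntry; its offset field gives
+  // the start of the itableMethodEntry block for this interface.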
+  if (return_method) {
+    lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes()));
+    add(method_result, recv_klass, scan_tmp);
+    ld(method_result, Address(method_result));
+  }
+}
+
+// virtual method calling
+void MacroAssembler::lookup_virtual_method(Register recv_klass,
+                                           RegisterOrConstant vtable_index,
+                                           Register method_result) {
+  const int base = in_bytes(Klass::vtable_start_offset());
+  assert(vtableEntry::size() * wordSize == 8,
+         "adjust the scaling in the code below");
+  int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes();
+
+  if (vtable_index.is_register()) {
+    shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord);
+    ld(method_result, Address(method_result, vtable_offset_in_bytes));
+  } else {
+    vtable_offset_in_bytes += vtable_index.as_constant() * wordSize;
+    ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes));
+  }
+}
+
+void MacroAssembler::membar(uint32_t order_constraint) {
+  address prev = pc() - NativeMembar::instruction_size;
+  address last = code()->last_insn();
+
+  if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) {
+    NativeMembar *bar = NativeMembar_at(prev);
+    // We are merging two memory barrier instructions. On RISCV we
+    // can do this simply by ORing them together.
+    bar->set_kind(bar->get_kind() | order_constraint);
+    BLOCK_COMMENT("merged membar");
+  } else {
+    code()->set_last_insn(pc());
+
+    uint32_t predecessor = 0;
+    uint32_t successor = 0;
+
+    membar_mask_to_pred_succ(order_constraint, predecessor, successor);
+    fence(predecessor, successor);
+  }
+}
+
+// Form an address from base + offset in Rd. Rd may or may not
+// actually be used: you must use the Address that is returned. It
+// is up to you to ensure that the shift provided matches the size
+// of your data.
+Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset) {
+  if (is_offset_in_range(byte_offset, 12)) { // 12: imm in range 2^12
+    return Address(base, byte_offset);
+  }
+
+  // Do it the hard way
+  mv(Rd, byte_offset);
+  add(Rd, base, Rd);
+  return Address(Rd);
+}
+
+void MacroAssembler::check_klass_subtype(Register sub_klass,
+                                         Register super_klass,
+                                         Register tmp_reg,
+                                         Label& L_success) {
+  Label L_failure;
+  check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL);
+  check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL);
+  bind(L_failure);
+}
+
+void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) {
+  ld(t0, Address(xthread, JavaThread::polling_word_offset()));
+  if (acquire) {
+    membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+  }
+  if (at_return) {
+    bgtu(in_nmethod ?
sp : fp, t0, slow_path, true /* is_far */); + } else { + andi(t0, t0, SafepointMechanism::poll_bit()); + bnez(t0, slow_path, true /* is_far */); + } +} + +void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, + Label &succeed, Label *fail) { + // oldv holds comparison value + // newv holds value to write in exchange + // addr identifies memory word to compare against/update + Label retry_load, nope; + bind(retry_load); + // Load reserved from the memory location + lr_d(tmp, addr, Assembler::aqrl); + // Fail and exit if it is not what we expect + bne(tmp, oldv, nope); + // If the store conditional succeeds, tmp will be zero + sc_d(tmp, newv, addr, Assembler::rl); + beqz(tmp, succeed); + // Retry only when the store conditional failed + j(retry_load); + + bind(nope); + membar(AnyAny); + mv(oldv, tmp); + if (fail != NULL) { + j(*fail); + } +} + +void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, + Label &succeed, Label *fail) { + assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); + cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); +} + +void MacroAssembler::load_reserved(Register addr, + enum operand_size size, + Assembler::Aqrl acquire) { + switch (size) { + case int64: + lr_d(t0, addr, acquire); + break; + case int32: + lr_w(t0, addr, acquire); + break; + case uint32: + lr_w(t0, addr, acquire); + zero_extend(t0, t0, 32); + break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::store_conditional(Register addr, + Register new_val, + enum operand_size size, + Assembler::Aqrl release) { + switch (size) { + case int64: + sc_d(t0, new_val, addr, release); + break; + case int32: + case uint32: + sc_w(t0, new_val, addr, release); + break; + default: + ShouldNotReachHere(); + } +} + + +void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, + Register new_val, + enum operand_size size, + Register tmp1, Register tmp2, Register tmp3) { + assert(size == int8 || size == int16, "unsupported operand size"); + + Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3; + + andi(shift, addr, 3); + slli(shift, shift, 3); + + andi(aligned_addr, addr, ~3); + + if (size == int8) { + addi(mask, zr, 0xff); + } else { + // size == int16 case + addi(mask, zr, -1); + zero_extend(mask, mask, 16); + } + sll(mask, mask, shift); + + xori(not_mask, mask, -1); + + sll(expected, expected, shift); + andr(expected, expected, mask); + + sll(new_val, new_val, shift); + andr(new_val, new_val, mask); +} + +// cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps. +// It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w, +// which are forced to work with 4-byte aligned address. 
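+//
+// Roughly, the sequence emitted below behaves like this C-style sketch
+// (illustrative only; 'retry'/'fail' stand for the labels generated below):
+//
+//   uint32_t* aligned = (uint32_t*)((uintptr_t)addr & ~3);
+//   int shift = ((uintptr_t)addr & 3) * 8;
+//   uint32_t mask = (size == int8 ? 0xff : 0xffff) << shift;
+//   retry:
+//     uint32_t old = *aligned;                               // lr.w
+//     if ((old & mask) != (expected << shift)) goto fail;
+//     uint32_t merged = (old & ~mask) | (new_val << shift);
+//     if (!store_conditional(aligned, merged)) goto retry;   // sc.w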
+void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, + Register new_val, + enum operand_size size, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result, bool result_as_bool, + Register tmp1, Register tmp2, Register tmp3) { + Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; + assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); + cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); + + Label retry, fail, done; + + bind(retry); + lr_w(old, aligned_addr, acquire); + andr(tmp, old, mask); + bne(tmp, expected, fail); + + andr(tmp, old, not_mask); + orr(tmp, tmp, new_val); + sc_w(tmp, tmp, aligned_addr, release); + bnez(tmp, retry); + + if (result_as_bool) { + addi(result, zr, 1); + j(done); + + bind(fail); + mv(result, zr); + + bind(done); + } else { + andr(tmp, old, mask); + + bind(fail); + srl(result, tmp, shift); + + if (size == int8) { + sign_extend(result, result, 8); + } else { + // size == int16 case + sign_extend(result, result, 16); + } + } +} + +// weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement +// the weak CAS stuff. The major difference is that it just failed when store conditional +// failed. +void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, + Register new_val, + enum operand_size size, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result, + Register tmp1, Register tmp2, Register tmp3) { + Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; + assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); + cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); + + Label succ, fail, done; + + lr_w(old, aligned_addr, acquire); + andr(tmp, old, mask); + bne(tmp, expected, fail); + + andr(tmp, old, not_mask); + orr(tmp, tmp, new_val); + sc_w(tmp, tmp, aligned_addr, release); + beqz(tmp, succ); + + bind(fail); + addi(result, zr, 1); + j(done); + + bind(succ); + mv(result, zr); + + bind(done); +} + +void MacroAssembler::cmpxchg(Register addr, Register expected, + Register new_val, + enum operand_size size, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result, bool result_as_bool) { + assert(size != int8 && size != int16, "unsupported operand size"); + + Label retry_load, done, ne_done; + bind(retry_load); + load_reserved(addr, size, acquire); + bne(t0, expected, ne_done); + store_conditional(addr, new_val, size, release); + bnez(t0, retry_load); + + // equal, succeed + if (result_as_bool) { + li(result, 1); + } else { + mv(result, expected); + } + j(done); + + // not equal, failed + bind(ne_done); + if (result_as_bool) { + mv(result, zr); + } else { + mv(result, t0); + } + + bind(done); +} + +void MacroAssembler::cmpxchg_weak(Register addr, Register expected, + Register new_val, + enum operand_size size, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result) { + Label fail, done, sc_done; + load_reserved(addr, size, acquire); + bne(t0, expected, fail); + store_conditional(addr, new_val, size, release); + beqz(t0, sc_done); + + // fail + bind(fail); + li(result, 1); + j(done); + + // sc_done + bind(sc_done); + mv(result, 0); + bind(done); +} + +#define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \ +void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ + prev = prev->is_valid() ? 
prev : zr; \ + if (incr.is_register()) { \ + AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ + } else { \ + mv(t0, incr.as_constant()); \ + AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ + } \ + return; \ +} + +ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) +ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) +ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) +ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) + +#undef ATOMIC_OP + +#define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ +void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ + prev = prev->is_valid() ? prev : zr; \ + AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ + return; \ +} + +ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) +ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) +ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) +ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) + +#undef ATOMIC_XCHG + +#define ATOMIC_XCHGU(OP1, OP2) \ +void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ + atomic_##OP2(prev, newv, addr); \ + zero_extend(prev, prev, 32); \ + return; \ +} + +ATOMIC_XCHGU(xchgwu, xchgw) +ATOMIC_XCHGU(xchgalwu, xchgalw) + +#undef ATOMIC_XCHGU + +void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done, Register flag) { + assert(UseBiasedLocking, "why call this otherwise?"); + + // Check for biased locking unlock case, which is a no-op + // Note: we do not have to check the thread ID for two reasons. + // First, the interpreter checks for IllegalMonitorStateException at + // a higher level. Second, if the bias was revoked while we held the + // lock, the object could not be rebiased toward another thread, so + // the bias bit would be clear. 
+ ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + andi(temp_reg, temp_reg, markWord::biased_lock_mask_in_place); // 1 << 3 + sub(temp_reg, temp_reg, (u1)markWord::biased_lock_pattern); + if (flag->is_valid()) { mv(flag, temp_reg); } + beqz(temp_reg, done); +} + +void MacroAssembler::load_prototype_header(Register dst, Register src) { + load_klass(dst, src); + ld(dst, Address(dst, Klass::prototype_header_offset())); +} + +void MacroAssembler::biased_locking_enter(Register lock_reg, + Register obj_reg, + Register swap_reg, + Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, + Label* slow_case, + BiasedLockingCounters* counters, + Register flag) { + assert(UseBiasedLocking, "why call this otherwise?"); + assert_different_registers(lock_reg, obj_reg, swap_reg); + + if (PrintBiasedLockingStatistics && counters == NULL) { + counters = BiasedLocking::counters(); + } + + assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0, flag); + assert(markWord::age_shift == markWord::lock_bits + markWord::biased_lock_bits, "biased locking makes assumptions about bit layout"); + Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); + + // Biased locking + // See whether the lock is currently biased toward our thread and + // whether the epoch is still valid + // Note that the runtime guarantees sufficient alignment of JavaThread + // pointers to allow age to be placed into low bits + // First check to see whether biasing is even enabled for this object + Label cas_label; + if (!swap_reg_contains_mark) { + ld(swap_reg, mark_addr); + } + andi(tmp_reg, swap_reg, markWord::biased_lock_mask_in_place); + xori(t0, tmp_reg, (u1)markWord::biased_lock_pattern); + bnez(t0, cas_label); // don't care flag unless jumping to done + // The bias pattern is present in the object's header. Need to check + // whether the bias owner and the epoch are both still current. + load_prototype_header(tmp_reg, obj_reg); + orr(tmp_reg, tmp_reg, xthread); + xorr(tmp_reg, swap_reg, tmp_reg); + andi(tmp_reg, tmp_reg, ~((int) markWord::age_mask_in_place)); + if (flag->is_valid()) { + mv(flag, tmp_reg); + } + + if (counters != NULL) { + Label around; + bnez(tmp_reg, around); + atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); + j(done); + bind(around); + } else { + beqz(tmp_reg, done); + } + + Label try_revoke_bias; + Label try_rebias; + + // At this point we know that the header has the bias pattern and + // that we are not the bias owner in the current epoch. We need to + // figure out more details about the state of the header in order to + // know what operations can be legally performed on the object's + // header. + + // If the low three bits in the xor result aren't clear, that means + // the prototype header is no longer biased and we have to revoke + // the bias on this object. + andi(t0, tmp_reg, markWord::biased_lock_mask_in_place); + bnez(t0, try_revoke_bias); + + // Biasing is still enabled for this data type. See whether the + // epoch of the current bias is still valid, meaning that the epoch + // bits of the mark word are equal to the epoch bits of the + // prototype header. (Note that the prototype header's epoch bits + // only change at a safepoint.) If not, attempt to rebias the object + // toward the current thread. Note that we must be absolutely sure + // that the current epoch is invalid in order to do this because + // otherwise the manipulations it performs on the mark word are + // illegal. 
+ andi(t0, tmp_reg, markWord::epoch_mask_in_place); + bnez(t0, try_rebias); + + // The epoch of the current bias is still valid but we know nothing + // about the owner; it might be set or it might be clear. Try to + // acquire the bias of the object using an atomic operation. If this + // fails we will go in to the runtime to revoke the object's bias. + // Note that we first construct the presumed unbiased header so we + // don't accidentally blow away another thread's valid bias. + { + Label cas_success; + Label counter; + li(t0, (int64_t)(markWord::biased_lock_mask_in_place | markWord::age_mask_in_place | markWord::epoch_mask_in_place)); + andr(swap_reg, swap_reg, t0); + orr(tmp_reg, swap_reg, xthread); + cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); + // cas failed here if slow_cass == NULL + if (flag->is_valid()) { + li(flag, 1); + j(counter); + } + + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + bind(cas_success); + if (flag->is_valid()) { + li(flag, 0); + bind(counter); + } + + if (counters != NULL) { + atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), + tmp_reg, t0); + } + } + j(done); + + bind(try_rebias); + // At this point we know the epoch has expired, meaning that the + // current "bias owner", if any, is actually invalid. Under these + // circumstances _only_, we are allowed to use the current header's + // value as the comparison value when doing the cas to acquire the + // bias in the current epoch. In other words, we allow transfer of + // the bias from one thread to another directly in this situation. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + { + Label cas_success; + Label counter; + load_prototype_header(tmp_reg, obj_reg); + orr(tmp_reg, xthread, tmp_reg); + cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); + // cas failed here if slow_cass == NULL + if (flag->is_valid()) { + li(flag, 1); + j(counter); + } + + // If the biasing toward our thread failed, then another thread + // succeeded in biasing it toward itself and we need to revoke that + // bias. The revocation will occur in the runtime in the slow case. + bind(cas_success); + if (flag->is_valid()) { + li(flag, 0); + bind(counter); + } + + if (counters != NULL) { + atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), + tmp_reg, t0); + } + } + j(done); + + // don't care flag unless jumping to done + bind(try_revoke_bias); + // The prototype mark in the klass doesn't have the bias bit set any + // more, indicating that objects of this data type are not supposed + // to be biased any more. We are going to try to reset the mark of + // this object to the prototype value and fall through to the + // CAS-based locking scheme. Note that if our CAS fails, it means + // that another thread raced us for the privilege of revoking the + // bias of this particular object, so it's okay to continue in the + // normal locking code. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. 
+ { + Label cas_success, nope; + load_prototype_header(tmp_reg, obj_reg); + cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); + bind(cas_success); + + // Fall through to the normal CAS-based lock, because no matter what + // the result of the above CAS, some thread must have succeeded in + // removing the bias bit from the object's header. + if (counters != NULL) { + atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, + t0); + } + bind(nope); + } + + bind(cas_label); +} + +void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { + Label retry_load; + bind(retry_load); + // flush and load exclusive from the memory location + lr_w(tmp, counter_addr); + addw(tmp, tmp, 1); + // if we store+flush with no intervening write tmp wil be zero + sc_w(tmp, tmp, counter_addr); + bnez(tmp, retry_load); +} + +void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { + assert(ReservedCodeCacheSize < 4*G, "branch out of range"); + assert(CodeCache::find_blob(entry.target()) != NULL, + "destination of far call not found in code cache"); + int32_t offset = 0; + if (far_branches()) { + // We can use auipc + jalr here because we know that the total size of + // the code cache cannot exceed 2Gb. + la_patchable(tmp, entry, offset); + if (cbuf != NULL) { cbuf->set_insts_mark(); } + jalr(x0, tmp, offset); + } else { + if (cbuf != NULL) { cbuf->set_insts_mark(); } + j(entry); + } +} + +void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { + assert(ReservedCodeCacheSize < 4*G, "branch out of range"); + assert(CodeCache::find_blob(entry.target()) != NULL, + "destination of far call not found in code cache"); + int32_t offset = 0; + if (far_branches()) { + // We can use auipc + jalr here because we know that the total size of + // the code cache cannot exceed 2Gb. + la_patchable(tmp, entry, offset); + if (cbuf != NULL) { cbuf->set_insts_mark(); } + jalr(x1, tmp, offset); // link + } else { + if (cbuf != NULL) { cbuf->set_insts_mark(); } + jal(entry); // link + } +} + +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register tmp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + Register super_check_offset) { + assert_different_registers(sub_klass, super_klass, tmp_reg); + bool must_load_sco = (super_check_offset == noreg); + if (must_load_sco) { + assert(tmp_reg != noreg, "supply either a temp or a register offset"); + } else { + assert_different_registers(sub_klass, super_klass, super_check_offset); + } + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in batch"); + + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); + Address super_check_offset_addr(super_klass, sco_offset); + + // Hacked jmp, which may only be used just before L_fallthrough. +#define final_jmp(label) \ + if (&(label) == &L_fallthrough) { /*do nothing*/ } \ + else j(label) /*omit semi*/ + + // If the pointers are equal, we are done (e.g., String[] elements). + // This self-check enables sharing of secondary supertype arrays among + // non-primary types such as array-of-interface. 
+  // Otherwise, each such type would need its own customized SSA.
+  // We move this check to the front of the fast path because many
+  // type checks are in fact trivially successful in this manner,
+  // so we get a nicely predicted branch right at the start of the check.
+  beq(sub_klass, super_klass, *L_success);
+
+  // Check the supertype display:
+  if (must_load_sco) {
+    lwu(tmp_reg, super_check_offset_addr);
+    super_check_offset = tmp_reg;
+  }
+  add(t0, sub_klass, super_check_offset);
+  Address super_check_addr(t0);
+  ld(t0, super_check_addr); // load displayed supertype
+
+  // This check has worked decisively for primary supers.
+  // Secondary supers are sought in the super_cache ('super_cache_addr').
+  // (Secondary supers are interfaces and very deeply nested subtypes.)
+  // This works in the same check above because of a tricky aliasing
+  // between the super_cache and the primary super display elements.
+  // (The 'super_check_addr' can address either, as the case requires.)
+  // Note that the cache is updated below if it does not help us find
+  // what we need immediately.
+  // So if it was a primary super, we can just fail immediately.
+  // Otherwise, it's the slow path for us (no success at this point).
+
+  beq(super_klass, t0, *L_success);
+  mv(t1, sc_offset);
+  if (L_failure == &L_fallthrough) {
+    beq(super_check_offset, t1, *L_slow_path);
+  } else {
+    bne(super_check_offset, t1, *L_failure, /* is_far */ true);
+    final_jmp(*L_slow_path);
+  }
+
+  bind(L_fallthrough);
+
+#undef final_jmp
+}
+
+// Scans count pointer sized words at [addr] for occurrence of value,
+// generic
+void MacroAssembler::repne_scan(Register addr, Register value, Register count,
+                                Register tmp) {
+  Label Lloop, Lexit;
+  beqz(count, Lexit);
+  bind(Lloop);
+  ld(tmp, addr);
+  beq(value, tmp, Lexit);
+  add(addr, addr, wordSize);
+  sub(count, count, 1);
+  bnez(count, Lloop);
+  bind(Lexit);
+}
+
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register tmp1_reg,
+                                                   Register tmp2_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure) {
+  assert_different_registers(sub_klass, super_klass, tmp1_reg);
+  if (tmp2_reg != noreg) {
+    assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0);
+  }
+#define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg)
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  // A couple of useful fields in sub_klass:
+  int ss_offset = in_bytes(Klass::secondary_supers_offset());
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  Address secondary_supers_addr(sub_klass, ss_offset);
+  Address super_cache_addr(sub_klass, sc_offset);
+
+  BLOCK_COMMENT("check_klass_subtype_slow_path");
+
+  // Do a linear scan of the secondary super-klass chain.
+  // This code is rarely used, so simplicity is a virtue here.
+  // The repne_scan instruction uses fixed registers, which we must spill.
+  // Don't worry too much about pre-existing connections with the input regs.
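+  // In effect the slow path is (C++-level sketch, for illustration only; the
+  // cache update is the sd into super_cache_addr further below):
+  //
+  //   for (each Klass* s in sub_klass->secondary_supers()) {
+  //     if (s == super_klass) {
+  //       sub_klass->secondary_super_cache = super_klass;  // remember the hit
+  //       return success;
+  //     }
+  //   }
+  //   return failure;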
+
+  assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super)
+  assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter)
+
+  RegSet pushed_registers;
+  if (!IS_A_TEMP(x12)) {
+    pushed_registers += x12;
+  }
+  if (!IS_A_TEMP(x15)) {
+    pushed_registers += x15;
+  }
+
+  if (super_klass != x10 || UseCompressedOops) {
+    if (!IS_A_TEMP(x10)) {
+      pushed_registers += x10;
+    }
+  }
+
+  push_reg(pushed_registers, sp);
+
+  // Get super_klass value into x10 (even if it was in x15 or x12)
+  mv(x10, super_klass);
+
+#ifndef PRODUCT
+  mv(t1, (address)&SharedRuntime::_partial_subtype_ctr);
+  Address pst_counter_addr(t1);
+  ld(t0, pst_counter_addr);
+  add(t0, t0, 1);
+  sd(t0, pst_counter_addr);
+#endif // PRODUCT
+
+  // We will consult the secondary-super array.
+  ld(x15, secondary_supers_addr);
+  // Load the array length.
+  lwu(x12, Address(x15, Array<Klass*>::length_offset_in_bytes()));
+  // Skip to start of data.
+  add(x15, x15, Array<Klass*>::base_offset_in_bytes());
+
+  // Set t0 to an obvious invalid value, falling through by default
+  li(t0, -1);
+  // Scan X12 words at [X15] for an occurrence of X10.
+  repne_scan(x15, x10, x12, t0);
+
+  // pop will restore x10, so we should use a temp register to keep its value
+  mv(t1, x10);
+
+  // Unspill the temp registers:
+  pop_reg(pushed_registers, sp);
+
+  bne(t1, t0, *L_failure);
+
+  // Success. Cache the super we found and proceed in triumph.
+  sd(super_klass, super_cache_addr);
+
+  if (L_success != &L_fallthrough) {
+    j(*L_success);
+  }
+
+#undef IS_A_TEMP
+
+  bind(L_fallthrough);
+}
+
+// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
+void MacroAssembler::tlab_allocate(Register obj,
+                                   Register var_size_in_bytes,
+                                   int con_size_in_bytes,
+                                   Register tmp1,
+                                   Register tmp2,
+                                   Label& slow_case,
+                                   bool is_far) {
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far);
+}
+
+// Defines obj, preserves var_size_in_bytes
+void MacroAssembler::eden_allocate(Register obj,
+                                   Register var_size_in_bytes,
+                                   int con_size_in_bytes,
+                                   Register tmp,
+                                   Label& slow_case,
+                                   bool is_far) {
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp, slow_case, is_far);
+}
+
+
+// get_thread() can be called anywhere inside generated code so we
+// need to save whatever non-callee save context might get clobbered
+// by the call to Thread::current() or, indeed, the call setup code.
+void MacroAssembler::get_thread(Register thread) { + // save all call-clobbered regs except thread + RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + + RegSet::range(x28, x31) + ra - thread; + push_reg(saved_regs, sp); + + int32_t offset = 0; + movptr_with_offset(ra, CAST_FROM_FN_PTR(address, Thread::current), offset); + jalr(ra, ra, offset); + if (thread != x10) { + mv(thread, x10); + } + + // restore pushed registers + pop_reg(saved_regs, sp); +} + +void MacroAssembler::load_byte_map_base(Register reg) { + CardTable::CardValue* byte_map_base = + ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); + li(reg, (uint64_t)byte_map_base); +} + +void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) { + relocInfo::relocType rtype = dest.rspec().reloc()->type(); + unsigned long low_address = (uintptr_t)CodeCache::low_bound(); + unsigned long high_address = (uintptr_t)CodeCache::high_bound(); + unsigned long dest_address = (uintptr_t)dest.target(); + long offset_low = dest_address - low_address; + long offset_high = dest_address - high_address; + + assert(is_valid_riscv64_address(dest.target()), "bad address"); + assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address"); + + InstructionMark im(this); + code_section()->relocate(inst_mark(), dest.rspec()); + // RISC-V doesn't compute a page-aligned address, in order to partially + // compensate for the use of *signed* offsets in its base+disp12 + // addressing mode (RISC-V's PC-relative reach remains asymmetric + // [-(2G + 2K), 2G - 2k). + if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) { + int64_t distance = dest.target() - pc(); + auipc(reg1, (int32_t)distance + 0x800); + offset = ((int32_t)distance << 20) >> 20; + } else { + movptr_with_offset(reg1, dest.target(), offset); + } +} + +void MacroAssembler::build_frame(int framesize) { + assert(framesize >= 2, "framesize must include space for FP/RA"); + assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); + sub(sp, sp, framesize); + sd(fp, Address(sp, framesize - 2 * wordSize)); + sd(ra, Address(sp, framesize - wordSize)); + if (PreserveFramePointer) { add(fp, sp, framesize); } + verify_cross_modify_fence_not_required(); +} + +void MacroAssembler::remove_frame(int framesize) { + assert(framesize >= 2, "framesize must include space for FP/RA"); + assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); + ld(fp, Address(sp, framesize - 2 * wordSize)); + ld(ra, Address(sp, framesize - wordSize)); + add(sp, sp, framesize); +} + +void MacroAssembler::reserved_stack_check() { + // testing if reserved zone needs to be enabled + Label no_reserved_zone_enabling; + + ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); + bltu(sp, t0, no_reserved_zone_enabling); + + enter(); // RA and FP are live. + mv(c_rarg0, xthread); + int32_t offset = 0; + la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset); + jalr(x1, t0, offset); + leave(); + + // We have already removed our own frame. + // throw_delayed_StackOverflowError will think that it's been + // called by our caller. + offset = 0; + la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset); + jalr(x0, t0, offset); + should_not_reach_here(); + + bind(no_reserved_zone_enabling); +} + +// Move the address of the polling page into dest. 
+void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { + ld(dest, Address(xthread, JavaThread::polling_page_offset())); +} + +// Read the polling page. The address of the polling page must +// already be in r. +address MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { + address mark; + { + InstructionMark im(this); + code_section()->relocate(inst_mark(), rtype); + lwu(zr, Address(r, offset)); + mark = inst_mark(); + } + verify_cross_modify_fence_not_required(); + return mark; +} + +void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { +#ifdef ASSERT + { + ThreadInVMfromUnknown tiv; + assert (UseCompressedOops, "should only be used for compressed oops"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); + assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); + } +#endif + int oop_index = oop_recorder()->find_index(obj); + InstructionMark im(this); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + code_section()->relocate(inst_mark(), rspec); + li32(dst, 0xDEADBEEF); + zero_extend(dst, dst, 32); +} + +void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { + assert (UseCompressedClassPointers, "should only be used for compressed headers"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int index = oop_recorder()->find_index(k); + assert(!Universe::heap()->is_in(k), "should not be an oop"); + + InstructionMark im(this); + RelocationHolder rspec = metadata_Relocation::spec(index); + code_section()->relocate(inst_mark(), rspec); + narrowKlass nk = CompressedKlassPointers::encode(k); + li32(dst, nk); + zero_extend(dst, dst, 32); +} + +// Maybe emit a call via a trampoline. If the code cache is small +// trampolines won't be emitted. +address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { + assert(JavaThread::current()->is_Compiler_thread(), "just checking"); + assert(entry.rspec().type() == relocInfo::runtime_call_type || + entry.rspec().type() == relocInfo::opt_virtual_call_type || + entry.rspec().type() == relocInfo::static_call_type || + entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); + + bool need_trampoline = far_branches(); + if (!need_trampoline && entry.rspec().type() == relocInfo::runtime_call_type && !CodeCache::contains(entry.target())) { + // If it is a runtime call of an address outside small CodeCache, + // we need to check whether it is in range. + address target = entry.target(); + assert(target < CodeCache::low_bound() || target >= CodeCache::high_bound(), "target is inside CodeCache"); + // Case 1: -------T-------L====CodeCache====H------- + // ^-------longest branch---| + // Case 2: -------L====CodeCache====H-------T------- + // |-------longest branch ---^ + address longest_branch_start = (target < CodeCache::low_bound()) ? CodeCache::high_bound() - NativeInstruction::instruction_size + : CodeCache::low_bound(); + need_trampoline = !reachable_from_branch_at(longest_branch_start, target); + } + + // We need a trampoline if branches are far. + if (need_trampoline) { + bool in_scratch_emit_size = false; +#ifdef COMPILER2 + // We don't want to emit a trampoline if C2 is generating dummy + // code during its branch shortening phase. 
+    CompileTask* task = ciEnv::current()->task();
+    in_scratch_emit_size =
+      (task != NULL && is_c2_compile(task->comp_level()) &&
+       Compile::current()->output()->in_scratch_emit_size());
+#endif
+    if (!in_scratch_emit_size) {
+      address stub = emit_trampoline_stub(offset(), entry.target());
+      if (stub == NULL) {
+        postcond(pc() == badAddress);
+        return NULL; // CodeCache is full
+      }
+    }
+  }
+
+  if (cbuf != NULL) { cbuf->set_insts_mark(); }
+  relocate(entry.rspec());
+  if (!need_trampoline) {
+    jal(entry.target());
+  } else {
+    jal(pc());
+  }
+  // just need to return a non-null address
+  postcond(pc() != badAddress);
+  return pc();
+}
+
+address MacroAssembler::ic_call(address entry, jint method_index) {
+  RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
+  movptr(t1, (address)Universe::non_oop_word());
+  assert_cond(entry != NULL);
+  return trampoline_call(Address(entry, rh));
+}
+
+// Emit a trampoline stub for a call to a target which is too far away.
+//
+// code sequences:
+//
+// call-site:
+//   branch-and-link to <destination> or <trampoline stub>
+//
+// Related trampoline stub for this call site in the stub section:
+//   load the call target from the constant pool
+//   branch (RA still points to the call site above)
+
+address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
+                                             address dest) {
+  address stub = start_a_stub(NativeInstruction::instruction_size +
+                              NativeCallTrampolineStub::instruction_size);
+  if (stub == NULL) {
+    return NULL;  // CodeBuffer::expand failed
+  }
+
+  // Create a trampoline stub relocation which relates this trampoline stub
+  // with the call instruction at insts_call_instruction_offset in the
+  // instructions code-section.
+
+  // make sure 4 byte aligned here, so that the destination address would be
+  // 8 byte aligned after 3 instructions
+  // when we reach here we may get a 2-byte alignment so need to align it
+  align(wordSize, NativeCallTrampolineStub::data_offset);
+
+  relocate(trampoline_stub_Relocation::spec(code()->insts()->start() +
+                                            insts_call_instruction_offset));
+  const int stub_start_offset = offset();
+
+  // Now, create the trampoline stub's code:
+  // - load the call
+  // - call
+  Label target;
+  ld(t0, target);  // auipc + ld
+  jr(t0);          // jalr
+  bind(target);
+  assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset,
+         "should be");
+  assert(offset() % wordSize == 0, "bad alignment");
+  emit_int64((intptr_t)dest);
+
+  const address stub_start_addr = addr_at(stub_start_offset);
+
+  assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
+
+  end_a_stub();
+  return stub_start_addr;
+}
+
+Address MacroAssembler::add_memory_helper(const Address dst) {
+  switch (dst.getMode()) {
+    case Address::base_plus_offset:
+      // This is the expected mode, although we allow all the other
+      // forms below.
+ return form_address(t1, dst.base(), dst.offset()); + default: + la(t1, dst); + return Address(t1); + } +} + +void MacroAssembler::add_memory_int64(const Address dst, int64_t imm) { + Address adr = add_memory_helper(dst); + assert_different_registers(adr.base(), t0); + ld(t0, adr); + addi(t0, t0, imm); + sd(t0, adr); +} + +void MacroAssembler::add_memory_int32(const Address dst, int32_t imm) { + Address adr = add_memory_helper(dst); + assert_different_registers(adr.base(), t0); + lwu(t0, adr); + addiw(t0, t0, imm); + sw(t0, adr); +} + +void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { + assert_different_registers(src1, t0); + int32_t offset; + la_patchable(t0, src2, offset); + ld(t0, Address(t0, offset)); + beq(src1, t0, equal); +} + +void MacroAssembler::load_method_holder_cld(Register result, Register method) { + load_method_holder(result, method); + ld(result, Address(result, InstanceKlass::class_loader_data_offset())); +} + +void MacroAssembler::load_method_holder(Register holder, Register method) { + ld(holder, Address(method, Method::const_offset())); // ConstMethod* + ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* + ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* +} + +// string indexof +// compute index by trailing zeros +void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, + Register match_mask, Register result, + Register ch2, Register tmp, + bool haystack_isL) +{ + int haystack_chr_shift = haystack_isL ? 0 : 1; + srl(match_mask, match_mask, trailing_zeros); + srli(match_mask, match_mask, 1); + srli(tmp, trailing_zeros, LogBitsPerByte); + if (!haystack_isL) andi(tmp, tmp, 0xE); + add(haystack, haystack, tmp); + ld(ch2, Address(haystack)); + if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); + add(result, result, tmp); +} + +// string indexof +// Find pattern element in src, compute match mask, +// only the first occurrence of 0x80/0x8000 at low bits is the valid match index +// match mask patterns and corresponding indices would be like: +// - 0x8080808080808080 (Latin1) +// - 7 6 5 4 3 2 1 0 (match index) +// - 0x8000800080008000 (UTF16) +// - 3 2 1 0 (match index) +void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, + Register mask1, Register mask2) +{ + xorr(src, pattern, src); + sub(match_mask, src, mask1); + orr(src, src, mask2); + notr(src, src); + andr(match_mask, match_mask, src); +} + +#ifdef COMPILER2 +// Code for BigInteger::mulAdd instrinsic +// out = x10 +// in = x11 +// offset = x12 (already out.length-offset) +// len = x13 +// k = x14 +// tmp = x28 +// +// pseudo code from java implementation: +// long kLong = k & LONG_MASK; +// carry = 0; +// offset = out.length-offset - 1; +// for (int j = len - 1; j >= 0; j--) { +// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; +// out[offset--] = (int)product; +// carry = product >>> 32; +// } +// return (int)carry; +void MacroAssembler::mul_add(Register out, Register in, Register offset, + Register len, Register k, Register tmp) { + Label L_tail_loop, L_unroll, L_end; + mv(tmp, out); + mv(out, zr); + blez(len, L_end); + zero_extend(k, k, 32); + slliw(t0, offset, LogBytesPerInt); + add(offset, tmp, t0); + slliw(t0, len, LogBytesPerInt); + add(in, in, t0); + + const int unroll = 8; + li(tmp, unroll); + blt(len, tmp, L_tail_loop); + bind(L_unroll); + for (int i = 0; i < unroll; i++) { + sub(in, in, BytesPerInt); + lwu(t0, Address(in, 
0)); + mul(t1, t0, k); + add(t0, t1, out); + sub(offset, offset, BytesPerInt); + lwu(t1, Address(offset, 0)); + add(t0, t0, t1); + sw(t0, Address(offset, 0)); + srli(out, t0, 32); + } + subw(len, len, tmp); + bge(len, tmp, L_unroll); + + bind(L_tail_loop); + blez(len, L_end); + sub(in, in, BytesPerInt); + lwu(t0, Address(in, 0)); + mul(t1, t0, k); + add(t0, t1, out); + sub(offset, offset, BytesPerInt); + lwu(t1, Address(offset, 0)); + add(t0, t0, t1); + sw(t0, Address(offset, 0)); + srli(out, t0, 32); + subw(len, len, 1); + j(L_tail_loop); + + bind(L_end); +} + +// add two unsigned input and output carry +void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) +{ + assert_different_registers(dst, carry); + assert_different_registers(dst, src2); + add(dst, src1, src2); + sltu(carry, dst, src2); +} + +// add two input with carry +void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) +{ + assert_different_registers(dst, carry); + add(dst, src1, src2); + add(dst, dst, carry); +} + +// add two unsigned input with carry and output carry +void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) +{ + assert_different_registers(dst, src2); + adc(dst, src1, src2, carry); + sltu(carry, dst, src2); +} + +void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, + Register src1, Register src2, Register carry) +{ + cad(dest_lo, dest_lo, src1, carry); + add(dest_hi, dest_hi, carry); + cad(dest_lo, dest_lo, src2, carry); + add(final_dest_hi, dest_hi, carry); +} + +/** + * Multiply 32 bit by 32 bit first loop. + */ +void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx) +{ + // jlong carry, x[], y[], z[]; + // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { + // long product = y[idx] * x[xstart] + carry; + // z[kdx] = (int)product; + // carry = product >>> 32; + // } + // z[xstart] = (int)carry; + + Label L_first_loop, L_first_loop_exit; + blez(idx, L_first_loop_exit); + + shadd(t0, xstart, x, t0, LogBytesPerInt); + lwu(x_xstart, Address(t0, 0)); + + bind(L_first_loop); + subw(idx, idx, 1); + shadd(t0, idx, y, t0, LogBytesPerInt); + lwu(y_idx, Address(t0, 0)); + mul(product, x_xstart, y_idx); + add(product, product, carry); + srli(carry, product, 32); + subw(kdx, kdx, 1); + shadd(t0, kdx, z, t0, LogBytesPerInt); + sw(product, Address(t0, 0)); + bgtz(idx, L_first_loop); + + bind(L_first_loop_exit); +} + +/** + * Multiply 64 bit by 64 bit first loop. 
+ */ +void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx) +{ + // + // jlong carry, x[], y[], z[]; + // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { + // huge_128 product = y[idx] * x[xstart] + carry; + // z[kdx] = (jlong)product; + // carry = (jlong)(product >>> 64); + // } + // z[xstart] = carry; + // + + Label L_first_loop, L_first_loop_exit; + Label L_one_x, L_one_y, L_multiply; + + subw(xstart, xstart, 1); + bltz(xstart, L_one_x); + + shadd(t0, xstart, x, t0, LogBytesPerInt); + ld(x_xstart, Address(t0, 0)); + ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian + + bind(L_first_loop); + subw(idx, idx, 1); + bltz(idx, L_first_loop_exit); + subw(idx, idx, 1); + bltz(idx, L_one_y); + + shadd(t0, idx, y, t0, LogBytesPerInt); + ld(y_idx, Address(t0, 0)); + ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian + bind(L_multiply); + + mulhu(t0, x_xstart, y_idx); + mul(product, x_xstart, y_idx); + cad(product, product, carry, t1); + adc(carry, t0, zr, t1); + + subw(kdx, kdx, 2); + ror_imm(product, product, 32); // back to big-endian + shadd(t0, kdx, z, t0, LogBytesPerInt); + sd(product, Address(t0, 0)); + + j(L_first_loop); + + bind(L_one_y); + lwu(y_idx, Address(y, 0)); + j(L_multiply); + + bind(L_one_x); + lwu(x_xstart, Address(x, 0)); + j(L_first_loop); + + bind(L_first_loop_exit); +} + +/** + * Multiply 128 bit by 128 bit. Unrolled inner loop. + * + */ +void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, + Register carry, Register carry2, + Register idx, Register jdx, + Register yz_idx1, Register yz_idx2, + Register tmp, Register tmp3, Register tmp4, + Register tmp6, Register product_hi) +{ + // jlong carry, x[], y[], z[]; + // int kdx = xstart+1; + // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop + // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; + // jlong carry2 = (jlong)(tmp3 >>> 64); + // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; + // carry = (jlong)(tmp4 >>> 64); + // z[kdx+idx+1] = (jlong)tmp3; + // z[kdx+idx] = (jlong)tmp4; + // } + // idx += 2; + // if (idx > 0) { + // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; + // z[kdx+idx] = (jlong)yz_idx1; + // carry = (jlong)(yz_idx1 >>> 64); + // } + // + + Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; + + srliw(jdx, idx, 2); + + bind(L_third_loop); + + subw(jdx, jdx, 1); + bltz(jdx, L_third_loop_exit); + subw(idx, idx, 4); + + shadd(t0, idx, y, t0, LogBytesPerInt); + ld(yz_idx2, Address(t0, 0)); + ld(yz_idx1, Address(t0, wordSize)); + + shadd(tmp6, idx, z, t0, LogBytesPerInt); + + ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian + ror_imm(yz_idx2, yz_idx2, 32); + + ld(t1, Address(tmp6, 0)); + ld(t0, Address(tmp6, wordSize)); + + mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 + mulhu(tmp4, product_hi, yz_idx1); + + ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian + ror_imm(t1, t1, 32, tmp); + + mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp + mulhu(carry2, product_hi, yz_idx2); + + cad(tmp3, tmp3, carry, carry); + adc(tmp4, tmp4, zr, carry); + cad(tmp3, tmp3, t0, t0); + cadc(tmp4, tmp4, tmp, t0); + adc(carry, carry2, zr, t0); + cad(tmp4, tmp4, t1, carry2); + adc(carry, carry, zr, carry2); + + ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian + 
ror_imm(tmp4, tmp4, 32); + sd(tmp4, Address(tmp6, 0)); + sd(tmp3, Address(tmp6, wordSize)); + + j(L_third_loop); + + bind(L_third_loop_exit); + + andi(idx, idx, 0x3); + beqz(idx, L_post_third_loop_done); + + Label L_check_1; + subw(idx, idx, 2); + bltz(idx, L_check_1); + + shadd(t0, idx, y, t0, LogBytesPerInt); + ld(yz_idx1, Address(t0, 0)); + ror_imm(yz_idx1, yz_idx1, 32); + + mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 + mulhu(tmp4, product_hi, yz_idx1); + + shadd(t0, idx, z, t0, LogBytesPerInt); + ld(yz_idx2, Address(t0, 0)); + ror_imm(yz_idx2, yz_idx2, 32, tmp); + + add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); + + ror_imm(tmp3, tmp3, 32, tmp); + sd(tmp3, Address(t0, 0)); + + bind(L_check_1); + + andi(idx, idx, 0x1); + subw(idx, idx, 1); + bltz(idx, L_post_third_loop_done); + shadd(t0, idx, y, t0, LogBytesPerInt); + lwu(tmp4, Address(t0, 0)); + mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 + mulhu(carry2, tmp4, product_hi); + + shadd(t0, idx, z, t0, LogBytesPerInt); + lwu(tmp4, Address(t0, 0)); + + add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); + + shadd(t0, idx, z, t0, LogBytesPerInt); + sw(tmp3, Address(t0, 0)); + + slli(t0, carry2, 32); + srli(carry, tmp3, 32); + orr(carry, carry, t0); + + bind(L_post_third_loop_done); +} + +/** + * Code for BigInteger::multiplyToLen() intrinsic. + * + * x10: x + * x11: xlen + * x12: y + * x13: ylen + * x14: z + * x15: zlen + * x16: tmp1 + * x17: tmp2 + * x7: tmp3 + * x28: tmp4 + * x29: tmp5 + * x30: tmp6 + * x31: tmp7 + */ +void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, + Register z, Register zlen, + Register tmp1, Register tmp2, Register tmp3, Register tmp4, + Register tmp5, Register tmp6, Register product_hi) +{ + assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); + + const Register idx = tmp1; + const Register kdx = tmp2; + const Register xstart = tmp3; + + const Register y_idx = tmp4; + const Register carry = tmp5; + const Register product = xlen; + const Register x_xstart = zlen; // reuse register + + mv(idx, ylen); // idx = ylen; + mv(kdx, zlen); // kdx = xlen+ylen; + mv(carry, zr); // carry = 0; + + Label L_multiply_64_x_64_loop, L_done; + + subw(xstart, xlen, 1); + bltz(xstart, L_done); + + const Register jdx = tmp1; + + if (AvoidUnalignedAccesses) { + // Check if x and y are both 8-byte aligned. 
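+    // (The element counts are in 32-bit limbs, so the test below checks that
+    //  both xlen and ylen are even; only then do the paired 64-bit loads used
+    //  by the 64x64 loop stay 8-byte aligned, otherwise we fall back to
+    //  32-bit-at-a-time steps.)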
+ orr(t0, xlen, ylen); + andi(t0, t0, 0x1); + beqz(t0, L_multiply_64_x_64_loop); + + multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); + shadd(t0, xstart, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + + Label L_second_loop_unaligned; + bind(L_second_loop_unaligned); + mv(carry, zr); + mv(jdx, ylen); + subw(xstart, xstart, 1); + bltz(xstart, L_done); + sub(sp, sp, 2 * wordSize); + sd(z, Address(sp, 0)); + sd(zr, Address(sp, wordSize)); + shadd(t0, xstart, z, t0, LogBytesPerInt); + addi(z, t0, 4); + shadd(t0, xstart, x, t0, LogBytesPerInt); + lwu(product, Address(t0, 0)); + Label L_third_loop, L_third_loop_exit; + + blez(jdx, L_third_loop_exit); + + bind(L_third_loop); + subw(jdx, jdx, 1); + shadd(t0, jdx, y, t0, LogBytesPerInt); + lwu(t0, Address(t0, 0)); + mul(t1, t0, product); + add(t0, t1, carry); + shadd(tmp6, jdx, z, t1, LogBytesPerInt); + lwu(t1, Address(tmp6, 0)); + add(t0, t0, t1); + sw(t0, Address(tmp6, 0)); + srli(carry, t0, 32); + bgtz(jdx, L_third_loop); + + bind(L_third_loop_exit); + ld(z, Address(sp, 0)); + addi(sp, sp, 2 * wordSize); + shadd(t0, xstart, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + + j(L_second_loop_unaligned); + } + + bind(L_multiply_64_x_64_loop); + multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); + + Label L_second_loop_aligned; + beqz(kdx, L_second_loop_aligned); + + Label L_carry; + subw(kdx, kdx, 1); + beqz(kdx, L_carry); + + shadd(t0, kdx, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + srli(carry, carry, 32); + subw(kdx, kdx, 1); + + bind(L_carry); + shadd(t0, kdx, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + + // Second and third (nested) loops. + // + // for (int i = xstart-1; i >= 0; i--) { // Second loop + // carry = 0; + // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop + // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + + // (z[k] & LONG_MASK) + carry; + // z[k] = (int)product; + // carry = product >>> 32; + // } + // z[i] = (int)carry; + // } + // + // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi + + bind(L_second_loop_aligned); + mv(carry, zr); // carry = 0; + mv(jdx, ylen); // j = ystart+1 + + subw(xstart, xstart, 1); // i = xstart-1; + bltz(xstart, L_done); + + sub(sp, sp, 4 * wordSize); + sd(z, Address(sp, 0)); + + Label L_last_x; + shadd(t0, xstart, z, t0, LogBytesPerInt); + addi(z, t0, 4); + subw(xstart, xstart, 1); // i = xstart-1; + bltz(xstart, L_last_x); + + shadd(t0, xstart, x, t0, LogBytesPerInt); + ld(product_hi, Address(t0, 0)); + ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian + + Label L_third_loop_prologue; + bind(L_third_loop_prologue); + + sd(ylen, Address(sp, wordSize)); + sd(x, Address(sp, 2 * wordSize)); + sd(xstart, Address(sp, 3 * wordSize)); + multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, + tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); + ld(z, Address(sp, 0)); + ld(ylen, Address(sp, wordSize)); + ld(x, Address(sp, 2 * wordSize)); + ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen + addi(sp, sp, 4 * wordSize); + + addiw(tmp3, xlen, 1); + shadd(t0, tmp3, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + + subw(tmp3, tmp3, 1); + bltz(tmp3, L_done); + + srli(carry, carry, 32); + shadd(t0, tmp3, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + j(L_second_loop_aligned); + + // Next infrequent code is moved outside loops. 
+ bind(L_last_x); + lwu(product_hi, Address(x, 0)); + j(L_third_loop_prologue); + + bind(L_done); +} +#endif + +// Count bits of trailing zero chars from lsb to msb until first non-zero element. +// For LL case, one byte for one element, so shift 8 bits once, and for other case, +// shift 16 bits once. +void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) +{ + if (UseRVB) { + assert_different_registers(Rd, Rs, tmp1); + int step = isLL ? 8 : 16; + ctz(Rd, Rs); + andi(tmp1, Rd, step - 1); + sub(Rd, Rd, tmp1); + return; + } + assert_different_registers(Rd, Rs, tmp1, tmp2); + Label Loop; + int step = isLL ? 8 : 16; + li(Rd, -step); + mv(tmp2, Rs); + + bind(Loop); + addi(Rd, Rd, step); + andi(tmp1, tmp2, ((1 << step) - 1)); + srli(tmp2, tmp2, step); + beqz(tmp1, Loop); +} + +// This instruction reads adjacent 4 bytes from the lower half of source register, +// inflate into a register, for example: +// Rs: A7A6A5A4A3A2A1A0 +// Rd: 00A300A200A100A0 +void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) +{ + assert_different_registers(Rd, Rs, tmp1, tmp2); + li(tmp1, 0xFF); + mv(Rd, zr); + for (int i = 0; i <= 3; i++) + { + andr(tmp2, Rs, tmp1); + if (i) { + slli(tmp2, tmp2, i * 8); + } + orr(Rd, Rd, tmp2); + if (i != 3) { + slli(tmp1, tmp1, 8); + } + } +} + +// This instruction reads adjacent 4 bytes from the upper half of source register, +// inflate into a register, for example: +// Rs: A7A6A5A4A3A2A1A0 +// Rd: 00A700A600A500A4 +void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) +{ + assert_different_registers(Rd, Rs, tmp1, tmp2); + li(tmp1, 0xFF00000000); + mv(Rd, zr); + for (int i = 0; i <= 3; i++) + { + andr(tmp2, Rs, tmp1); + orr(Rd, Rd, tmp2); + srli(Rd, Rd, 8); + if (i != 3) { + slli(tmp1, tmp1, 8); + } + } +} + +// The size of the blocks erased by the zero_blocks stub. We must +// handle anything smaller than this ourselves in zero_words(). +const int MacroAssembler::zero_words_block_size = 8; + +// zero_words() is used by C2 ClearArray patterns. It is as small as +// possible, handling small word counts locally and delegating +// anything larger to the zero_blocks stub. It is expanded many times +// in compiled code, so it is important to keep it short. + +// ptr: Address of a buffer to be zeroed. +// cnt: Count in HeapWords. +// +// ptr, cnt, and t0 are clobbered. 
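+//
+// In outline (illustrative sketch of the control flow below; the zero_blocks
+// stub contract is assumed here):
+//
+//   if (cnt >= zero_words_block_size)   // 8 words or more
+//     call the zero_blocks stub, which clears the bulk and leaves the
+//     remainder (fewer than 8 words) in cnt;
+//   then clear the remaining (cnt & 7) words inline, unrolled according to
+//   the set bits of cnt (4, 2, 1).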
+address MacroAssembler::zero_words(Register ptr, Register cnt) +{ + assert(is_power_of_2(zero_words_block_size), "adjust this"); + assert(ptr == x28 && cnt == x29, "mismatch in register usage"); + assert_different_registers(cnt, t0); + + BLOCK_COMMENT("zero_words {"); + mv(t0, zero_words_block_size); + Label around, done, done16; + bltu(cnt, t0, around); + { + RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); + assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); + if (StubRoutines::riscv::complete()) { + address tpc = trampoline_call(zero_blocks); + if (tpc == NULL) { + DEBUG_ONLY(reset_labels(around)); + postcond(pc() == badAddress); + return NULL; + } + } else { + jal(zero_blocks); + } + } + bind(around); + for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { + Label l; + andi(t0, cnt, i); + beqz(t0, l); + for (int j = 0; j < i; j++) { + sd(zr, Address(ptr, 0)); + addi(ptr, ptr, 8); + } + bind(l); + } + { + Label l; + andi(t0, cnt, 1); + beqz(t0, l); + sd(zr, Address(ptr, 0)); + bind(l); + } + BLOCK_COMMENT("} zero_words"); + postcond(pc() != badAddress); + return pc(); +} + +#define SmallArraySize (18 * BytesPerLong) + +// base: Address of a buffer to be zeroed, 8 bytes aligned. +// cnt: Immediate count in HeapWords. +void MacroAssembler::zero_words(Register base, u_int64_t cnt) +{ + assert_different_registers(base, t0, t1); + + BLOCK_COMMENT("zero_words {"); + + if (cnt <= SmallArraySize / BytesPerLong) { + for (int i = 0; i < (int)cnt; i++) { + sd(zr, Address(base, i * wordSize)); + } + } else { + const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll + int remainder = cnt % unroll; + for (int i = 0; i < remainder; i++) { + sd(zr, Address(base, i * wordSize)); + } + + Label loop; + Register cnt_reg = t0; + Register loop_base = t1; + cnt = cnt - remainder; + li(cnt_reg, cnt); + add(loop_base, base, remainder * wordSize); + bind(loop); + sub(cnt_reg, cnt_reg, unroll); + for (int i = 0; i < unroll; i++) { + sd(zr, Address(loop_base, i * wordSize)); + } + add(loop_base, loop_base, unroll * wordSize); + bnez(cnt_reg, loop); + } + + BLOCK_COMMENT("} zero_words"); +} + +// base: Address of a buffer to be filled, 8 bytes aligned. +// cnt: Count in 8-byte unit. +// value: Value to be filled with. +// base will point to the end of the buffer after filling. +void MacroAssembler::fill_words(Register base, Register cnt, Register value) +{ +// Algorithm: +// +// t0 = cnt & 7 +// cnt -= t0 +// p += t0 +// switch (t0): +// switch start: +// do while cnt +// cnt -= 8 +// p[-8] = value +// case 7: +// p[-7] = value +// case 6: +// p[-6] = value +// // ... +// case 1: +// p[-1] = value +// case 0: +// p += 8 +// do-while end +// switch end + + assert_different_registers(base, cnt, value, t0, t1); + + Label fini, skip, entry, loop; + const int unroll = 8; // Number of sd instructions we'll unroll + + beqz(cnt, fini); + + andi(t0, cnt, unroll - 1); + sub(cnt, cnt, t0); + // align 8, so first sd n % 8 = mod, next loop sd 8 * n. 
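+  // The computed jump below lands (cnt % 8) 'sd' instructions before the
+  // 'entry' label (each 'sd' is assumed to occupy 4 bytes, as the comment on
+  // the slli below notes), so exactly the remainder is stored first; the
+  // unrolled loop then handles the remaining multiple of 8 words, in a
+  // Duff's-device style dispatch.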
+ shadd(base, t0, base, t1, 3); + la(t1, entry); + slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) + sub(t1, t1, t0); + jr(t1); + + bind(loop); + add(base, base, unroll * 8); + for (int i = -unroll; i < 0; i++) { + sd(value, Address(base, i * 8)); + } + bind(entry); + sub(cnt, cnt, unroll); + bgez(cnt, loop); + + bind(fini); +} + +#define FCVT_SAFE(FLOATCVT, FLOATEQ) \ +void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ + Label L_Okay; \ + fscsr(zr); \ + FLOATCVT(dst, src); \ + frcsr(tmp); \ + andi(tmp, tmp, 0x1E); \ + beqz(tmp, L_Okay); \ + FLOATEQ(tmp, src, src); \ + bnez(tmp, L_Okay); \ + mv(dst, zr); \ + bind(L_Okay); \ +} + +FCVT_SAFE(fcvt_w_s, feq_s) +FCVT_SAFE(fcvt_l_s, feq_s) +FCVT_SAFE(fcvt_w_d, feq_d) +FCVT_SAFE(fcvt_l_d, feq_d) + +#undef FCVT_SAFE + +#define FCMP(FLOATTYPE, FLOATSIG) \ +void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ + FloatRegister Rs2, int unordered_result) { \ + Label Ldone; \ + if (unordered_result < 0) { \ + /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ + /* installs 1 if gt else 0 */ \ + flt_##FLOATSIG(result, Rs2, Rs1); \ + /* Rs1 > Rs2, install 1 */ \ + bgtz(result, Ldone); \ + feq_##FLOATSIG(result, Rs1, Rs2); \ + addi(result, result, -1); \ + /* Rs1 = Rs2, install 0 */ \ + /* NaN or Rs1 < Rs2, install -1 */ \ + bind(Ldone); \ + } else { \ + /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \ + /* installs 1 if gt or unordered else 0 */ \ + flt_##FLOATSIG(result, Rs1, Rs2); \ + /* Rs1 < Rs2, install -1 */ \ + bgtz(result, Ldone); \ + feq_##FLOATSIG(result, Rs1, Rs2); \ + addi(result, result, -1); \ + /* Rs1 = Rs2, install 0 */ \ + /* NaN or Rs1 > Rs2, install 1 */ \ + bind(Ldone); \ + neg(result, result); \ + } \ +} + +FCMP(float, s); +FCMP(double, d); + +#undef FCMP + +// Zero words; len is in bytes +// Destroys all registers except addr +// len must be a nonzero multiple of wordSize +void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { + assert_different_registers(addr, len, tmp, t0, t1); + +#ifdef ASSERT + { + Label L; + andi(t0, len, BytesPerWord - 1); + beqz(t0, L); + stop("len is not a multiple of BytesPerWord"); + bind(L); + } +#endif // ASSERT + +#ifndef PRODUCT + block_comment("zero memory"); +#endif // PRODUCT + + Label loop; + Label entry; + + // Algorithm: + // + // t0 = cnt & 7 + // cnt -= t0 + // p += t0 + // switch (t0) { + // do { + // cnt -= 8 + // p[-8] = 0 + // case 7: + // p[-7] = 0 + // case 6: + // p[-6] = 0 + // ... 
+ // case 1: + // p[-1] = 0 + // case 0: + // p += 8 + // } while (cnt) + // } + + const int unroll = 8; // Number of sd(zr) instructions we'll unroll + + srli(len, len, LogBytesPerWord); + andi(t0, len, unroll - 1); // t0 = cnt % unroll + sub(len, len, t0); // cnt -= unroll + // tmp always points to the end of the region we're about to zero + shadd(tmp, t0, addr, t1, LogBytesPerWord); + la(t1, entry); + slli(t0, t0, 2); + sub(t1, t1, t0); + jr(t1); + bind(loop); + sub(len, len, unroll); + for (int i = -unroll; i < 0; i++) { + Assembler::sd(zr, Address(tmp, i * wordSize)); + } + bind(entry); + add(tmp, tmp, unroll * wordSize); + bnez(len, loop); +} + +// shift left by shamt and add +// Rd = (Rs1 << shamt) + Rs2 +void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { + if (UseRVB) { + if (shamt == 1) { + sh1add(Rd, Rs1, Rs2); + return; + } else if (shamt == 2) { + sh2add(Rd, Rs1, Rs2); + return; + } else if (shamt == 3) { + sh3add(Rd, Rs1, Rs2); + return; + } + } + + if (shamt != 0) { + slli(tmp, Rs1, shamt); + add(Rd, Rs2, tmp); + } else { + add(Rd, Rs1, Rs2); + } +} + +void MacroAssembler::zero_extend(Register dst, Register src, int bits) { + if (UseRVB) { + if (bits == 16) { + zext_h(dst, src); + return; + } else if (bits == 32) { + zext_w(dst, src); + return; + } + } + + if (bits == 8) { + zext_b(dst, src); + } else { + slli(dst, src, XLEN - bits); + srli(dst, dst, XLEN - bits); + } +} + +void MacroAssembler::sign_extend(Register dst, Register src, int bits) { + if (UseRVB) { + if (bits == 8) { + sext_b(dst, src); + return; + } else if (bits == 16) { + sext_h(dst, src); + return; + } + } + + if (bits == 32) { + sext_w(dst, src); + } else { + slli(dst, src, XLEN - bits); + srai(dst, dst, XLEN - bits); + } +} + +void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) +{ + if (src1 == src2) { + mv(dst, zr); + return; + } + Label done; + Register left = src1; + Register right = src2; + if (dst == src1) { + assert_different_registers(dst, src2, tmp); + mv(tmp, src1); + left = tmp; + } else if (dst == src2) { + assert_different_registers(dst, src1, tmp); + mv(tmp, src2); + right = tmp; + } + + // installs 1 if gt else 0 + slt(dst, right, left); + bnez(dst, done); + slt(dst, left, right); + // dst = -1 if lt; else if eq , dst = 0 + neg(dst, dst); + bind(done); +} + +void MacroAssembler::safepoint_ifence() { + ifence(); +#ifndef PRODUCT + if (VerifyCrossModifyFence) { + // Clear the thread state. + sb(zr, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); + } +#endif +} + +#ifndef PRODUCT +void MacroAssembler::verify_cross_modify_fence_not_required() { + if (VerifyCrossModifyFence) { + // Check if thread needs a cross modify fence. + lbu(t0, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); + Label fence_not_required; + beqz(t0, fence_not_required); + // If it does then fail. + la(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::verify_cross_modify_fence_failure))); + mv(c_rarg0, xthread); + jalr(t0); + bind(fence_not_required); + } +} +#endif Index: jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp =================================================================== --- /dev/null +++ jdk17u-jdk-17.0.5-8/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp @@ -0,0 +1,881 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. 
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP +#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP + +#include "asm/assembler.hpp" +#include "metaprogramming/enableIf.hpp" +#include "oops/compressedOops.hpp" +#include "utilities/powerOfTwo.hpp" + +// MacroAssembler extends Assembler by frequently used macros. +// +// Instructions for which a 'better' code sequence exists depending +// on arguments should also go in here. + +class MacroAssembler: public Assembler { + + public: + MacroAssembler(CodeBuffer* code) : Assembler(code) { + } + virtual ~MacroAssembler() {} + + void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod); + + // Place a fence.i after code may have been modified due to a safepoint. + void safepoint_ifence(); + + // Alignment + void align(int modulus, int extra_offset = 0); + + // Stack frame creation/removal + // Note that SP must be updated to the right place before saving/restoring RA and FP + // because signal based thread suspend/resume could happen asynchronously. + void enter() { + addi(sp, sp, - 2 * wordSize); + sd(ra, Address(sp, wordSize)); + sd(fp, Address(sp)); + addi(fp, sp, 2 * wordSize); + } + + void leave() { + addi(sp, fp, - 2 * wordSize); + ld(fp, Address(sp)); + ld(ra, Address(sp, wordSize)); + addi(sp, sp, 2 * wordSize); + } + + + // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) + // The pointer will be loaded into the thread register. + void get_thread(Register thread); + + // Support for VM calls + // + // It is imperative that all calls into the VM are handled via the call_VM macros. + // They make sure that the stack linkage is setup correctly. call_VM's correspond + // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
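Before the call_VM declarations below, a small host-side sketch (not part of the patch) of the two-slot frame that the enter()/leave() pair earlier in this header builds: sp drops by two words, ra and fp are saved, and fp is re-pointed at sp + 2 * wordSize. The simulated stack array and constants here are illustrative only.

#include <cassert>
#include <cstdint>

int main() {
  const int wordSize = 8;
  uint64_t stack[64] = {0};
  uint64_t sp = reinterpret_cast<uint64_t>(&stack[32]);
  uint64_t fp = 0x1111, ra = 0x2222;                   // caller's frame pointer / return address

  // enter()
  sp -= 2 * wordSize;                                  // addi(sp, sp, -2 * wordSize)
  *reinterpret_cast<uint64_t*>(sp + wordSize) = ra;    // sd(ra, Address(sp, wordSize))
  *reinterpret_cast<uint64_t*>(sp) = fp;               // sd(fp, Address(sp))
  fp = sp + 2 * wordSize;                              // addi(fp, sp, 2 * wordSize)

  // leave()
  uint64_t new_sp = fp - 2 * wordSize;                 // addi(sp, fp, -2 * wordSize)
  uint64_t restored_fp = *reinterpret_cast<uint64_t*>(new_sp);            // ld(fp, Address(sp))
  uint64_t restored_ra = *reinterpret_cast<uint64_t*>(new_sp + wordSize); // ld(ra, Address(sp, wordSize))
  new_sp += 2 * wordSize;                              // addi(sp, sp, 2 * wordSize)

  assert(restored_fp == 0x1111 && restored_ra == 0x2222);
  assert(new_sp == reinterpret_cast<uint64_t>(&stack[32]));
  return 0;
}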
+ + void call_VM(Register oop_result, + address entry_point, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + // Overloadings with last_Java_sp + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments = 0, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + void get_vm_result(Register oop_result, Register java_thread); + void get_vm_result_2(Register metadata_result, Register java_thread); + + // These always tightly bind to MacroAssembler::call_VM_leaf_base + // bypassing the virtual implementation + void call_VM_leaf(address entry_point, + int number_of_arguments = 0); + void call_VM_leaf(address entry_point, + Register arg_0); + void call_VM_leaf(address entry_point, + Register arg_0, Register arg_1); + void call_VM_leaf(address entry_point, + Register arg_0, Register arg_1, Register arg_2); + + // These always tightly bind to MacroAssembler::call_VM_base + // bypassing the virtual implementation + void super_call_VM_leaf(address entry_point, Register arg_0); + void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1); + void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2); + void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3); + + // last Java Frame (fills frame anchor) + void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp); + void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp); + void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp); + + // thread in the default location (xthread) + void reset_last_Java_frame(bool clear_fp); + + void call_native(address entry_point, + Register arg_0); + void call_native_base( + address entry_point, // the entry point + Label* retaddr = NULL + ); + + virtual void call_VM_leaf_base( + address entry_point, // the entry point + int number_of_arguments, // the number of arguments to pop after the call + Label* retaddr = NULL + ); + + virtual void call_VM_leaf_base( + address entry_point, // the entry point + int number_of_arguments, // the number of arguments to pop after the call + Label& retaddr) { + call_VM_leaf_base(entry_point, number_of_arguments, &retaddr); + } + + virtual void call_VM_base( // returns the register containing the thread upon return + Register oop_result, // where an oop-result ends up if any; use noreg otherwise + Register java_thread, // the thread if computed before ; use noreg otherwise + Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise + address entry_point, // the entry point + int 
number_of_arguments,  // the number of arguments (w/o thread) to pop after the call
+    bool check_exceptions              // whether to check for pending exceptions after return
+  );
+
+  void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);
+
+  virtual void check_and_handle_earlyret(Register java_thread);
+  virtual void check_and_handle_popframe(Register java_thread);
+
+  void resolve_weak_handle(Register result, Register tmp);
+  void resolve_oop_handle(Register result, Register tmp = x15);
+  void resolve_jobject(Register value, Register thread, Register tmp);
+
+  void movoop(Register dst, jobject obj, bool immediate = false);
+  void mov_metadata(Register dst, Metadata* obj);
+  void bang_stack_size(Register size, Register tmp);
+  void set_narrow_oop(Register dst, jobject obj);
+  void set_narrow_klass(Register dst, Klass* k);
+
+  void load_mirror(Register dst, Register method, Register tmp = x15);
+  void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
+                      Address src, Register tmp1, Register thread_tmp);
+  void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
+                       Register src, Register tmp1, Register thread_tmp);
+  void load_klass(Register dst, Register src);
+  void store_klass(Register dst, Register src);
+  void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L);
+
+  void encode_klass_not_null(Register r);
+  void decode_klass_not_null(Register r);
+  void encode_klass_not_null(Register dst, Register src, Register tmp = xheapbase);
+  void decode_klass_not_null(Register dst, Register src, Register tmp = xheapbase);
+  void decode_heap_oop_not_null(Register r);
+  void decode_heap_oop_not_null(Register dst, Register src);
+  void decode_heap_oop(Register d, Register s);
+  void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
+  void encode_heap_oop(Register d, Register s);
+  void encode_heap_oop(Register r) { encode_heap_oop(r, r); };
+  void load_heap_oop(Register dst, Address src, Register tmp1 = noreg,
+                     Register thread_tmp = noreg, DecoratorSet decorators = 0);
+  void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg,
+                              Register thread_tmp = noreg, DecoratorSet decorators = 0);
+  void store_heap_oop(Address dst, Register src, Register tmp1 = noreg,
+                      Register thread_tmp = noreg, DecoratorSet decorators = 0);
+
+  void store_klass_gap(Register dst, Register src);
+
+  // currently unimplemented
+  // Used for storing NULL. All other oop constants should be
+  // stored using routines that take a jobject.
+  void store_heap_oop_null(Address dst);
+
+  // This dummy is to prevent a call to store_heap_oop from
+  // converting a zero (like NULL) into a Register by giving
+  // the compiler two choices it can't resolve
+
+  void store_heap_oop(Address dst, void* dummy);
+
+  // Support for NULL-checks
+  //
+  // Generates code that causes a NULL OS exception if the content of reg is NULL.
+  // If the accessed location is M[reg + offset] and the offset is known, provide the
+  // offset. No explicit code generation is needed if the offset is within a certain
+  // range (0 <= offset <= page_size).
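The comment above describes the implicit-null-check idea: with the page at address zero protected, a load or store at a small known offset from a NULL base traps on its own, so no explicit compare-and-branch is needed. A minimal sketch of that decision rule follows (not part of the patch); the real needs_explicit_null_check() also accounts for negative offsets and the compressed-oops heap-base case, and the 4 KiB page size here is an illustrative assumption.

#include <cassert>
#include <cstdint>

static const intptr_t os_vm_page_size = 4096;  // illustrative value

static bool needs_explicit_null_check_sketch(intptr_t offset) {
  // Offsets inside the first page fault when the base is NULL, so the
  // implicit check suffices; anything else needs an explicit branch.
  return offset < 0 || offset >= os_vm_page_size;
}

int main() {
  assert(!needs_explicit_null_check_sketch(8));       // small field offset: implicit check is fine
  assert(needs_explicit_null_check_sketch(1 << 20));  // huge offset: must test the pointer explicitly
  return 0;
}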
+
+  virtual void null_check(Register reg, int offset = -1);
+  static bool needs_explicit_null_check(intptr_t offset);
+  static bool uses_implicit_null_check(void* address);
+
+  // idiv variant which deals with MINLONG as dividend and -1 as divisor
+  int corrected_idivl(Register result, Register rs1, Register rs2,
+                      bool want_remainder);
+  int corrected_idivq(Register result, Register rs1, Register rs2,
+                      bool want_remainder);
+
+  // interface method calling
+  void lookup_interface_method(Register recv_klass,
+                               Register intf_klass,
+                               RegisterOrConstant itable_index,
+                               Register method_result,
+                               Register scan_tmp,
+                               Label& no_such_interface,
+                               bool return_method = true);
+
+  // virtual method calling
+  // n.b. x86 allows RegisterOrConstant for vtable_index
+  void lookup_virtual_method(Register recv_klass,
+                             RegisterOrConstant vtable_index,
+                             Register method_result);
+
+  // Form an address from base + offset in Rd. Rd may or may not
+  // actually be used: you must use the Address that is returned. It
+  // is up to you to ensure that the shift provided matches the size
+  // of your data.
+  Address form_address(Register Rd, Register base, long byte_offset);
+
+  // allocation
+  void tlab_allocate(
+    Register obj,               // result: pointer to object after successful allocation
+    Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
+    int con_size_in_bytes,      // object size in bytes if known at compile time
+    Register tmp1,              // temp register
+    Register tmp2,              // temp register
+    Label& slow_case,           // continuation point if fast allocation fails
+    bool is_far = false
+  );
+
+  void eden_allocate(
+    Register obj,               // result: pointer to object after successful allocation
+    Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
+    int con_size_in_bytes,      // object size in bytes if known at compile time
+    Register tmp,               // temp register
+    Label& slow_case,           // continuation point if fast allocation fails
+    bool is_far = false
+  );
+
+  // Test sub_klass against super_klass, with fast and slow paths.
+
+  // The fast path produces a tri-state answer: yes / no / maybe-slow.
+  // One of the three labels can be NULL, meaning take the fall-through.
+  // If super_check_offset is -1, the value is loaded up from super_klass.
+  // No registers are killed, except tmp_reg
+  void check_klass_subtype_fast_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register tmp_reg,
+                                     Label* L_success,
+                                     Label* L_failure,
+                                     Label* L_slow_path,
+                                     Register super_check_offset = noreg);
+
+  // The rest of the type check; must be wired to a corresponding fast path.
+  // It does not repeat the fast path logic, so don't use it standalone.
+  // The tmp1_reg and tmp2_reg can be noreg, if no temps are available.
+  // Updates the sub's secondary super cache as necessary.
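To make the fast/slow split described in the comments just above concrete (the slow-path declaration continues below), here is a host-side C++ model, not part of the patch: the fast path does one compare against either the primary-super display or the secondary-super cache, and only an uncached secondary match falls through to the scanning slow path. KlassModel and its field names are illustrative stand-ins for the real Klass layout (super_check_offset, secondary_super_cache, secondary_supers), which the assembler reads with explicit loads.

#include <cassert>
#include <cstddef>
#include <vector>

struct KlassModel {
  // primary_supers models the fixed-depth "display"; secondary_supers models
  // the variable-length list (interfaces and very deep superclasses).
  std::vector<const KlassModel*> primary_supers;
  std::vector<const KlassModel*> secondary_supers;
  mutable const KlassModel* secondary_super_cache = nullptr;
  std::size_t depth = 0;
  bool is_primary = true;
};

enum class FastResult { yes, no, maybe_slow };

// Fast path: one load and one compare, yielding the tri-state answer.
static FastResult fast_path(const KlassModel* sub, const KlassModel* super) {
  if (sub == super) return FastResult::yes;
  if (super->is_primary) {
    // super_check_offset points into the display: a single compare decides.
    const KlassModel* probe = super->depth < sub->primary_supers.size()
        ? sub->primary_supers[super->depth] : nullptr;
    return probe == super ? FastResult::yes : FastResult::no;
  }
  // super_check_offset points at the secondary-super cache: a miss only means "maybe".
  return sub->secondary_super_cache == super ? FastResult::yes : FastResult::maybe_slow;
}

// Slow path: scan the secondary supers and warm the cache on a hit.
static bool slow_path(const KlassModel* sub, const KlassModel* super) {
  for (const KlassModel* s : sub->secondary_supers) {
    if (s == super) { sub->secondary_super_cache = super; return true; }
  }
  return false;
}

int main() {
  KlassModel iface;                 // behaves like an interface: not in the display
  iface.is_primary = false;
  KlassModel klass;
  klass.secondary_supers.push_back(&iface);

  assert(fast_path(&klass, &iface) == FastResult::maybe_slow);  // cache cold
  assert(slow_path(&klass, &iface));                            // scan hits, cache warms
  assert(fast_path(&klass, &iface) == FastResult::yes);         // now the fast path hits
  return 0;
}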
+  void check_klass_subtype_slow_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register tmp1_reg,
+                                     Register tmp2_reg,
+                                     Label* L_success,
+                                     Label* L_failure);
+
+  void check_klass_subtype(Register sub_klass,
+                           Register super_klass,
+                           Register tmp_reg,
+                           Label& L_success);
+
+  Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
+
+  // only if +VerifyOops
+  void verify_oop(Register reg, const char* s = "broken oop");
+  void verify_oop_addr(Address addr, const char* s = "broken oop addr");
+
+  void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
+  void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}
+
+#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
+#define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
+
+  // A more convenient access to fence for our purposes.
+  // We use four bits to indicate the read and write bits in the predecessors and successors,
+  // and extend i for r and o for w if UseConservativeFence is enabled.
+  enum Membar_mask_bits {
+    StoreStore = 0b0101,               // (pred = ow   + succ =   ow)
+    LoadStore  = 0b1001,               // (pred = ir   + succ =   ow)
+    StoreLoad  = 0b0110,               // (pred = ow   + succ =   ir)
+    LoadLoad   = 0b1010,               // (pred = ir   + succ =   ir)
+    AnyAny     = LoadStore | StoreLoad // (pred = iorw + succ = iorw)
+  };
+
+  void membar(uint32_t order_constraint);
+
+  static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) {
+    predecessor = (order_constraint >> 2) & 0x3;
+    successor = order_constraint & 0x3;
+
+    // extend rw -> iorw:
+    // 01(w) -> 0101(ow)
+    // 10(r) -> 1010(ir)
+    // 11(rw)-> 1111(iorw)
+    if (UseConservativeFence) {
+      predecessor |= predecessor << 2;
+      successor |= successor << 2;
+    }
+  }
+
+  static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
+    return ((predecessor & 0x3) << 2) | (successor & 0x3);
+  }
+
+  // prints msg, dumps registers and stops execution
+  void stop(const char* msg);
+
+  static void debug64(char* msg, int64_t pc, int64_t regs[]);
+
+  void unimplemented(const char* what = "");
+
+  void should_not_reach_here() { stop("should not reach here"); }
+
+  static address target_addr_for_insn(address insn_addr);
+
+  // Required platform-specific helpers for Label::patch_instructions.
+  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
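As a standalone illustration of the Membar_mask_bits encoding declared a few lines above (not part of the patch): bits [3:2] carry the predecessor read/write set, bits [1:0] the successor set, and conservative-fence mode widens r to ir and w to ow by OR-ing each two-bit field onto itself shifted left by two. The constant kUseConservativeFence below stands in for the UseConservativeFence VM flag.

#include <cassert>
#include <cstdint>

static const bool kUseConservativeFence = true;  // stand-in for the VM flag

static void mask_to_pred_succ(uint32_t order_constraint, uint32_t& pred, uint32_t& succ) {
  pred = (order_constraint >> 2) & 0x3;  // bits [3:2]: predecessor r/w set
  succ = order_constraint & 0x3;         // bits [1:0]: successor r/w set
  if (kUseConservativeFence) {
    pred |= pred << 2;                   // 01(w)->0101(ow), 10(r)->1010(ir), 11(rw)->1111(iorw)
    succ |= succ << 2;
  }
}

int main() {
  const uint32_t StoreLoad = 0b0110;     // pred = w, succ = r
  uint32_t pred = 0, succ = 0;
  mask_to_pred_succ(StoreLoad, pred, succ);
  assert(pred == 0b0101 && succ == 0b1010);  // widened to "ow" / "ir"
  return 0;
}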
+ static int pd_patch_instruction_size(address branch, address target); + static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) { + pd_patch_instruction_size(branch, target); + } + static address pd_call_destination(address branch) { + return target_addr_for_insn(branch); + } + + static int patch_oop(address insn_addr, address o); + address emit_trampoline_stub(int insts_call_instruction_offset, address target); + void emit_static_call_stub(); + + // The following 4 methods return the offset of the appropriate move instruction + + // Support for fast byte/short loading with zero extension (depending on particular CPU) + int load_unsigned_byte(Register dst, Address src); + int load_unsigned_short(Register dst, Address src); + + // Support for fast byte/short loading with sign extension (depending on particular CPU) + int load_signed_byte(Register dst, Address src); + int load_signed_short(Register dst, Address src); + + // Load and store values by size and signed-ness + void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); + void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); + + public: + // Standard pseudoinstruction + void nop(); + void mv(Register Rd, Register Rs); + void notr(Register Rd, Register Rs); + void neg(Register Rd, Register Rs); + void negw(Register Rd, Register Rs); + void sext_w(Register Rd, Register Rs); + void zext_b(Register Rd, Register Rs); + void seqz(Register Rd, Register Rs); // set if = zero + void snez(Register Rd, Register Rs); // set if != zero + void sltz(Register Rd, Register Rs); // set if < zero + void sgtz(Register Rd, Register Rs); // set if > zero + + // Float pseudoinstruction + void fmv_s(FloatRegister Rd, FloatRegister Rs); + void fabs_s(FloatRegister Rd, FloatRegister Rs); // single-precision absolute value + void fneg_s(FloatRegister Rd, FloatRegister Rs); + + // Double pseudoinstruction + void fmv_d(FloatRegister Rd, FloatRegister Rs); + void fabs_d(FloatRegister Rd, FloatRegister Rs); + void fneg_d(FloatRegister Rd, FloatRegister Rs); + + // Pseudoinstruction for control and status register + void rdinstret(Register Rd); // read instruction-retired counter + void rdcycle(Register Rd); // read cycle counter + void rdtime(Register Rd); // read time + void csrr(Register Rd, unsigned csr); // read csr + void csrw(unsigned csr, Register Rs); // write csr + void csrs(unsigned csr, Register Rs); // set bits in csr + void csrc(unsigned csr, Register Rs); // clear bits in csr + void csrwi(unsigned csr, unsigned imm); + void csrsi(unsigned csr, unsigned imm); + void csrci(unsigned csr, unsigned imm); + void frcsr(Register Rd); // read float-point csr + void fscsr(Register Rd, Register Rs); // swap float-point csr + void fscsr(Register Rs); // write float-point csr + void frrm(Register Rd); // read float-point rounding mode + void fsrm(Register Rd, Register Rs); // swap float-point rounding mode + void fsrm(Register Rs); // write float-point rounding mode + void fsrmi(Register Rd, unsigned imm); + void fsrmi(unsigned imm); + void frflags(Register Rd); // read float-point exception flags + void fsflags(Register Rd, Register Rs); // swap float-point exception flags + void fsflags(Register Rs); // write float-point exception flags + void fsflagsi(Register Rd, unsigned imm); + void fsflagsi(unsigned imm); + + void beqz(Register Rs, const address &dest); + void bnez(Register Rs, const address &dest); + void 
blez(Register Rs, const address &dest); + void bgez(Register Rs, const address &dest); + void bltz(Register Rs, const address &dest); + void bgtz(Register Rs, const address &dest); + void la(Register Rd, Label &label); + void la(Register Rd, const address &dest); + void la(Register Rd, const Address &adr); + //label + void beqz(Register Rs, Label &l, bool is_far = false); + void bnez(Register Rs, Label &l, bool is_far = false); + void blez(Register Rs, Label &l, bool is_far = false); + void bgez(Register Rs, Label &l, bool is_far = false); + void bltz(Register Rs, Label &l, bool is_far = false); + void bgtz(Register Rs, Label &l, bool is_far = false); + void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); + void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); + void float_ble(FloatRegister Rs1, FloatRegister Rs2,